From fc766e4c4965915ab52a1d1fa3c7a7b3e7bc07f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Oct 2010 03:09:24 +0000
Subject: decnet: RCU conversion and get rid of dev_base_lock

While tracking dev_base_lock users, I found decnet used it in
dnet_select_source(), but for a wrong purpose:

Writers only hold RTNL, not dev_base_lock, so readers must use RCU if
they cannot use RTNL.

Adds an rcu_head in struct dn_ifaddr and handle proper RCU management.

Adds __rcu annotation in dn_route as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/af_decnet.c |   2 +-
 net/decnet/dn_dev.c    | 100 +++++++++++++++++++++++++++++--------------------
 net/decnet/dn_fib.c    |   6 ++-
 net/decnet/dn_neigh.c  |   2 +-
 net/decnet/dn_route.c  |  68 +++++++++++++++++++--------------
 5 files changed, 104 insertions(+), 74 deletions(-)

(limited to 'net')

diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d6b93d19790f..18b8a2cbdf77 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1848,7 +1848,7 @@ unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu)
 {
 	unsigned mss = 230 - DN_MAX_NSP_DATA_HEADER;
 	if (dev) {
-		struct dn_dev *dn_db = dev->dn_ptr;
+		struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 		mtu -= LL_RESERVED_SPACE(dev);
 		if (dn_db->use_long)
 			mtu -= 21;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4c409b46aa35..0ba15633c418 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -267,7 +267,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
 	if (table->extra1 == NULL)
 		return -EINVAL;
 
-	dn_db = dev->dn_ptr;
+	dn_db = rcu_dereference_raw(dev->dn_ptr);
 	old = dn_db->parms.forwarding;
 
 	err = proc_dointvec(table, write, buffer, lenp, ppos);
@@ -332,14 +332,19 @@ static struct dn_ifaddr *dn_dev_alloc_ifa(void)
 	return ifa;
 }
 
-static __inline__ void dn_dev_free_ifa(struct dn_ifaddr *ifa)
+static void dn_dev_free_ifa_rcu(struct rcu_head *head)
 {
-	kfree(ifa);
+	kfree(container_of(head, struct dn_ifaddr, rcu));
 }
 
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int destroy)
+static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
 {
-	struct dn_ifaddr *ifa1 = *ifap;
+	call_rcu(&ifa->rcu, dn_dev_free_ifa_rcu);
+}
+
+static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
+{
+	struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
 	unsigned char mac_addr[6];
 	struct net_device *dev = dn_db->dev;
 
@@ -373,7 +378,9 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	ASSERT_RTNL();
 
 	/* Check for duplicates */
-	for(ifa1 = dn_db->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
+	for (ifa1 = rtnl_dereference(dn_db->ifa_list);
+	     ifa1 != NULL;
+	     ifa1 = rtnl_dereference(ifa1->ifa_next)) {
 		if (ifa1->ifa_local == ifa->ifa_local)
 			return -EEXIST;
 	}
@@ -386,7 +393,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	}
 
 	ifa->ifa_next = dn_db->ifa_list;
-	dn_db->ifa_list = ifa;
+	rcu_assign_pointer(dn_db->ifa_list, ifa);
 
 	dn_ifaddr_notify(RTM_NEWADDR, ifa);
 	blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -396,7 +403,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 
 static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 	int rv;
 
 	if (dn_db == NULL) {
@@ -425,7 +432,8 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
 	struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
 	struct dn_dev *dn_db;
 	struct net_device *dev;
-	struct dn_ifaddr *ifa = NULL, **ifap = NULL;
+	struct dn_ifaddr *ifa = NULL;
+	struct dn_ifaddr __rcu **ifap = NULL;
 	int ret = 0;
 
 	if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
@@ -454,8 +462,10 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg)
 		goto done;
 	}
 
-	if ((dn_db = dev->dn_ptr) != NULL) {
-		for (ifap = &dn_db->ifa_list; (ifa=*ifap) != NULL; ifap = &ifa->ifa_next)
+	if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
+		for (ifap = &dn_db->ifa_list;
+		     (ifa = rtnl_dereference(*ifap)) != NULL;
+		     ifap = &ifa->ifa_next)
 			if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
 				break;
 	}
@@ -558,7 +568,7 @@ static struct dn_dev *dn_dev_by_index(int ifindex)
 
 	dev = __dev_get_by_index(&init_net, ifindex);
 	if (dev)
-		dn_dev = dev->dn_ptr;
+		dn_dev = rtnl_dereference(dev->dn_ptr);
 
 	return dn_dev;
 }
@@ -576,7 +586,8 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct nlattr *tb[IFA_MAX+1];
 	struct dn_dev *dn_db;
 	struct ifaddrmsg *ifm;
-	struct dn_ifaddr *ifa, **ifap;
+	struct dn_ifaddr *ifa;
+	struct dn_ifaddr __rcu **ifap;
 	int err = -EINVAL;
 
 	if (!net_eq(net, &init_net))
@@ -592,7 +603,9 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		goto errout;
 
 	err = -EADDRNOTAVAIL;
-	for (ifap = &dn_db->ifa_list; (ifa = *ifap); ifap = &ifa->ifa_next) {
+	for (ifap = &dn_db->ifa_list;
+	     (ifa = rtnl_dereference(*ifap)) != NULL;
+	     ifap = &ifa->ifa_next) {
 		if (tb[IFA_LOCAL] &&
 		    nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
 			continue;
@@ -632,7 +645,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
 		return -ENODEV;
 
-	if ((dn_db = dev->dn_ptr) == NULL) {
+	if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
 		dn_db = dn_dev_create(dev, &err);
 		if (!dn_db)
 			return err;
@@ -748,11 +761,11 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			skip_naddr = 0;
 		}
 
-		if ((dn_db = dev->dn_ptr) == NULL)
+		if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL)
 			goto cont;
 
-		for (ifa = dn_db->ifa_list, dn_idx = 0; ifa;
-		     ifa = ifa->ifa_next, dn_idx++) {
+		for (ifa = rtnl_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
+		     ifa = rtnl_dereference(ifa->ifa_next), dn_idx++) {
 			if (dn_idx < skip_naddr)
 				continue;
 
@@ -773,21 +786,22 @@ done:
 
 static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
 {
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 	int rv = -ENODEV;
 
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
 	if (dn_db == NULL)
 		goto out;
 
-	rtnl_lock();
-	ifa = dn_db->ifa_list;
+	ifa = rcu_dereference(dn_db->ifa_list);
 	if (ifa != NULL) {
 		*addr = ifa->ifa_local;
 		rv = 0;
 	}
-	rtnl_unlock();
 out:
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -823,7 +837,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	struct endnode_hello_message *msg;
 	struct sk_buff *skb = NULL;
 	__le16 *pktlen;
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
 		return;
@@ -889,7 +903,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn
 static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
 	int n;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 	struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
 	struct sk_buff *skb;
 	size_t size;
@@ -960,7 +974,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 {
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dn_send_endnode_hello(dev, ifa);
@@ -998,7 +1012,7 @@ static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 
 static int dn_eth_up(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dev_mc_add(dev, dn_rt_all_end_mcast);
@@ -1012,7 +1026,7 @@ static int dn_eth_up(struct net_device *dev)
 
 static void dn_eth_down(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.forwarding == 0)
 		dev_mc_del(dev, dn_rt_all_end_mcast);
@@ -1025,12 +1039,16 @@ static void dn_dev_set_timer(struct net_device *dev);
 static void dn_dev_timer_func(unsigned long arg)
 {
 	struct net_device *dev = (struct net_device *)arg;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
 	if (dn_db->t3 <= dn_db->parms.t2) {
 		if (dn_db->parms.timer3) {
-			for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+			for (ifa = rcu_dereference(dn_db->ifa_list);
+			     ifa;
+			     ifa = rcu_dereference(ifa->ifa_next)) {
 				if (!(ifa->ifa_flags & IFA_F_SECONDARY))
 					dn_db->parms.timer3(dev, ifa);
 			}
@@ -1039,13 +1057,13 @@ static void dn_dev_timer_func(unsigned long arg)
 	} else {
 		dn_db->t3 -= dn_db->parms.t2;
 	}
-
+	rcu_read_unlock();
 	dn_dev_set_timer(dev);
 }
 
 static void dn_dev_set_timer(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
 
 	if (dn_db->parms.t2 > dn_db->parms.t3)
 		dn_db->parms.t2 = dn_db->parms.t3;
@@ -1077,8 +1095,8 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 		return NULL;
 
 	memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
-	smp_wmb();
-	dev->dn_ptr = dn_db;
+
+	rcu_assign_pointer(dev->dn_ptr, dn_db);
 	dn_db->dev = dev;
 	init_timer(&dn_db->timer);
 
@@ -1086,7 +1104,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
 
 	dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
 	if (!dn_db->neigh_parms) {
-		dev->dn_ptr = NULL;
+		rcu_assign_pointer(dev->dn_ptr, NULL);
 		kfree(dn_db);
 		return NULL;
 	}
@@ -1125,7 +1143,7 @@ void dn_dev_up(struct net_device *dev)
 	struct dn_ifaddr *ifa;
 	__le16 addr = decnet_address;
 	int maybe_default = 0;
-	struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 
 	if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
 		return;
@@ -1176,7 +1194,7 @@ void dn_dev_up(struct net_device *dev)
 
 static void dn_dev_delete(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 
 	if (dn_db == NULL)
 		return;
@@ -1204,13 +1222,13 @@ static void dn_dev_delete(struct net_device *dev)
 
 void dn_dev_down(struct net_device *dev)
 {
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
 	struct dn_ifaddr *ifa;
 
 	if (dn_db == NULL)
 		return;
 
-	while((ifa = dn_db->ifa_list) != NULL) {
+	while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
 		dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
 		dn_dev_free_ifa(ifa);
 	}
@@ -1270,7 +1288,7 @@ static inline int is_dn_dev(struct net_device *dev)
 }
 
 static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rcu)
+	__acquires(RCU)
 {
 	int i;
 	struct net_device *dev;
@@ -1313,7 +1331,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void dn_dev_seq_stop(struct seq_file *seq, void *v)
-	__releases(rcu)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
@@ -1340,7 +1358,7 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v)
 		struct net_device *dev = v;
 		char peer_buf[DN_ASCBUF_LEN];
 		char router_buf[DN_ASCBUF_LEN];
-		struct dn_dev *dn_db = dev->dn_ptr;
+		struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
 
 		seq_printf(seq, "%-8s %1s     %04u %04u   %04lu %04lu"
 				"   %04hu    %03d %02x    %-10s %-7s %-7s\n",
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 4ab96c15166d..0ef0a81bcd72 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -610,10 +610,12 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
 	/* Scan device list */
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
-		dn_db = dev->dn_ptr;
+		dn_db = rcu_dereference(dev->dn_ptr);
 		if (dn_db == NULL)
 			continue;
-		for(ifa2 = dn_db->ifa_list; ifa2; ifa2 = ifa2->ifa_next) {
+		for (ifa2 = rcu_dereference(dn_db->ifa_list);
+		     ifa2 != NULL;
+		     ifa2 = rcu_dereference(ifa2->ifa_next)) {
 			if (ifa2->ifa_local == ifa->ifa_local) {
 				found_it = 1;
 				break;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index a085dbcf5c7f..602dade7e9a3 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -391,7 +391,7 @@ int dn_neigh_router_hello(struct sk_buff *skb)
 		write_lock(&neigh->lock);
 
 		neigh->used = jiffies;
-		dn_db = (struct dn_dev *)neigh->dev->dn_ptr;
+		dn_db = rcu_dereference(neigh->dev->dn_ptr);
 
 		if (!(neigh->nud_state & NUD_PERMANENT)) {
 			neigh->updated = jiffies;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index df0f3e54ff8a..94a9eb1d313e 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -93,7 +93,7 @@
 
 struct dn_rt_hash_bucket
 {
-	struct dn_route *chain;
+	struct dn_route __rcu *chain;
 	spinlock_t lock;
 };
 
@@ -157,15 +157,17 @@ static inline void dnrt_drop(struct dn_route *rt)
 static void dn_dst_check_expire(unsigned long dummy)
 {
 	int i;
-	struct dn_route *rt, **rtp;
+	struct dn_route *rt;
+	struct dn_route __rcu **rtp;
 	unsigned long now = jiffies;
 	unsigned long expire = 120 * HZ;
 
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 		rtp = &dn_rt_hash_table[i].chain;
 
 		spin_lock(&dn_rt_hash_table[i].lock);
-		while((rt=*rtp) != NULL) {
+		while ((rt = rcu_dereference_protected(*rtp,
+						lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
 			if (atomic_read(&rt->dst.__refcnt) ||
 					(now - rt->dst.lastuse) < expire) {
 				rtp = &rt->dst.dn_next;
@@ -186,17 +188,19 @@ static void dn_dst_check_expire(unsigned long dummy)
 
 static int dn_dst_gc(struct dst_ops *ops)
 {
-	struct dn_route *rt, **rtp;
+	struct dn_route *rt;
+	struct dn_route __rcu **rtp;
 	int i;
 	unsigned long now = jiffies;
 	unsigned long expire = 10 * HZ;
 
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 		rtp = &dn_rt_hash_table[i].chain;
 
-		while((rt=*rtp) != NULL) {
+		while ((rt = rcu_dereference_protected(*rtp,
+						lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
 			if (atomic_read(&rt->dst.__refcnt) ||
 					(now - rt->dst.lastuse) < expire) {
 				rtp = &rt->dst.dn_next;
@@ -227,7 +231,7 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 	u32 min_mtu = 230;
 	struct dn_dev *dn = dst->neighbour ?
-			    (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL;
+			    rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL;
 
 	if (dn && dn->use_long == 0)
 		min_mtu -= 6;
@@ -277,13 +281,15 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp)
 {
-	struct dn_route *rth, **rthp;
+	struct dn_route *rth;
+	struct dn_route __rcu **rthp;
 	unsigned long now = jiffies;
 
 	rthp = &dn_rt_hash_table[hash].chain;
 
 	spin_lock_bh(&dn_rt_hash_table[hash].lock);
-	while((rth = *rthp) != NULL) {
+	while ((rth = rcu_dereference_protected(*rthp,
+						lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
 		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
 			*rthp = rth->dst.dn_next;
@@ -315,15 +321,15 @@ static void dn_run_flush(unsigned long dummy)
 	int i;
 	struct dn_route *rt, *next;
 
-	for(i = 0; i < dn_rt_hash_mask; i++) {
+	for (i = 0; i < dn_rt_hash_mask; i++) {
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 
-		if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL)
+		if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
 			goto nothing_to_declare;
 
-		for(; rt; rt=next) {
-			next = rt->dst.dn_next;
-			rt->dst.dn_next = NULL;
+		for(; rt; rt = next) {
+			next = rcu_dereference_raw(rt->dst.dn_next);
+			RCU_INIT_POINTER(rt->dst.dn_next, NULL);
 			dst_free((struct dst_entry *)rt);
 		}
 
@@ -458,15 +464,16 @@ static int dn_return_long(struct sk_buff *skb)
  */
 static int dn_route_rx_packet(struct sk_buff *skb)
 {
-	struct dn_skb_cb *cb = DN_SKB_CB(skb);
+	struct dn_skb_cb *cb;
 	int err;
 
 	if ((err = dn_route_input(skb)) == 0)
 		return dst_input(skb);
 
+	cb = DN_SKB_CB(skb);
 	if (decnet_debug_level & 4) {
 		char *devname = skb->dev ? skb->dev->name : "???";
-		struct dn_skb_cb *cb = DN_SKB_CB(skb);
+
 		printk(KERN_DEBUG
 			"DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
 			(int)cb->rt_flags, devname, skb->len,
@@ -573,7 +580,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 	struct dn_skb_cb *cb;
 	unsigned char flags = 0;
 	__u16 len = le16_to_cpu(*(__le16 *)skb->data);
-	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+	struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
 	unsigned char padlen = 0;
 
 	if (!net_eq(dev_net(dev), &init_net))
@@ -728,7 +735,7 @@ static int dn_forward(struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	struct dst_entry *dst = skb_dst(skb);
-	struct dn_dev *dn_db = dst->dev->dn_ptr;
+	struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
 	struct dn_route *rt;
 	struct neighbour *neigh = dst->neighbour;
 	int header_len;
@@ -835,13 +842,16 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
 static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
 {
 	__le16 saddr = 0;
-	struct dn_dev *dn_db = dev->dn_ptr;
+	struct dn_dev *dn_db;
 	struct dn_ifaddr *ifa;
 	int best_match = 0;
 	int ret;
 
-	read_lock(&dev_base_lock);
-	for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) {
+	rcu_read_lock();
+	dn_db = rcu_dereference(dev->dn_ptr);
+	for (ifa = rcu_dereference(dn_db->ifa_list);
+	     ifa != NULL;
+	     ifa = rcu_dereference(ifa->ifa_next)) {
 		if (ifa->ifa_scope > scope)
 			continue;
 		if (!daddr) {
@@ -854,7 +864,7 @@ static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int
 		if (best_match == 0)
 			saddr = ifa->ifa_local;
 	}
-	read_unlock(&dev_base_lock);
+	rcu_read_unlock();
 
 	return saddr;
 }
@@ -1020,7 +1030,7 @@ source_ok:
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;
-		dn_db = dev_out->dn_ptr;
+		dn_db = rcu_dereference_raw(dev_out->dn_ptr);
 		/* Possible improvement - check all devices for local addr */
 		if (dn_dev_islocal(dev_out, fl.fld_dst)) {
 			dev_put(dev_out);
@@ -1233,7 +1243,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 
 	dev_hold(in_dev);
 
-	if ((dn_db = in_dev->dn_ptr) == NULL)
+	if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
 		goto out;
 
 	/* Zero source addresses are not allowed */
@@ -1677,15 +1687,15 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 {
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	rt = rt->dst.dn_next;
-	while(!rt) {
+	rt = rcu_dereference_bh(rt->dst.dn_next);
+	while (!rt) {
 		rcu_read_unlock_bh();
 		if (--s->bucket < 0)
 			break;
 		rcu_read_lock_bh();
-		rt = dn_rt_hash_table[s->bucket].chain;
+		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
 	}
-	return rcu_dereference_bh(rt);
+	return rt;
 }
 
 static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
-- 
cgit v1.2.3


From 67426b756c4d52c511c4b22b269accea171692a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Oct 2010 20:44:44 +0000
Subject: af_unix: use keyed wakeups

Instead of wakeup all sleepers, use wake_up_interruptible_sync_poll() to
wakeup only ones interested into writing the socket.

This patch is a specialization of commit 37e5540b3c9d (epoll keyed
wakeups: make sockets use keyed wakeups).

On a test program provided by Alan Crequy :

Before:
real    0m3.101s
user    0m0.000s
sys     0m6.104s

After:

real	0m0.211s
user	0m0.000s
sys	0m0.208s

Reported-by: Alban Crequy <alban.crequy@collabora.co.uk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3c95304a0817..f33c5958dbb2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -316,7 +316,8 @@ static void unix_write_space(struct sock *sk)
 	if (unix_writable(sk)) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (wq_has_sleeper(wq))
-			wake_up_interruptible_sync(&wq->wait);
+			wake_up_interruptible_sync_poll(&wq->wait,
+				POLLOUT | POLLWRNORM | POLLWRBAND);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 	rcu_read_unlock();
@@ -1710,7 +1711,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out_unlock;
 	}
 
-	wake_up_interruptible_sync(&u->peer_wait);
+	wake_up_interruptible_sync_poll(&u->peer_wait,
+					POLLOUT | POLLWRNORM | POLLWRBAND);
 
 	if (msg->msg_name)
 		unix_copy_addr(msg, skb->sk);
-- 
cgit v1.2.3


From 5456f09aaf88731e16dbcea7522cb330b6846415 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 31 Oct 2010 05:36:23 +0000
Subject: af_unix: fix unix_dgram_poll() behavior for EPOLLOUT event

Alban Crequy reported a problem with connected dgram af_unix sockets and
provided a test program. epoll() would miss to send an EPOLLOUT event
when a thread unqueues a packet from the other peer, making its receive
queue not full.

This is because unix_dgram_poll() fails to call sock_poll_wait(file,
&unix_sk(other)->peer_wait, wait);
if the socket is not writeable at the time epoll_ctl(ADD) is called.

We must call sock_poll_wait(), regardless of 'writable' status, so that
epoll can be notified later of states changes.

Misc: avoids testing twice (sk->sk_shutdown & RCV_SHUTDOWN)

Reported-by: Alban Crequy <alban.crequy@collabora.co.uk>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f33c5958dbb2..e8898758dd31 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2074,13 +2074,12 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
 		mask |= POLLERR;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
@@ -2092,20 +2091,15 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 			return mask;
 	}
 
-	/* writable? */
 	writable = unix_writable(sk);
-	if (writable) {
-		other = unix_peer_get(sk);
-		if (other) {
-			if (unix_peer(other) != sk) {
-				sock_poll_wait(file, &unix_sk(other)->peer_wait,
-					  wait);
-				if (unix_recvq_full(other))
-					writable = 0;
-			}
-
-			sock_put(other);
+	other = unix_peer_get(sk);
+	if (other) {
+		if (unix_peer(other) != sk) {
+			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
+			if (unix_recvq_full(other))
+				writable = 0;
 		}
+		sock_put(other);
 	}
 
 	if (writable)
-- 
cgit v1.2.3


From 973a34aa8593dbfe84386343c694f5beecb51d8a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 31 Oct 2010 05:38:25 +0000
Subject: af_unix: optimize unix_dgram_poll()

unix_dgram_poll() is pretty expensive to check POLLOUT status, because
it has to lock the socket to get its peer, take a reference on the peer
to check its receive queue status, and queue another poll_wait on
peer_wait. This all can be avoided if the process calling
unix_dgram_poll() is not interested in POLLOUT status. It makes
unix_dgram_recvmsg() faster by not queueing irrelevant pollers in
peer_wait.

On a test program provided by Alan Crequy :

Before:

real    0m0.211s
user    0m0.000s
sys     0m0.208s

After:

real    0m0.044s
user    0m0.000s
sys     0m0.040s

Suggested-by: Davide Libenzi <davidel@xmailserver.org>
Reported-by: Alban Crequy <alban.crequy@collabora.co.uk>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e8898758dd31..7ff31c60186a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2091,6 +2091,10 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 			return mask;
 	}
 
+	/* No write status requested, avoid expensive OUT tests. */
+	if (wait && !(wait->key & (POLLWRBAND | POLLWRNORM | POLLOUT)))
+		return mask;
+
 	writable = unix_writable(sk);
 	other = unix_peer_get(sk);
 	if (other) {
-- 
cgit v1.2.3


From b194a3674fba6d9f9e470084d192c7cb99194a62 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 30 Oct 2010 11:08:52 +0000
Subject: net/core/dev.c: Update WARN uses

Coalesce long formats.
Add missing newlines.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0dd54a69dace..5968c822c999 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1817,8 +1817,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
 			dev->ethtool_ops->get_drvinfo(dev, &info);
 
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
-			"ip_summed=%d",
+		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
 		     info.driver, dev ? dev->features : 0L,
 		     skb->sk ? skb->sk->sk_route_caps : 0L,
 		     skb->len, skb->data_len, skb->ip_summed);
-- 
cgit v1.2.3


From 2af6fd8b18ceed416c9dfa675287c765aabf7d43 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 30 Oct 2010 11:08:53 +0000
Subject: net/ipv4/tcp.c: Update WARN uses

Coalesce long formats.
Align arguments.
Remove KERN_<level>.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1664a0590bb8..5f738c5c0dc4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1193,7 +1193,7 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
 	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 
 	WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
-	     KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
+	     "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
 	     tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
 #endif
 
@@ -1477,10 +1477,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			 * shouldn't happen.
 			 */
 			if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
-			     KERN_INFO "recvmsg bug: copied %X "
-				       "seq %X rcvnxt %X fl %X\n", *seq,
-				       TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
-				       flags))
+				 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+				 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
+				 flags))
 				break;
 
 			offset = *seq - TCP_SKB_CB(skb)->seq;
@@ -1490,10 +1489,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				goto found_ok_skb;
 			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
-			WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: "
-					"copied %X seq %X rcvnxt %X fl %X\n",
-					*seq, TCP_SKB_CB(skb)->seq,
-					tp->rcv_nxt, flags);
+			WARN(!(flags & MSG_PEEK),
+			     "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+			     *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
 		}
 
 		/* Well, if we have backlog, try to process it now yet. */
-- 
cgit v1.2.3


From f17a37c9b8c4b32c01e501a84fa6f30e344c6110 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 10 Nov 2010 21:20:07 +0100
Subject: dccp ccid-2: Ack Vector interface clean-up

This patch brings the Ack Vector interface up to date. Its main purpose is
to lay the basis for the subsequent patches of this set, which will use the
new data structure fields and routines.

There are no real algorithmic changes, rather an adaptation:

 (1) Replaced the static Ack Vector size (2) with a #define so that it can
     be adapted (with low loss / Ack Ratio, a value of 1 works, so 2 seems
     to be sufficient for the moment) and added a solution so that computing
     the ECN nonce will continue to work - even with larger Ack Vectors.

 (2) Replaced the #defines for Ack Vector states with a complete enum.

 (3) Replaced #defines to compute Ack Vector length and state with general
     purpose routines (inlines), and updated code to use these.

 (4) Added a `tail' field (conversion to circular buffer in subsequent patch).

 (5) Updated the (outdated) documentation for Ack Vector struct.

 (6) All sequence number containers now trimmed to 48 bits.

 (7) Removal of unused bits:
     * removed dccpav_ack_nonce from struct dccp_ackvec, since this is already
       redundantly stored in the `dccpavr_ack_nonce' (of Ack Vector record);
     * removed Elapsed Time for Ack Vectors (it was nowhere used);
     * replaced semantics of dccpavr_sent_len with dccpavr_ack_runlen, since
       the code needs to be able to remember the old run length;
     * reduced the de-/allocation routines (redundant / duplicate tests).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c      | 178 ++++++++++++++++++-------------------------------
 net/dccp/ackvec.h      | 103 +++++++++++++++-------------
 net/dccp/ccids/ccid2.c |  13 ++--
 net/dccp/input.c       |   6 +-
 net/dccp/options.c     |   1 +
 5 files changed, 128 insertions(+), 173 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 92a6fcb40d7d..17bf10f96057 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -1,7 +1,8 @@
 /*
  *  net/dccp/ackvec.c
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  *
  *      This program is free software; you can redistribute it and/or modify it
@@ -23,24 +24,32 @@
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
-static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 {
-	struct dccp_ackvec_record *avr =
-			kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
+
+	if (av != NULL) {
+		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		INIT_LIST_HEAD(&av->av_records);
+	}
+	return av;
+}
 
-	if (avr != NULL)
-		INIT_LIST_HEAD(&avr->avr_node);
+static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
+{
+	struct dccp_ackvec_record *cur, *next;
 
-	return avr;
+	list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
+		kmem_cache_free(dccp_ackvec_record_slab, cur);
+	INIT_LIST_HEAD(&av->av_records);
 }
 
-static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+void dccp_ackvec_free(struct dccp_ackvec *av)
 {
-	if (unlikely(avr == NULL))
-		return;
-	/* Check if deleting a linked record */
-	WARN_ON(!list_empty(&avr->avr_node));
-	kmem_cache_free(dccp_ackvec_record_slab, avr);
+	if (likely(av != NULL)) {
+		dccp_ackvec_purge_records(av);
+		kmem_cache_free(dccp_ackvec_slab, av);
+	}
 }
 
 static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
@@ -68,24 +77,16 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts, i;
-	u32 elapsed_time;
+	u16 len = av->av_vec_len + 2 * nr_opts;
+	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
 	struct dccp_ackvec_record *avr;
-	suseconds_t delta;
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
 		return -1;
 
-	delta = ktime_us_delta(ktime_get_real(), av->av_time);
-	elapsed_time = delta / 10;
-
-	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(skb, elapsed_time))
-		return -1;
-
-	avr = dccp_ackvec_record_new();
+	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
 		return -1;
 
@@ -94,7 +95,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	to   = skb_push(skb, len);
 	len  = av->av_vec_len;
 	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCP_MAX_ACKVEC_LEN;
+	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
 	for (i = 0; i < nr_opts; ++i) {
 		int copylen = len;
@@ -102,7 +103,13 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 		if (len > DCCP_SINGLE_OPT_MAXLEN)
 			copylen = DCCP_SINGLE_OPT_MAXLEN;
 
-		*to++ = DCCPO_ACK_VECTOR_0;
+		/*
+		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+		 * its type; ack_nonce is the sum of all individual buf_nonce's.
+		 */
+		nonce ^= av->av_buf_nonce[i];
+
+		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
 		*to++ = copylen + 2;
 
 		/* Check if buf_head wraps */
@@ -123,75 +130,24 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/*
-	 *	From RFC 4340, A.2:
-	 *
-	 *	For each acknowledgement it sends, the HC-Receiver will add an
-	 *	acknowledgement record.  ack_seqno will equal the HC-Receiver
-	 *	sequence number it used for the ack packet; ack_ptr will equal
-	 *	buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
-	 *	equal buf_nonce.
+	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	avr->avr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-	avr->avr_ack_ptr   = av->av_buf_head;
-	avr->avr_ack_ackno = av->av_buf_ackno;
-	avr->avr_ack_nonce = av->av_buf_nonce;
-	avr->avr_sent_len  = av->av_vec_len;
+	avr->avr_ack_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_ptr    = av->av_buf_head;
+	avr->avr_ack_ackno  = av->av_buf_ackno;
+	avr->avr_ack_nonce  = nonce;
+	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
 
 	dccp_ackvec_insert_avr(av, avr);
 
 	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
 		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_sent_len,
+		      dccp_role(sk), avr->avr_ack_runlen,
 		      (unsigned long long)avr->avr_ack_seqno,
 		      (unsigned long long)avr->avr_ack_ackno);
 	return 0;
 }
 
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
-	struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
-
-	if (av != NULL) {
-		av->av_buf_head	 = DCCP_MAX_ACKVEC_LEN - 1;
-		av->av_buf_ackno = UINT48_MAX + 1;
-		av->av_buf_nonce = 0;
-		av->av_time	 = ktime_set(0, 0);
-		av->av_vec_len	 = 0;
-		INIT_LIST_HEAD(&av->av_records);
-	}
-
-	return av;
-}
-
-void dccp_ackvec_free(struct dccp_ackvec *av)
-{
-	if (unlikely(av == NULL))
-		return;
-
-	if (!list_empty(&av->av_records)) {
-		struct dccp_ackvec_record *avr, *next;
-
-		list_for_each_entry_safe(avr, next, &av->av_records, avr_node) {
-			list_del_init(&avr->avr_node);
-			dccp_ackvec_record_delete(avr);
-		}
-	}
-
-	kmem_cache_free(dccp_ackvec_slab, av);
-}
-
-static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
-				   const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_STATE_MASK;
-}
-
-static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
-				 const u32 index)
-{
-	return av->av_buf[index] & DCCP_ACKVEC_LEN_MASK;
-}
-
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
@@ -204,7 +160,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 	long gap;
 	long new_head;
 
-	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
+	if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
 		return -ENOBUFS;
 
 	gap	 = packets - 1;
@@ -212,18 +168,18 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 
 	if (new_head < 0) {
 		if (gap > 0) {
-			memset(av->av_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
+			memset(av->av_buf, DCCPAV_NOT_RECEIVED,
 			       gap + new_head + 1);
 			gap = -new_head;
 		}
-		new_head += DCCP_MAX_ACKVEC_LEN;
+		new_head += DCCPAV_MAX_ACKVEC_LEN;
 	}
 
 	av->av_buf_head = new_head;
 
 	if (gap > 0)
 		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
+		       DCCPAV_NOT_RECEIVED, gap);
 
 	av->av_buf[av->av_buf_head] = state;
 	av->av_vec_len += packets;
@@ -236,6 +192,8 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		    const u64 ackno, const u8 state)
 {
+	u8 *cur_head = av->av_buf + av->av_buf_head,
+	   *buf_end  = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 	/*
 	 * Check at the right places if the buffer is full, if it is, tell the
 	 * caller to start dropping packets till the HC-Sender acks our ACK
@@ -260,7 +218,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 
 	/* See if this is the first ackno being inserted */
 	if (av->av_vec_len == 0) {
-		av->av_buf[av->av_buf_head] = state;
+		*cur_head = state;
 		av->av_vec_len = 1;
 	} else if (after48(ackno, av->av_buf_ackno)) {
 		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
@@ -269,10 +227,9 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 * Look if the state of this packet is the same as the
 		 * previous ackno and if so if we can bump the head len.
 		 */
-		if (delta == 1 &&
-		    dccp_ackvec_state(av, av->av_buf_head) == state &&
-		    dccp_ackvec_len(av, av->av_buf_head) < DCCP_ACKVEC_LEN_MASK)
-			av->av_buf[av->av_buf_head]++;
+		if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
+		    dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
+			*cur_head += 1;
 		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
 			return -ENOBUFS;
 	} else {
@@ -285,21 +242,17 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 		 *	could reduce the complexity of this scan.)
 		 */
 		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-		u32 index = av->av_buf_head;
 
 		while (1) {
-			const u8 len = dccp_ackvec_len(av, index);
-			const u8 av_state = dccp_ackvec_state(av, index);
+			const u8 len = dccp_ackvec_runlen(cur_head);
 			/*
 			 * valid packets not yet in av_buf have a reserved
 			 * entry, with a len equal to 0.
 			 */
-			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
-			    len == 0 && delta == 0) { /* Found our
-							 reserved seat! */
+			if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
 				dccp_pr_debug("Found %llu reserved seat!\n",
 					      (unsigned long long)ackno);
-				av->av_buf[index] = state;
+				*cur_head = state;
 				goto out;
 			}
 			/* len == 0 means one packet */
@@ -307,13 +260,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 				goto out_duplicate;
 
 			delta -= len + 1;
-			if (++index == DCCP_MAX_ACKVEC_LEN)
-				index = 0;
+			if (++cur_head == buf_end)
+				cur_head = av->av_buf;
 		}
 	}
 
 	av->av_buf_ackno = ackno;
-	av->av_time = ktime_get_real();
 out:
 	return 0;
 
@@ -333,13 +285,13 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 	if (av->av_buf_head <= avr->avr_ack_ptr)
 		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
 	else
-		av->av_vec_len = DCCP_MAX_ACKVEC_LEN - 1 -
+		av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
 				 av->av_buf_head + avr->avr_ack_ptr;
 
 	/* free records */
 	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del_init(&avr->avr_node);
-		dccp_ackvec_record_delete(avr);
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
 	}
 }
 
@@ -357,7 +309,7 @@ void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
 		if (ackno == avr->avr_ack_seqno) {
 			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
 				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), 1,
+				      dccp_role(sk), avr->avr_ack_runlen,
 				      (unsigned long long)avr->avr_ack_seqno,
 				      (unsigned long long)avr->avr_ack_ackno);
 			dccp_ackvec_throw_record(av, avr);
@@ -387,7 +339,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 	 */
 	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
 	while (i--) {
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+		const u8 rl = dccp_ackvec_runlen(vector);
 		u64 ackno_end_rl;
 
 		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
@@ -404,8 +356,7 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
 		break;
 found:
 		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
-			if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
+			if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
 				dccp_pr_debug("%s ACK vector 0, len=%d, "
 					      "ack_seqno=%llu, ack_ackno=%llu, "
 					      "ACKED!\n",
@@ -448,10 +399,9 @@ int __init dccp_ackvec_init(void)
 	if (dccp_ackvec_slab == NULL)
 		goto out_err;
 
-	dccp_ackvec_record_slab =
-			kmem_cache_create("dccp_ackvec_record",
-					  sizeof(struct dccp_ackvec_record),
-					  0, SLAB_HWCACHE_ALIGN, NULL);
+	dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
+					     sizeof(struct dccp_ackvec_record),
+					     0, SLAB_HWCACHE_ALIGN, NULL);
 	if (dccp_ackvec_record_slab == NULL)
 		goto out_destroy_slab;
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 7ea557b7c6b1..ebcbbc726cff 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -3,9 +3,9 @@
 /*
  *  net/dccp/ackvec.h
  *
- *  An implementation of the DCCP protocol
+ *  An implementation of Ack Vectors for the DCCP protocol
+ *  Copyright (c) 2007 University of Aberdeen, Scotland, UK
  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- *
  *	This program is free software; you can redistribute it and/or modify it
  *	under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
@@ -13,75 +13,84 @@
 
 #include <linux/dccp.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
 #include <linux/list.h>
 #include <linux/types.h>
 
-/* We can spread an ack vector across multiple options */
-#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
+/*
+ * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
+ * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
+ * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
+ * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ */
+#define DCCPAV_NUM_ACKVECS	2
+#define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
 
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
-#define DCCP_ACKVEC_STATE_RECEIVED	0
-#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
-#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)
+enum dccp_ackvec_states {
+	DCCPAV_RECEIVED =	0x00,
+	DCCPAV_ECN_MARKED =	0x40,
+	DCCPAV_RESERVED =	0x80,
+	DCCPAV_NOT_RECEIVED =	0xC0
+};
+#define DCCPAV_MAX_RUNLEN	0x3F
 
-#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
-#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */
+static inline u8 dccp_ackvec_runlen(const u8 *cell)
+{
+	return *cell & DCCPAV_MAX_RUNLEN;
+}
 
-/** struct dccp_ackvec - ack vector
- *
- * This data structure is the one defined in RFC 4340, Appendix A.
- *
- * @av_buf_head - circular buffer head
- * @av_buf_tail - circular buffer tail
- * @av_buf_ackno - ack # of the most recent packet acknowledgeable in the
- *		       buffer (i.e. %av_buf_head)
- * @av_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
- * 		       by the buffer with State 0
- *
- * Additionally, the HC-Receiver must keep some information about the
- * Ack Vectors it has recently sent. For each packet sent carrying an
- * Ack Vector, it remembers four variables:
+static inline u8 dccp_ackvec_state(const u8 *cell)
+{
+	return *cell & ~DCCPAV_MAX_RUNLEN;
+}
+
+/** struct dccp_ackvec - Ack Vector main data structure
  *
- * @av_records - list of dccp_ackvec_record
- * @av_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ * This implements a fixed-size circular buffer within an array and is largely
+ * based on Appendix A of RFC 4340.
  *
- * @av_time - the time in usecs
- * @av_buf - circular buffer of acknowledgeable packets
+ * @av_buf:	   circular buffer storage area
+ * @av_buf_head:   head index; begin of live portion in @av_buf
+ * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
+ * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
+ *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
+ * @av_veclen:	   length of the live portion of @av_buf
  */
 struct dccp_ackvec {
-	u64			av_buf_ackno;
-	struct list_head	av_records;
-	ktime_t			av_time;
+	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
 	u16			av_buf_head;
+	u16			av_buf_tail;
+	u64			av_buf_ackno:48;
+	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	struct list_head	av_records;
 	u16			av_vec_len;
-	u8			av_buf_nonce;
-	u8			av_ack_nonce;
-	u8			av_buf[DCCP_MAX_ACKVEC_LEN];
 };
 
-/** struct dccp_ackvec_record - ack vector record
+/** struct dccp_ackvec_record - Records information about sent Ack Vectors
  *
- * ACK vector record as defined in Appendix A of spec.
+ * These list entries define the additional information which the HC-Receiver
+ * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
  *
- * The list is sorted by avr_ack_seqno
+ * @avr_node:	    the list node in @av_records
+ * @avr_ack_seqno:  sequence number of the packet the Ack Vector was sent on
+ * @avr_ack_ackno:  the Ack number that this record/Ack Vector refers to
+ * @avr_ack_ptr:    pointer into @av_buf where this record starts
+ * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
+ * @avr_ack_nonce:  the sum of @av_buf_nonce's at the time this record was sent
  *
- * @avr_node - node in av_records
- * @avr_ack_seqno - sequence number of the packet this record was sent on
- * @avr_ack_ackno - sequence number being acknowledged
- * @avr_ack_ptr - pointer into av_buf where this record starts
- * @avr_ack_nonce - av_ack_nonce at the time this record was sent
- * @avr_sent_len - lenght of the record in av_buf
+ * The list as a whole is sorted in descending order by @avr_ack_seqno.
  */
 struct dccp_ackvec_record {
 	struct list_head avr_node;
-	u64		 avr_ack_seqno;
-	u64		 avr_ack_ackno;
+	u64		 avr_ack_seqno:48;
+	u64		 avr_ack_ackno:48;
 	u16		 avr_ack_ptr;
-	u16		 avr_sent_len;
-	u8		 avr_ack_nonce;
+	u8		 avr_ack_runlen;
+	u8		 avr_ack_nonce:1;
 };
 
 struct sock;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 6576eae9e779..cb1b4a0d1877 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -513,8 +513,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 					 &vector, &veclen)) != -1) {
 		/* go through this ack vector */
 		while (veclen--) {
-			const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-			u64 ackno_end_rl = SUB48(ackno, rl);
+			u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
 
 			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
 				       (unsigned long long)ackno,
@@ -537,17 +536,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = *vector &
-						 DCCP_ACKVEC_STATE_MASK;
+				const u8 state = dccp_ackvec_state(vector);
 
 				/* new packet received or marked */
-				if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+				if (state != DCCPAV_NOT_RECEIVED &&
 				    !seqp->ccid2s_acked) {
-					if (state ==
-					    DCCP_ACKVEC_STATE_ECN_MARKED) {
+					if (state == DCCPAV_ECN_MARKED)
 						ccid2_congestion_event(sk,
 								       seqp);
-					} else
+					else
 						ccid2_new_ack(sk, seqp,
 							      &maxincr);
 
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 265985370fa1..c7aeeba859d4 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -378,8 +378,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	if (dp->dccps_hc_rx_ackvec != NULL &&
 	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq,
-			    DCCP_ACKVEC_STATE_RECEIVED))
+			    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 		goto discard;
 	dccp_deliver_input_to_ccids(sk, skb);
 
@@ -637,8 +636,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 		if (dp->dccps_hc_rx_ackvec != NULL &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq,
-				    DCCP_ACKVEC_STATE_RECEIVED))
+				    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
 			goto discard;
 
 		dccp_deliver_input_to_ccids(sk, skb);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index cd3061813009..799c6f4547cd 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -340,6 +340,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
 	return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
 }
 
+/* FIXME: This function is currently not used anywhere */
 int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
 {
 	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
-- 
cgit v1.2.3


From 7d870936602533836bba821bd5c679c62c52a95f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 10 Nov 2010 21:21:02 +0100
Subject: dccp ccid-2: Separate internals of Ack Vectors from option-parsing
 code

This patch
 * separates Ack Vector housekeeping code from option-insertion code;
 * shifts option-specific code from ackvec.c into options.c;
 * introduces a dedicated routine to take care of the Ack Vector records;
 * simplifies the dccp_ackvec_insert_avr() routine: the BUG_ON was redundant,
   since the list is automatically arranged in descending order of ack_seqno.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 100 ++++++++++-------------------------------------------
 net/dccp/ackvec.h  |   2 +-
 net/dccp/options.c |  60 ++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 17bf10f96057..af976fca407a 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -52,99 +52,35 @@ void dccp_ackvec_free(struct dccp_ackvec *av)
 	}
 }
 
-static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
-				   struct dccp_ackvec_record *avr)
-{
-	/*
-	 * AVRs are sorted by seqno. Since we are sending them in order, we
-	 * just add the AVR at the head of the list.
-	 * -sorbo.
-	 */
-	if (!list_empty(&av->av_records)) {
-		const struct dccp_ackvec_record *head =
-					list_entry(av->av_records.next,
-						   struct dccp_ackvec_record,
-						   avr_node);
-		BUG_ON(before48(avr->avr_ack_seqno, head->avr_ack_seqno));
-	}
-
-	list_add(&avr->avr_node, &av->av_records);
-}
-
-int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+/**
+ * dccp_ackvec_update_records  -  Record information about sent Ack Vectors
+ * @av:		Ack Vector records to update
+ * @seqno:	Sequence number of the packet carrying the Ack Vector just sent
+ * @nonce_sum:	The sum of all buffer nonces contained in the Ack Vector
+ */
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
-	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts;
-	u8 i, nonce = 0;
-	const unsigned char *tail, *from;
-	unsigned char *to;
 	struct dccp_ackvec_record *avr;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
-		return -1;
-
 	avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
 	if (avr == NULL)
-		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
-	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
-	from = av->av_buf + av->av_buf_head;
-	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
-
-	for (i = 0; i < nr_opts; ++i) {
-		int copylen = len;
-
-		if (len > DCCP_SINGLE_OPT_MAXLEN)
-			copylen = DCCP_SINGLE_OPT_MAXLEN;
-
-		/*
-		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
-		 * its type; ack_nonce is the sum of all individual buf_nonce's.
-		 */
-		nonce ^= av->av_buf_nonce[i];
-
-		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
-		*to++ = copylen + 2;
-
-		/* Check if buf_head wraps */
-		if (from + copylen > tail) {
-			const u16 tailsize = tail - from;
-
-			memcpy(to, from, tailsize);
-			to	+= tailsize;
-			len	-= tailsize;
-			copylen	-= tailsize;
-			from	= av->av_buf;
-		}
-
-		memcpy(to, from, copylen);
-		from += copylen;
-		to   += copylen;
-		len  -= copylen;
-	}
+		return -ENOBUFS;
 
-	/*
-	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
-	 */
-	avr->avr_ack_seqno  = DCCP_SKB_CB(skb)->dccpd_seq;
+	avr->avr_ack_seqno  = seqno;
 	avr->avr_ack_ptr    = av->av_buf_head;
 	avr->avr_ack_ackno  = av->av_buf_ackno;
-	avr->avr_ack_nonce  = nonce;
+	avr->avr_ack_nonce  = nonce_sum;
 	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
+	/*
+	 * Since GSS is incremented for each packet, the list is automatically
+	 * arranged in descending order of @ack_seqno.
+	 */
+	list_add(&avr->avr_node, &av->av_records);
 
-	dccp_ackvec_insert_avr(av, avr);
-
-	dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
-		      "ack_ackno=%llu\n",
-		      dccp_role(sk), avr->avr_ack_runlen,
+	dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
 		      (unsigned long long)avr->avr_ack_seqno,
-		      (unsigned long long)avr->avr_ack_ackno);
+		      (unsigned long long)avr->avr_ack_ackno,
+		      avr->avr_ack_runlen);
 	return 0;
 }
 
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index ebcbbc726cff..3e894a0173a7 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -111,7 +111,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     u64 *ackno, const u8 opt,
 			     const u8 *value, const u8 len);
 
-extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);
+extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 
 static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
 {
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 799c6f4547cd..f4ff0a308269 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -425,6 +425,66 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
 	return 0;
 }
 
+static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	/* Figure out how many options do we need to represent the ackvec */
+	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
+	u16 len = av->av_vec_len + 2 * nr_opts;
+	u8 i, nonce = 0;
+	const unsigned char *tail, *from;
+	unsigned char *to;
+
+	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+		return -1;
+
+	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+	to   = skb_push(skb, len);
+	len  = av->av_vec_len;
+	from = av->av_buf + av->av_buf_head;
+	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
+
+	for (i = 0; i < nr_opts; ++i) {
+		int copylen = len;
+
+		if (len > DCCP_SINGLE_OPT_MAXLEN)
+			copylen = DCCP_SINGLE_OPT_MAXLEN;
+
+		/*
+		 * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
+		 * its type; ack_nonce is the sum of all individual buf_nonce's.
+		 */
+		nonce ^= av->av_buf_nonce[i];
+
+		*to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
+		*to++ = copylen + 2;
+
+		/* Check if buf_head wraps */
+		if (from + copylen > tail) {
+			const u16 tailsize = tail - from;
+
+			memcpy(to, from, tailsize);
+			to	+= tailsize;
+			len	-= tailsize;
+			copylen	-= tailsize;
+			from	= av->av_buf;
+		}
+
+		memcpy(to, from, copylen);
+		from += copylen;
+		to   += copylen;
+		len  -= copylen;
+	}
+	/*
+	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
+	 */
+	if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
+		return -ENOBUFS;
+	return 0;
+}
+
 /**
  * dccp_insert_option_mandatory  -  Mandatory option (5.8.2)
  * Note that since we are using skb_push, this function needs to be called
-- 
cgit v1.2.3


From b3d14bff12a38ad13a174eb0cc83d2ac7169eee4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 10 Nov 2010 21:21:35 +0100
Subject: dccp ccid-2: Implementation of circular Ack Vector buffer with
 overflow handling

This completes the implementation of a circular buffer for Ack Vectors, by
extending the current (linear array-based) implementation.  The changes are:

 (a) An `overflow' flag to deal with the case of overflow. As before, dynamic
     growth of the buffer will not be supported; but code will be added to deal
     robustly with overflowing Ack Vector buffers.

 (b) A `tail_seqno' field. When naively implementing the algorithm of Appendix A
     in RFC 4340, problems arise whenever subsequent Ack Vector records overlap,
     which can bring the entire run length calculation completely out of synch.
     (This is documented on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/\
                                             ack_vectors/tracking_tail_ackno/ .)
 (c) The buffer length is now computed dynamically (i.e. current fill level),
     as the span between head to tail.

As a result, dccp_ackvec_pending() is now simpler - the #ifdef is no longer
necessary since buf_empty is always true when IP_DCCP_ACKVEC is not configured.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 31 ++++++++++++++++++++++++++++++-
 net/dccp/ackvec.h  | 10 ++++++++--
 net/dccp/dccp.h    | 11 +++++++----
 net/dccp/options.c | 10 +++++-----
 4 files changed, 50 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index af976fca407a..abaf241c7353 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -29,7 +29,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 	struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
 
 	if (av != NULL) {
-		av->av_buf_head	= DCCPAV_MAX_ACKVEC_LEN - 1;
+		av->av_buf_head	= av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
 		INIT_LIST_HEAD(&av->av_records);
 	}
 	return av;
@@ -71,6 +71,14 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	avr->avr_ack_ackno  = av->av_buf_ackno;
 	avr->avr_ack_nonce  = nonce_sum;
 	avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
+	/*
+	 * When the buffer overflows, we keep no more than one record. This is
+	 * the simplest way of disambiguating sender-Acks dating from before the
+	 * overflow from sender-Acks which refer to after the overflow; a simple
+	 * solution is preferable here since we are handling an exception.
+	 */
+	if (av->av_overflow)
+		dccp_ackvec_purge_records(av);
 	/*
 	 * Since GSS is incremented for each packet, the list is automatically
 	 * arranged in descending order of @ack_seqno.
@@ -84,6 +92,27 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	return 0;
 }
 
+/*
+ * Buffer index and length computation using modulo-buffersize arithmetic.
+ * Note that, as pointers move from right to left, head is `before' tail.
+ */
+static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
+{
+	return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
+}
+
+static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
+{
+	return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
+}
+
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
+{
+	if (unlikely(av->av_overflow))
+		return DCCPAV_MAX_ACKVEC_LEN;
+	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
+}
+
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 3e894a0173a7..23880be8fc29 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -21,6 +21,7 @@
  * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
  * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
  * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
+ * The maximum value is bounded by the u16 types for indices and functions.
  */
 #define DCCPAV_NUM_ACKVECS	2
 #define DCCPAV_MAX_ACKVEC_LEN	(DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
@@ -55,8 +56,10 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
  * @av_buf_head:   head index; begin of live portion in @av_buf
  * @av_buf_tail:   tail index; first index _after_ the live portion in @av_buf
  * @av_buf_ackno:  highest seqno of acknowledgeable packet recorded in @av_buf
+ * @av_tail_ackno: lowest  seqno of acknowledgeable packet recorded in @av_buf
  * @av_buf_nonce:  ECN nonce sums, each covering subsequent segments of up to
  *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
+ * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
  * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
  * @av_veclen:	   length of the live portion of @av_buf
  */
@@ -65,7 +68,9 @@ struct dccp_ackvec {
 	u16			av_buf_head;
 	u16			av_buf_tail;
 	u64			av_buf_ackno:48;
+	u64			av_tail_ackno:48;
 	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
+	u8			av_overflow:1;
 	struct list_head	av_records;
 	u16			av_vec_len;
 };
@@ -112,9 +117,10 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     const u8 *value, const u8 len);
 
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
-static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
+static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
-	return av->av_vec_len;
+	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a8ed459508b2..19fafd597465 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -457,12 +457,15 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
 	dp->dccps_awh = dp->dccps_gss;
 }
 
+static inline int dccp_ackvec_pending(const struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
+	       !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
+}
+
 static inline int dccp_ack_pending(const struct sock *sk)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	return (dp->dccps_hc_rx_ackvec != NULL &&
-		dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
-	       inet_csk_ack_scheduled(sk);
+	return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
 }
 
 extern int  dccp_feat_finalise_settings(struct dccp_sock *dp);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index f4ff0a308269..5adeeed5e0d2 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -429,9 +429,10 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
-	const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN);
-	u16 len = av->av_vec_len + 2 * nr_opts;
+	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
+	u16 len = buflen + 2 * nr_opts;
 	u8 i, nonce = 0;
 	const unsigned char *tail, *from;
 	unsigned char *to;
@@ -442,7 +443,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
-	len  = av->av_vec_len;
+	len  = buflen;
 	from = av->av_buf + av->av_buf_head;
 	tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
 
@@ -580,8 +581,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			if (dccp_insert_option_timestamp(skb))
 				return -1;
 
-		} else if (dp->dccps_hc_rx_ackvec != NULL &&
-			   dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+		} else if (dccp_ackvec_pending(sk) &&
 			   dccp_insert_option_ackvec(sk, skb)) {
 				return -1;
 		}
-- 
cgit v1.2.3


From 72cdd1d971c0deb1619c5c339270570c43647a78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 11 Nov 2010 07:14:07 +0000
Subject: net: get rid of rtable->idev

It seems idev field in struct rtable has no special purpose, but adding
extra atomic ops.

We hold refcounts on the device itself (using percpu data, so pretty
cheap in current kernel).

infiniband case is solved using dst.dev instead of idev->dev

Removal of this field means routing without route cache is now using
shared data, percpu data, and only potential contention is a pair of
atomic ops on struct neighbour per forwarded packet.

About 5% speedup on routing test.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c |  8 ++++----
 include/net/route.h            |  2 --
 net/ipv4/route.c               | 37 ++++---------------------------------
 net/ipv4/xfrm4_policy.c        | 24 ------------------------
 4 files changed, 8 insertions(+), 63 deletions(-)

(limited to 'net')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index a5ea1bce9689..c15fd2ea56c1 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -200,7 +200,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	src_in->sin_family = AF_INET;
 	src_in->sin_addr.s_addr = rt->rt_src;
 
-	if (rt->idev->dev->flags & IFF_LOOPBACK) {
+	if (rt->dst.dev->flags & IFF_LOOPBACK) {
 		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
 		if (!ret)
 			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
@@ -208,12 +208,12 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	}
 
 	/* If the device does ARP internally, return 'done' */
-	if (rt->idev->dev->flags & IFF_NOARP) {
-		rdma_copy_addr(addr, rt->idev->dev, NULL);
+	if (rt->dst.dev->flags & IFF_NOARP) {
+		rdma_copy_addr(addr, rt->dst.dev, NULL);
 		goto put;
 	}
 
-	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
+	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev);
 	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
 		neigh_event_send(rt->dst.neighbour, NULL);
 		ret = -ENODATA;
diff --git a/include/net/route.h b/include/net/route.h
index 7e5e73bfa4de..cea533eaa853 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -55,8 +55,6 @@ struct rtable {
 	/* Cache lookup keys */
 	struct flowi		fl;
 
-	struct in_device	*idev;
-	
 	int			rt_genid;
 	unsigned		rt_flags;
 	__u16			rt_type;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 987bf9adb318..5955965c7953 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -140,13 +140,15 @@ static unsigned long expires_ljiffies;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
-static void		 ipv4_dst_ifdown(struct dst_entry *dst,
-					 struct net_device *dev, int how);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
 
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+			    int how)
+{
+}
 
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
@@ -1433,8 +1435,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->dst.child		= NULL;
 				if (rt->dst.dev)
 					dev_hold(rt->dst.dev);
-				if (rt->idev)
-					in_dev_hold(rt->idev);
 				rt->dst.obsolete	= -1;
 				rt->dst.lastuse	= jiffies;
 				rt->dst.path		= &rt->dst;
@@ -1728,33 +1728,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
 	struct rtable *rt = (struct rtable *) dst;
 	struct inet_peer *peer = rt->peer;
-	struct in_device *idev = rt->idev;
 
 	if (peer) {
 		rt->peer = NULL;
 		inet_putpeer(peer);
 	}
-
-	if (idev) {
-		rt->idev = NULL;
-		in_dev_put(idev);
-	}
 }
 
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			    int how)
-{
-	struct rtable *rt = (struct rtable *) dst;
-	struct in_device *idev = rt->idev;
-	if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
-		struct in_device *loopback_idev =
-			in_dev_get(dev_net(dev)->loopback_dev);
-		if (loopback_idev) {
-			rt->idev = loopback_idev;
-			in_dev_put(idev);
-		}
-	}
-}
 
 static void ipv4_link_failure(struct sk_buff *skb)
 {
@@ -1910,7 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.iif	= dev->ifindex;
 	rth->dst.dev	= init_net.loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -2050,7 +2029,6 @@ static int __mkroute_input(struct sk_buff *skb,
 		rth->fl.iif	= in_dev->dev->ifindex;
 	rth->dst.dev	= (out_dev)->dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
@@ -2231,7 +2209,6 @@ local_input:
 	rth->fl.iif	= dev->ifindex;
 	rth->dst.dev	= net->loopback_dev;
 	dev_hold(rth->dst.dev);
-	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
 	rth->dst.input= ip_local_deliver;
@@ -2417,9 +2394,6 @@ static int __mkroute_output(struct rtable **result,
 	if (!rth)
 		return -ENOBUFS;
 
-	in_dev_hold(in_dev);
-	rth->idev = in_dev;
-
 	atomic_set(&rth->dst.__refcnt, 1);
 	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
@@ -2759,9 +2733,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
 		rt->fl = ort->fl;
 
-		rt->idev = ort->idev;
-		if (rt->idev)
-			in_dev_hold(rt->idev);
 		rt->rt_genid = rt_genid(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4464f3bff6a7..dd1fd8c473fc 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -80,10 +80,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.dst.dev = dev;
 	dev_hold(dev);
 
-	xdst->u.rt.idev = in_dev_get(dev);
-	if (!xdst->u.rt.idev)
-		return -ENODEV;
-
 	xdst->u.rt.peer = rt->peer;
 	if (rt->peer)
 		atomic_inc(&rt->peer->refcnt);
@@ -189,8 +185,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 {
 	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
-	if (likely(xdst->u.rt.idev))
-		in_dev_put(xdst->u.rt.idev);
 	if (likely(xdst->u.rt.peer))
 		inet_putpeer(xdst->u.rt.peer);
 	xfrm_dst_destroy(xdst);
@@ -199,27 +193,9 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
 static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 			     int unregister)
 {
-	struct xfrm_dst *xdst;
-
 	if (!unregister)
 		return;
 
-	xdst = (struct xfrm_dst *)dst;
-	if (xdst->u.rt.idev->dev == dev) {
-		struct in_device *loopback_idev =
-			in_dev_get(dev_net(dev)->loopback_dev);
-		BUG_ON(!loopback_idev);
-
-		do {
-			in_dev_put(xdst->u.rt.idev);
-			xdst->u.rt.idev = loopback_idev;
-			in_dev_hold(loopback_idev);
-			xdst = (struct xfrm_dst *)xdst->u.dst.child;
-		} while (xdst->u.dst.xfrm);
-
-		__in_dev_put(loopback_idev);
-	}
-
 	xfrm_dst_ifdown(dst, dev);
 }
 
-- 
cgit v1.2.3


From c753796769e4fb0cd813b6e5801b3c01f4681d4f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 11 Nov 2010 17:07:48 -0800
Subject: ipv4: Make rt->fl.iif tests lest obscure.

When we test rt->fl.iif against zero, we're seeing if it's
an output or an input route.

Make that explicit with some helper functions.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_route.h          | 10 ++++++++++
 include/net/route.h             | 10 ++++++++++
 net/decnet/dn_route.c           |  4 ++--
 net/ipv4/icmp.c                 |  4 ++--
 net/ipv4/igmp.c                 |  2 +-
 net/ipv4/ip_gre.c               |  2 +-
 net/ipv4/ipmr.c                 |  2 +-
 net/ipv4/route.c                | 20 ++++++++++----------
 net/netfilter/ipvs/ip_vs_xmit.c |  8 +++++---
 9 files changed, 42 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index ccadab3aa3f6..9b185df265fb 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -80,6 +80,16 @@ struct dn_route {
 	unsigned rt_type;
 };
 
+static inline bool dn_is_input_route(struct dn_route *rt)
+{
+	return rt->fl.iif != 0;
+}
+
+static inline bool dn_is_output_route(struct dn_route *rt)
+{
+	return rt->fl.iif == 0;
+}
+
 extern void dn_route_init(void);
 extern void dn_route_cleanup(void);
 
diff --git a/include/net/route.h b/include/net/route.h
index cea533eaa853..5cd46d1c0e14 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -71,6 +71,16 @@ struct rtable {
 	struct inet_peer	*peer; /* long-living peer info */
 };
 
+static inline bool rt_is_input_route(struct rtable *rt)
+{
+	return rt->fl.iif != 0;
+}
+
+static inline bool rt_is_output_route(struct rtable *rt)
+{
+	return rt->fl.iif == 0;
+}
+
 struct ip_rt_acct {
 	__u32 	o_bytes;
 	__u32 	o_packets;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 94a9eb1d313e..474d54dd08c2 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1181,7 +1181,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 			if ((flp->fld_dst == rt->fl.fld_dst) &&
 			    (flp->fld_src == rt->fl.fld_src) &&
 			    (flp->mark == rt->fl.mark) &&
-			    (rt->fl.iif == 0) &&
+			    dn_is_output_route(rt) &&
 			    (rt->fl.oif == flp->oif)) {
 				dst_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
@@ -1512,7 +1512,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
 			       rt->dst.error) < 0)
 		goto rtattr_failure;
-	if (rt->fl.iif)
+	if (dn_is_input_route(rt))
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 96bc7f9475a3..c6e2affafbd3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -506,8 +506,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 		struct net_device *dev = NULL;
 
 		rcu_read_lock();
-		if (rt->fl.iif &&
-			net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+		if (rt_is_input_route(rt) &&
+		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
 			dev = dev_get_by_index_rcu(net, rt->fl.iif);
 
 		if (dev)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c8877c6c7216..08d0d81ffc15 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -961,7 +961,7 @@ int igmp_rcv(struct sk_buff *skb)
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 		/* Is it our report looped back? */
-		if (skb_rtable(skb)->fl.iif == 0)
+		if (rt_is_output_route(skb_rtable(skb)))
 			break;
 		/* don't rely on MC router hearing unicast reports */
 		if (skb->pkt_type == PACKET_MULTICAST ||
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 70ff77f02eee..cab2057d5430 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -634,7 +634,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (ipv4_is_multicast(iph->daddr)) {
 			/* Looped back packet, drop it! */
-			if (skb_rtable(skb)->fl.iif == 0)
+			if (rt_is_output_route(skb_rtable(skb)))
 				goto drop;
 			tunnel->dev->stats.multicast++;
 			skb->pkt_type = PACKET_BROADCAST;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 86dd5691af46..ef2b0089e0ea 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1654,7 +1654,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 	if (mrt->vif_table[vif].dev != skb->dev) {
 		int true_vifi;
 
-		if (skb_rtable(skb)->fl.iif == 0) {
+		if (rt_is_output_route(skb_rtable(skb))) {
 			/* It is our own packet, looped back.
 			 * Very complicated situation...
 			 *
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5955965c7953..66610ea3c87b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -623,7 +623,7 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->dst.rt_next;
+		rt_is_input_route(rth) && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
@@ -668,7 +668,7 @@ static inline u32 rt_score(struct rtable *rt)
 	if (rt_valuable(rt))
 		score |= (1<<31);
 
-	if (!rt->fl.iif ||
+	if (rt_is_output_route(rt) ||
 	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
 		score |= (1<<30);
 
@@ -1126,7 +1126,7 @@ restart:
 		 */
 
 		rt->dst.flags |= DST_NOCACHE;
-		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+		if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
 			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {
 				if (net_ratelimit())
@@ -1224,7 +1224,7 @@ restart:
 	/* Try to bind route to arp only if it is output
 	   route or unicast forwarding path.
 	 */
-	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+	if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
 		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1406,7 +1406,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				if (rth->fl.fl4_dst != daddr ||
 				    rth->fl.fl4_src != skeys[i] ||
 				    rth->fl.oif != ikeys[k] ||
-				    rth->fl.iif != 0 ||
+				    rt_is_input_route(rth) ||
 				    rt_is_expired(rth) ||
 				    !net_eq(dev_net(rth->dst.dev), net)) {
 					rthp = &rth->dst.rt_next;
@@ -1666,7 +1666,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->rt_dst != daddr ||
 				    rth->rt_src != iph->saddr ||
 				    rth->fl.oif != ikeys[k] ||
-				    rth->fl.iif != 0 ||
+				    rt_is_input_route(rth) ||
 				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
 				    !net_eq(dev_net(rth->dst.dev), net) ||
 				    rt_is_expired(rth))
@@ -1770,7 +1770,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 	__be32 src;
 	struct fib_result res;
 
-	if (rt->fl.iif == 0)
+	if (rt_is_output_route(rt))
 		src = rt->rt_src;
 	else {
 		rcu_read_lock();
@@ -2669,7 +2669,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
-		    rth->fl.iif == 0 &&
+		    rt_is_output_route(rth) &&
 		    rth->fl.oif == flp->oif &&
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
@@ -2824,7 +2824,7 @@ static int rt_fill_info(struct net *net,
 	if (rt->dst.tclassid)
 		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
-	if (rt->fl.iif)
+	if (rt_is_input_route(rt))
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
 	else if (rt->rt_src != rt->fl.fl4_src)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
@@ -2849,7 +2849,7 @@ static int rt_fill_info(struct net *net,
 		}
 	}
 
-	if (rt->fl.iif) {
+	if (rt_is_input_route(rt)) {
 #ifdef CONFIG_IP_MROUTE
 		__be32 dst = rt->rt_dst;
 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index de04ea39cde8..10bd39c0ae2d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -169,7 +169,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
 	struct net *net = dev_net(dev);
 	struct iphdr *iph = ip_hdr(skb);
 
-	if (rt->fl.iif) {
+	if (rt_is_input_route(rt)) {
 		unsigned long orefdst = skb->_skb_refdst;
 
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
@@ -552,7 +552,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #endif
 
 	/* From world but DNAT to loopback address? */
-	if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+	if (local && ipv4_is_loopback(rt->rt_dst) &&
+	    rt_is_input_route(skb_rtable(skb))) {
 		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
 				 "stopping DNAT to loopback address");
 		goto tx_error_put;
@@ -1165,7 +1166,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #endif
 
 	/* From world but DNAT to loopback address? */
-	if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+	if (local && ipv4_is_loopback(rt->rt_dst) &&
+	    rt_is_input_route(skb_rtable(skb))) {
 		IP_VS_DBG(1, "%s(): "
 			  "stopping DNAT to loopback %pI4\n",
 			  __func__, &cp->daddr.ip);
-- 
cgit v1.2.3


From 1d7138de878d1d4210727c1200193e69596f93b3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 12 Nov 2010 05:46:50 +0000
Subject: igmp: RCU conversion of in_dev->mc_list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

in_dev->mc_list is protected by one rwlock (in_dev->mc_list_lock).

This can easily be converted to a RCU protection.

Writers hold RTNL, so mc_list_lock is removed, not replaced by a
spinlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Cypher Wu <cypher.w@gmail.com>
Cc: Américo Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h       |  12 ++-
 include/linux/inetdevice.h |   5 +-
 include/net/inet_sock.h    |   2 +-
 net/ipv4/igmp.c            | 223 +++++++++++++++++++++------------------------
 4 files changed, 115 insertions(+), 127 deletions(-)

(limited to 'net')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 93fc2449af10..7d164670f264 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -167,10 +167,10 @@ struct ip_sf_socklist {
  */
 
 struct ip_mc_socklist {
-	struct ip_mc_socklist	*next;
+	struct ip_mc_socklist __rcu *next_rcu;
 	struct ip_mreqn		multi;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
-	struct ip_sf_socklist	*sflist;
+	struct ip_sf_socklist __rcu	*sflist;
 	struct rcu_head		rcu;
 };
 
@@ -186,11 +186,14 @@ struct ip_sf_list {
 struct ip_mc_list {
 	struct in_device	*interface;
 	__be32			multiaddr;
+	unsigned int		sfmode;
 	struct ip_sf_list	*sources;
 	struct ip_sf_list	*tomb;
-	unsigned int		sfmode;
 	unsigned long		sfcount[2];
-	struct ip_mc_list	*next;
+	union {
+		struct ip_mc_list *next;
+		struct ip_mc_list __rcu *next_rcu;
+	};
 	struct timer_list	timer;
 	int			users;
 	atomic_t		refcnt;
@@ -201,6 +204,7 @@ struct ip_mc_list {
 	char			loaded;
 	unsigned char		gsquery;	/* check source marks? */
 	unsigned char		crcount;
+	struct rcu_head		rcu;
 };
 
 /* V3 exponential field decoding */
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index ccd5b07d678d..380ba6bc5db1 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -52,9 +52,8 @@ struct in_device {
 	atomic_t		refcnt;
 	int			dead;
 	struct in_ifaddr	*ifa_list;	/* IP ifaddr chain		*/
-	rwlock_t		mc_list_lock;
-	struct ip_mc_list	*mc_list;	/* IP multicast filter chain    */
-	int			mc_count;	          /* Number of installed mcasts	*/
+	struct ip_mc_list __rcu	*mc_list;	/* IP multicast filter chain    */
+	int			mc_count;	/* Number of installed mcasts	*/
 	spinlock_t		mc_tomb_lock;
 	struct ip_mc_list	*mc_tomb;
 	unsigned long		mr_v1_seen;
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1989cfd7405f..8945f9fb192a 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -141,7 +141,7 @@ struct inet_sock {
 				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
-	struct ip_mc_socklist	*mc_list;
+	struct ip_mc_socklist __rcu	*mc_list;
 	struct {
 		unsigned int		flags;
 		unsigned int		fragsize;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 08d0d81ffc15..6f49d6c087da 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -149,11 +149,17 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc);
 static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 			 int sfcount, __be32 *psfsrc, int delta);
 
+
+static void ip_mc_list_reclaim(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_mc_list, rcu));
+}
+
 static void ip_ma_put(struct ip_mc_list *im)
 {
 	if (atomic_dec_and_test(&im->refcnt)) {
 		in_dev_put(im->interface);
-		kfree(im);
+		call_rcu(&im->rcu, ip_mc_list_reclaim);
 	}
 }
 
@@ -163,7 +169,7 @@ static void ip_ma_put(struct ip_mc_list *im)
  *	Timer management
  */
 
-static __inline__ void igmp_stop_timer(struct ip_mc_list *im)
+static void igmp_stop_timer(struct ip_mc_list *im)
 {
 	spin_lock_bh(&im->lock);
 	if (del_timer(&im->timer))
@@ -496,14 +502,24 @@ empty_source:
 	return skb;
 }
 
+#define for_each_pmc_rcu(in_dev, pmc)				\
+	for (pmc = rcu_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next_rcu))
+
+#define for_each_pmc_rtnl(in_dev, pmc)				\
+	for (pmc = rtnl_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rtnl_dereference(pmc->next_rcu))
+
 static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
 {
 	struct sk_buff *skb = NULL;
 	int type;
 
 	if (!pmc) {
-		read_lock(&in_dev->mc_list_lock);
-		for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+		rcu_read_lock();
+		for_each_pmc_rcu(in_dev, pmc) {
 			if (pmc->multiaddr == IGMP_ALL_HOSTS)
 				continue;
 			spin_lock_bh(&pmc->lock);
@@ -514,7 +530,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
 			skb = add_grec(skb, pmc, type, 0, 0);
 			spin_unlock_bh(&pmc->lock);
 		}
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 	} else {
 		spin_lock_bh(&pmc->lock);
 		if (pmc->sfcount[MCAST_EXCLUDE])
@@ -556,7 +572,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 	struct sk_buff *skb = NULL;
 	int type, dtype;
 
-	read_lock(&in_dev->mc_list_lock);
+	rcu_read_lock();
 	spin_lock_bh(&in_dev->mc_tomb_lock);
 
 	/* deleted MCA's */
@@ -593,7 +609,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 	spin_unlock_bh(&in_dev->mc_tomb_lock);
 
 	/* change recs */
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(in_dev, pmc) {
 		spin_lock_bh(&pmc->lock);
 		if (pmc->sfcount[MCAST_EXCLUDE]) {
 			type = IGMPV3_BLOCK_OLD_SOURCES;
@@ -616,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev)
 		}
 		spin_unlock_bh(&pmc->lock);
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 
 	if (!skb)
 		return;
@@ -813,14 +829,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
 	if (group == IGMP_ALL_HOSTS)
 		return;
 
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		if (im->multiaddr == group) {
 			igmp_stop_timer(im);
 			break;
 		}
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 
 static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
@@ -906,8 +922,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	 * - Use the igmp->igmp_code field as the maximum
 	 *   delay possible
 	 */
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im!=NULL; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		int changed;
 
 		if (group && group != im->multiaddr)
@@ -925,7 +941,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (changed)
 			igmp_mod_timer(im, max_delay);
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 
 /* called in rcu_read_lock() section */
@@ -1110,8 +1126,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 		kfree(pmc);
 	}
 	/* clear dead sources, too */
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		struct ip_sf_list *psf, *psf_next;
 
 		spin_lock_bh(&pmc->lock);
@@ -1123,7 +1139,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 			kfree(psf);
 		}
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 }
 #endif
 
@@ -1209,7 +1225,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 
 	ASSERT_RTNL();
 
-	for (im=in_dev->mc_list; im; im=im->next) {
+	for_each_pmc_rtnl(in_dev, im) {
 		if (im->multiaddr == addr) {
 			im->users++;
 			ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
@@ -1217,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 		}
 	}
 
-	im = kmalloc(sizeof(*im), GFP_KERNEL);
+	im = kzalloc(sizeof(*im), GFP_KERNEL);
 	if (!im)
 		goto out;
 
@@ -1227,26 +1243,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	im->multiaddr = addr;
 	/* initial mode is (EX, empty) */
 	im->sfmode = MCAST_EXCLUDE;
-	im->sfcount[MCAST_INCLUDE] = 0;
 	im->sfcount[MCAST_EXCLUDE] = 1;
-	im->sources = NULL;
-	im->tomb = NULL;
-	im->crcount = 0;
 	atomic_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
-	im->tm_running = 0;
 	setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im);
 	im->unsolicit_count = IGMP_Unsolicited_Report_Count;
-	im->reporter = 0;
-	im->gsquery = 0;
 #endif
-	im->loaded = 0;
-	write_lock_bh(&in_dev->mc_list_lock);
-	im->next = in_dev->mc_list;
-	in_dev->mc_list = im;
+
+	im->next_rcu = in_dev->mc_list;
 	in_dev->mc_count++;
-	write_unlock_bh(&in_dev->mc_list_lock);
+	rcu_assign_pointer(in_dev->mc_list, im);
+
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
 #endif
@@ -1287,17 +1295,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group);
 
 void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 {
-	struct ip_mc_list *i, **ip;
+	struct ip_mc_list *i;
+	struct ip_mc_list __rcu **ip;
 
 	ASSERT_RTNL();
 
-	for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) {
+	for (ip = &in_dev->mc_list;
+	     (i = rtnl_dereference(*ip)) != NULL;
+	     ip = &i->next_rcu) {
 		if (i->multiaddr == addr) {
 			if (--i->users == 0) {
-				write_lock_bh(&in_dev->mc_list_lock);
-				*ip = i->next;
+				*ip = i->next_rcu;
 				in_dev->mc_count--;
-				write_unlock_bh(&in_dev->mc_list_lock);
 				igmp_group_dropped(i);
 
 				if (!in_dev->dead)
@@ -1316,34 +1325,34 @@ EXPORT_SYMBOL(ip_mc_dec_group);
 
 void ip_mc_unmap(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i = in_dev->mc_list; i; i = i->next)
-		igmp_group_dropped(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_dropped(pmc);
 }
 
 void ip_mc_remap(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i = in_dev->mc_list; i; i = i->next)
-		igmp_group_added(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_added(pmc);
 }
 
 /* Device going down */
 
 void ip_mc_down(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
-	for (i=in_dev->mc_list; i; i=i->next)
-		igmp_group_dropped(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_dropped(pmc);
 
 #ifdef CONFIG_IP_MULTICAST
 	in_dev->mr_ifc_count = 0;
@@ -1374,7 +1383,6 @@ void ip_mc_init_dev(struct in_device *in_dev)
 	in_dev->mr_qrv = IGMP_Unsolicited_Report_Count;
 #endif
 
-	rwlock_init(&in_dev->mc_list_lock);
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
 
@@ -1382,14 +1390,14 @@ void ip_mc_init_dev(struct in_device *in_dev)
 
 void ip_mc_up(struct in_device *in_dev)
 {
-	struct ip_mc_list *i;
+	struct ip_mc_list *pmc;
 
 	ASSERT_RTNL();
 
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
-	for (i=in_dev->mc_list; i; i=i->next)
-		igmp_group_added(i);
+	for_each_pmc_rtnl(in_dev, pmc)
+		igmp_group_added(pmc);
 }
 
 /*
@@ -1405,17 +1413,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	/* Deactivate timers */
 	ip_mc_down(in_dev);
 
-	write_lock_bh(&in_dev->mc_list_lock);
-	while ((i = in_dev->mc_list) != NULL) {
-		in_dev->mc_list = i->next;
+	while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
+		in_dev->mc_list = i->next_rcu;
 		in_dev->mc_count--;
-		write_unlock_bh(&in_dev->mc_list_lock);
+
 		igmp_group_dropped(i);
 		ip_ma_put(i);
-
-		write_lock_bh(&in_dev->mc_list_lock);
 	}
-	write_unlock_bh(&in_dev->mc_list_lock);
 }
 
 /* RTNL is locked */
@@ -1513,18 +1517,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 	if (!in_dev)
 		return -ENODEV;
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		if (*pmca == pmc->multiaddr)
 			break;
 	}
 	if (!pmc) {
 		/* MCA not found?? bug */
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 		return -ESRCH;
 	}
 	spin_lock_bh(&pmc->lock);
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 #ifdef CONFIG_IP_MULTICAST
 	sf_markstate(pmc);
 #endif
@@ -1685,18 +1689,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
 
 	if (!in_dev)
 		return -ENODEV;
-	read_lock(&in_dev->mc_list_lock);
-	for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, pmc) {
 		if (*pmca == pmc->multiaddr)
 			break;
 	}
 	if (!pmc) {
 		/* MCA not found?? bug */
-		read_unlock(&in_dev->mc_list_lock);
+		rcu_read_unlock();
 		return -ESRCH;
 	}
 	spin_lock_bh(&pmc->lock);
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 
 #ifdef CONFIG_IP_MULTICAST
 	sf_markstate(pmc);
@@ -1793,7 +1797,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 
 	err = -EADDRINUSE;
 	ifindex = imr->imr_ifindex;
-	for (i = inet->mc_list; i; i = i->next) {
+	for_each_pmc_rtnl(inet, i) {
 		if (i->multi.imr_multiaddr.s_addr == addr &&
 		    i->multi.imr_ifindex == ifindex)
 			goto done;
@@ -1807,7 +1811,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 		goto done;
 
 	memcpy(&iml->multi, imr, sizeof(*imr));
-	iml->next = inet->mc_list;
+	iml->next_rcu = inet->mc_list;
 	iml->sflist = NULL;
 	iml->sfmode = MCAST_EXCLUDE;
 	rcu_assign_pointer(inet->mc_list, iml);
@@ -1821,17 +1825,14 @@ EXPORT_SYMBOL(ip_mc_join_group);
 
 static void ip_sf_socklist_reclaim(struct rcu_head *rp)
 {
-	struct ip_sf_socklist *psf;
-
-	psf = container_of(rp, struct ip_sf_socklist, rcu);
+	kfree(container_of(rp, struct ip_sf_socklist, rcu));
 	/* sk_omem_alloc should have been decreased by the caller*/
-	kfree(psf);
 }
 
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
-	struct ip_sf_socklist *psf = iml->sflist;
+	struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist);
 	int err;
 
 	if (psf == NULL) {
@@ -1851,11 +1852,8 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 
 static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 {
-	struct ip_mc_socklist *iml;
-
-	iml = container_of(rp, struct ip_mc_socklist, rcu);
+	kfree(container_of(rp, struct ip_mc_socklist, rcu));
 	/* sk_omem_alloc should have been decreased by the caller*/
-	kfree(iml);
 }
 
 
@@ -1866,7 +1864,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp)
 int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct ip_mc_socklist *iml, **imlp;
+	struct ip_mc_socklist *iml;
+	struct ip_mc_socklist __rcu **imlp;
 	struct in_device *in_dev;
 	struct net *net = sock_net(sk);
 	__be32 group = imr->imr_multiaddr.s_addr;
@@ -1876,7 +1875,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	rtnl_lock();
 	in_dev = ip_mc_find_dev(net, imr);
 	ifindex = imr->imr_ifindex;
-	for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
+	for (imlp = &inet->mc_list;
+	     (iml = rtnl_dereference(*imlp)) != NULL;
+	     imlp = &iml->next_rcu) {
 		if (iml->multi.imr_multiaddr.s_addr != group)
 			continue;
 		if (ifindex) {
@@ -1888,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 
-		rcu_assign_pointer(*imlp, iml->next);
+		*imlp = iml->next_rcu;
 
 		if (in_dev)
 			ip_mc_dec_group(in_dev, group);
@@ -1934,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	}
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if ((pmc->multi.imr_multiaddr.s_addr ==
 		     imr.imr_multiaddr.s_addr) &&
 		    (pmc->multi.imr_ifindex == imr.imr_ifindex))
@@ -1958,7 +1959,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		pmc->sfmode = omode;
 	}
 
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
@@ -2077,7 +2078,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		goto done;
 	}
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
 		    pmc->multi.imr_ifindex == imr.imr_ifindex)
 			break;
@@ -2107,7 +2108,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 		(void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
 				     msf->imsf_fmode, 0, NULL, 0);
 	}
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (psl) {
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			psl->sl_count, psl->sl_addr, 0);
@@ -2155,7 +2156,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	}
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
 		    pmc->multi.imr_ifindex == imr.imr_ifindex)
 			break;
@@ -2163,7 +2164,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 	if (!pmc)		/* must have a prior join */
 		goto done;
 	msf->imsf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	rtnl_unlock();
 	if (!psl) {
 		len = 0;
@@ -2208,7 +2209,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 
 	err = -EADDRNOTAVAIL;
 
-	for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == addr &&
 		    pmc->multi.imr_ifindex == gsf->gf_interface)
 			break;
@@ -2216,7 +2217,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!pmc)		/* must have a prior join */
 		goto done;
 	gsf->gf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	rtnl_unlock();
 	count = psl ? psl->sl_count : 0;
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2257,7 +2258,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 		goto out;
 
 	rcu_read_lock();
-	for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) {
+	for_each_pmc_rcu(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == loc_addr &&
 		    pmc->multi.imr_ifindex == dif)
 			break;
@@ -2265,7 +2266,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 	ret = inet->mc_all;
 	if (!pmc)
 		goto unlock;
-	psl = pmc->sflist;
+	psl = rcu_dereference(pmc->sflist);
 	ret = (pmc->sfmode == MCAST_EXCLUDE);
 	if (!psl)
 		goto unlock;
@@ -2300,10 +2301,10 @@ void ip_mc_drop_socket(struct sock *sk)
 		return;
 
 	rtnl_lock();
-	while ((iml = inet->mc_list) != NULL) {
+	while ((iml = rtnl_dereference(inet->mc_list)) != NULL) {
 		struct in_device *in_dev;
-		rcu_assign_pointer(inet->mc_list, iml->next);
 
+		inet->mc_list = iml->next_rcu;
 		in_dev = inetdev_by_index(net, iml->multi.imr_ifindex);
 		(void) ip_mc_leave_src(sk, iml, in_dev);
 		if (in_dev != NULL) {
@@ -2323,8 +2324,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 	struct ip_sf_list *psf;
 	int rv = 0;
 
-	read_lock(&in_dev->mc_list_lock);
-	for (im=in_dev->mc_list; im; im=im->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(in_dev, im) {
 		if (im->multiaddr == mc_addr)
 			break;
 	}
@@ -2345,7 +2346,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p
 		} else
 			rv = 1; /* unspecified source; tentatively allow */
 	}
-	read_unlock(&in_dev->mc_list_lock);
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -2371,13 +2372,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 		in_dev = __in_dev_get_rcu(state->dev);
 		if (!in_dev)
 			continue;
-		read_lock(&in_dev->mc_list_lock);
-		im = in_dev->mc_list;
+		im = rcu_dereference(in_dev->mc_list);
 		if (im) {
 			state->in_dev = in_dev;
 			break;
 		}
-		read_unlock(&in_dev->mc_list_lock);
 	}
 	return im;
 }
@@ -2385,11 +2384,9 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq)
 static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im)
 {
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-	im = im->next;
-	while (!im) {
-		if (likely(state->in_dev != NULL))
-			read_unlock(&state->in_dev->mc_list_lock);
 
+	im = rcu_dereference(im->next_rcu);
+	while (!im) {
 		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->in_dev = NULL;
@@ -2398,8 +2395,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li
 		state->in_dev = __in_dev_get_rcu(state->dev);
 		if (!state->in_dev)
 			continue;
-		read_lock(&state->in_dev->mc_list_lock);
-		im = state->in_dev->mc_list;
+		im = rcu_dereference(state->in_dev->mc_list);
 	}
 	return im;
 }
@@ -2435,10 +2431,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v)
 	__releases(rcu)
 {
 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-	if (likely(state->in_dev != NULL)) {
-		read_unlock(&state->in_dev->mc_list_lock);
-		state->in_dev = NULL;
-	}
+
+	state->in_dev = NULL;
 	state->dev = NULL;
 	rcu_read_unlock();
 }
@@ -2460,7 +2454,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 		querier = "NONE";
 #endif
 
-		if (state->in_dev->mc_list == im) {
+		if (rcu_dereference(state->in_dev->mc_list) == im) {
 			seq_printf(seq, "%d\t%-10s: %5d %7s\n",
 				   state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
 		}
@@ -2519,8 +2513,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 		idev = __in_dev_get_rcu(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
-		read_lock(&idev->mc_list_lock);
-		im = idev->mc_list;
+		im = rcu_dereference(idev->mc_list);
 		if (likely(im != NULL)) {
 			spin_lock_bh(&im->lock);
 			psf = im->sources;
@@ -2531,7 +2524,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq)
 			}
 			spin_unlock_bh(&im->lock);
 		}
-		read_unlock(&idev->mc_list_lock);
 	}
 	return psf;
 }
@@ -2545,9 +2537,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
 		spin_unlock_bh(&state->im->lock);
 		state->im = state->im->next;
 		while (!state->im) {
-			if (likely(state->idev != NULL))
-				read_unlock(&state->idev->mc_list_lock);
-
 			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
@@ -2556,8 +2545,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l
 			state->idev = __in_dev_get_rcu(state->dev);
 			if (!state->idev)
 				continue;
-			read_lock(&state->idev->mc_list_lock);
-			state->im = state->idev->mc_list;
+			state->im = rcu_dereference(state->idev->mc_list);
 		}
 		if (!state->im)
 			break;
@@ -2603,10 +2591,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
 		spin_unlock_bh(&state->im->lock);
 		state->im = NULL;
 	}
-	if (likely(state->idev != NULL)) {
-		read_unlock(&state->idev->mc_list_lock);
-		state->idev = NULL;
-	}
+	state->idev = NULL;
 	state->dev = NULL;
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From 190683a9d5457e6d962c232ffbecac3ab158dddd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 10 Nov 2010 10:50:44 +0000
Subject: net: net_families __rcu annotations

Use modern RCU API / annotations for net_families array.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 3ca2fd9e3720..c898df76e924 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = {
  */
 
 static DEFINE_SPINLOCK(net_family_lock);
-static const struct net_proto_family *net_families[NPROTO] __read_mostly;
+static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 
 /*
  *	Statistics counters of the socket lists
@@ -1200,7 +1200,7 @@ int __sock_create(struct net *net, int family, int type, int protocol,
 	 * requested real, full-featured networking support upon configuration.
 	 * Otherwise module support will break!
 	 */
-	if (net_families[family] == NULL)
+	if (rcu_access_pointer(net_families[family]) == NULL)
 		request_module("net-pf-%d", family);
 #endif
 
@@ -2332,10 +2332,11 @@ int sock_register(const struct net_proto_family *ops)
 	}
 
 	spin_lock(&net_family_lock);
-	if (net_families[ops->family])
+	if (rcu_dereference_protected(net_families[ops->family],
+				      lockdep_is_held(&net_family_lock)))
 		err = -EEXIST;
 	else {
-		net_families[ops->family] = ops;
+		rcu_assign_pointer(net_families[ops->family], ops);
 		err = 0;
 	}
 	spin_unlock(&net_family_lock);
@@ -2363,7 +2364,7 @@ void sock_unregister(int family)
 	BUG_ON(family < 0 || family >= NPROTO);
 
 	spin_lock(&net_family_lock);
-	net_families[family] = NULL;
+	rcu_assign_pointer(net_families[family], NULL);
 	spin_unlock(&net_family_lock);
 
 	synchronize_rcu();
-- 
cgit v1.2.3


From 5753fdfe8bd8e9a2ff9e5af19b0ffc78bfcd502a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:25:11 +0100
Subject: dccp ccid-2: Algorithm to update buffer state

This provides a routine to consistently update the buffer state when the
peer acknowledges receipt of Ack Vectors; updating state in the list of Ack
Vectors as well as in the circular buffer.

While based on RFC 4340, several additional (and necessary) precautions were
added to protect the consistency of the buffer state. These additions are
essential, since analysis and experience showed that the basic algorithm was
insufficient for this task (which lead to problems that were hard to debug).

The algorithm now
 * deals with HC-sender acknowledging to HC-receiver and vice versa,
 * keeps track of the last unacknowledged but received seqno in tail_ackno,
 * has special cases to reset the overflow condition when appropriate,
 * is protected against receiving older information (would mess up buffer state).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c  | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/ackvec.h  |  1 +
 net/dccp/input.c   |  4 +--
 net/dccp/options.c |  6 ++--
 4 files changed, 93 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index abaf241c7353..e9a0f66e4afe 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -92,6 +92,24 @@ int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
 	return 0;
 }
 
+static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
+						     const u64 ackno)
+{
+	struct dccp_ackvec_record *avr;
+	/*
+	 * Exploit that records are inserted in descending order of sequence
+	 * number, start with the oldest record first. If @ackno is `before'
+	 * the earliest ack_ackno, the packet is too old to be considered.
+	 */
+	list_for_each_entry_reverse(avr, av_list, avr_node) {
+		if (avr->avr_ack_seqno == ackno)
+			return avr;
+		if (before48(ackno, avr->avr_ack_seqno))
+			break;
+	}
+	return NULL;
+}
+
 /*
  * Buffer index and length computation using modulo-buffersize arithmetic.
  * Note that, as pointers move from right to left, head is `before' tail.
@@ -356,6 +374,76 @@ int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 	return 0;
 }
 
+/**
+ * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
+ * This routine is called when the peer acknowledges the receipt of Ack Vectors
+ * up to and including @ackno. While based on on section A.3 of RFC 4340, here
+ * are additional precautions to prevent corrupted buffer state. In particular,
+ * we use tail_ackno to identify outdated records; it always marks the earliest
+ * packet of group (2) in 11.4.2.
+ */
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
+ {
+	struct dccp_ackvec_record *avr, *next;
+	u8 runlen_now, eff_runlen;
+	s64 delta;
+
+	avr = dccp_ackvec_lookup(&av->av_records, ackno);
+	if (avr == NULL)
+		return;
+	/*
+	 * Deal with outdated acknowledgments: this arises when e.g. there are
+	 * several old records and the acks from the peer come in slowly. In
+	 * that case we may still have records that pre-date tail_ackno.
+	 */
+	delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
+	if (delta < 0)
+		goto free_records;
+	/*
+	 * Deal with overlapping Ack Vectors: don't subtract more than the
+	 * number of packets between tail_ackno and ack_ackno.
+	 */
+	eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
+
+	runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
+	/*
+	 * The run length of Ack Vector cells does not decrease over time. If
+	 * the run length is the same as at the time the Ack Vector was sent, we
+	 * free the ack_ptr cell. That cell can however not be freed if the run
+	 * length has increased: in this case we need to move the tail pointer
+	 * backwards (towards higher indices), to its next-oldest neighbour.
+	 */
+	if (runlen_now > eff_runlen) {
+
+		av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
+		av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
+
+		/* This move may not have cleared the overflow flag. */
+		if (av->av_overflow)
+			av->av_overflow = (av->av_buf_head == av->av_buf_tail);
+	} else {
+		av->av_buf_tail	= avr->avr_ack_ptr;
+		/*
+		 * We have made sure that avr points to a valid cell within the
+		 * buffer. This cell is either older than head, or equals head
+		 * (empty buffer): in both cases we no longer have any overflow.
+		 */
+		av->av_overflow	= 0;
+	}
+
+	/*
+	 * The peer has acknowledged up to and including ack_ackno. Hence the
+	 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
+	 */
+	av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
+
+free_records:
+	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
+		list_del(&avr->avr_node);
+		kmem_cache_free(dccp_ackvec_record_slab, avr);
+	}
+}
+
 int __init dccp_ackvec_init(void)
 {
 	dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 23880be8fc29..3f7008187b8e 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -117,6 +117,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     const u8 *value, const u8 len);
 
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
 extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
 
 static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index c7aeeba859d4..f91cf5ada306 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -165,8 +165,8 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_sock *dp = dccp_sk(sk);
 
 	if (dp->dccps_hc_rx_ackvec != NULL)
-		dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
-					    DCCP_SKB_CB(skb)->dccpd_ack_seq);
+		dccp_ackvec_clear_state(dp->dccps_hc_rx_ackvec,
+					DCCP_SKB_CB(skb)->dccpd_ack_seq);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 5adeeed5e0d2..7743df00f5b1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 	struct dccp_sock *dp = dccp_sk(sk);
 	const struct dccp_hdr *dh = dccp_hdr(skb);
 	const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
-	u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
 	unsigned char *opt_ptr = options;
 	const unsigned char *opt_end = (unsigned char *)dh +
@@ -133,9 +132,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		case DCCPO_ACK_VECTOR_1:
 			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
 				break;
-			if (dp->dccps_hc_rx_ackvec != NULL &&
-			    dccp_ackvec_parse(sk, skb, &ackno, opt, value, len))
-				goto out_invalid_option;
+			dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk),
+				      len);
 			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
-- 
cgit v1.2.3


From 3802408644515e29fb723d51a5317301b212cf3a Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:25:23 +0100
Subject: dccp ccid-2: Update code for the Ack Vector input/registration
 routine

This patch updates the code which registers new packets as received, using the
new circular buffer interface. It contributes a new algorithm which
 * supports both tail/head pointers and buffer wrap-around and
 * deals with overflow (head/tail move in lock-step).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/dccp/ackvec.h |   4 ++
 2 files changed, 154 insertions(+)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index e9a0f66e4afe..f7e647e608bb 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -131,6 +131,156 @@ u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
 	return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
 }
 
+/**
+ * dccp_ackvec_update_old  -  Update previous state as per RFC 4340, 11.4.1
+ * @av:		non-empty buffer to update
+ * @distance:   negative or zero distance of @seqno from buf_ackno downward
+ * @seqno:	the (old) sequence number whose record is to be updated
+ * @state:	state in which packet carrying @seqno was received
+ */
+static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
+				   u64 seqno, enum dccp_ackvec_states state)
+{
+	u16 ptr = av->av_buf_head;
+
+	BUG_ON(distance > 0);
+	if (unlikely(dccp_ackvec_is_empty(av)))
+		return;
+
+	do {
+		u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
+
+		if (distance + runlen >= 0) {
+			/*
+			 * Only update the state if packet has not been received
+			 * yet. This is OK as per the second table in RFC 4340,
+			 * 11.4.1; i.e. here we are using the following table:
+			 *                     RECEIVED
+			 *                      0   1   3
+			 *              S     +---+---+---+
+			 *              T   0 | 0 | 0 | 0 |
+			 *              O     +---+---+---+
+			 *              R   1 | 1 | 1 | 1 |
+			 *              E     +---+---+---+
+			 *              D   3 | 0 | 1 | 3 |
+			 *                    +---+---+---+
+			 * The "Not Received" state was set by reserve_seats().
+			 */
+			if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
+				av->av_buf[ptr] = state;
+			else
+				dccp_pr_debug("Not changing %llu state to %u\n",
+					      (unsigned long long)seqno, state);
+			break;
+		}
+
+		distance += runlen + 1;
+		ptr	  = __ackvec_idx_add(ptr, 1);
+
+	} while (ptr != av->av_buf_tail);
+}
+
+/* Mark @num entries after buf_head as "Not yet received". */
+static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
+{
+	u16 start = __ackvec_idx_add(av->av_buf_head, 1),
+	    len	  = DCCPAV_MAX_ACKVEC_LEN - start;
+
+	/* check for buffer wrap-around */
+	if (num > len) {
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
+		start = 0;
+		num  -= len;
+	}
+	if (num)
+		memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
+}
+
+/**
+ * dccp_ackvec_add_new  -  Record one or more new entries in Ack Vector buffer
+ * @av:		 container of buffer to update (can be empty or non-empty)
+ * @num_packets: number of packets to register (must be >= 1)
+ * @seqno:	 sequence number of the first packet in @num_packets
+ * @state:	 state in which packet carrying @seqno was received
+ */
+static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
+				u64 seqno, enum dccp_ackvec_states state)
+{
+	u32 num_cells = num_packets;
+
+	if (num_packets > DCCPAV_BURST_THRESH) {
+		u32 lost_packets = num_packets - 1;
+
+		DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
+		/*
+		 * We received 1 packet and have a loss of size "num_packets-1"
+		 * which we squeeze into num_cells-1 rather than reserving an
+		 * entire byte for each lost packet.
+		 * The reason is that the vector grows in O(burst_length); when
+		 * it grows too large there will no room left for the payload.
+		 * This is a trade-off: if a few packets out of the burst show
+		 * up later, their state will not be changed; it is simply too
+		 * costly to reshuffle/reallocate/copy the buffer each time.
+		 * Should such problems persist, we will need to switch to a
+		 * different underlying data structure.
+		 */
+		for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
+			u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
+
+			av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
+			av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
+
+			lost_packets -= len;
+		}
+	}
+
+	if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
+		DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
+		av->av_overflow = true;
+	}
+
+	av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
+	if (av->av_overflow)
+		av->av_buf_tail = av->av_buf_head;
+
+	av->av_buf[av->av_buf_head] = state;
+	av->av_buf_ackno	    = seqno;
+
+	if (num_packets > 1)
+		dccp_ackvec_reserve_seats(av, num_packets - 1);
+}
+
+/**
+ * dccp_ackvec_input  -  Register incoming packet in the buffer
+ */
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
+{
+	u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+	enum dccp_ackvec_states state = DCCPAV_RECEIVED;
+
+	if (dccp_ackvec_is_empty(av)) {
+		dccp_ackvec_add_new(av, 1, seqno, state);
+		av->av_tail_ackno = seqno;
+
+	} else {
+		s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
+		u8 *current_head = av->av_buf + av->av_buf_head;
+
+		if (num_packets == 1 &&
+		    dccp_ackvec_state(current_head) == state &&
+		    dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
+
+			*current_head   += 1;
+			av->av_buf_ackno = seqno;
+
+		} else if (num_packets > 0) {
+			dccp_ackvec_add_new(av, num_packets, seqno, state);
+		} else {
+			dccp_ackvec_update_old(av, num_packets, seqno, state);
+		}
+	}
+}
+
 /*
  * If several packets are missing, the HC-Receiver may prefer to enter multiple
  * bytes with run length 0, rather than a single byte with a larger run length;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 3f7008187b8e..21c4212d1be0 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -29,6 +29,9 @@
 /* Estimated minimum average Ack Vector length - used for updating MPS */
 #define DCCPAV_MIN_OPTLEN	16
 
+/* Threshold for coping with large bursts of losses */
+#define DCCPAV_BURST_THRESH	(DCCPAV_MAX_ACKVEC_LEN / 8)
+
 enum dccp_ackvec_states {
 	DCCPAV_RECEIVED =	0x00,
 	DCCPAV_ECN_MARKED =	0x40,
@@ -116,6 +119,7 @@ extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
 			     u64 *ackno, const u8 opt,
 			     const u8 *value, const u8 len);
 
+extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
 extern u16  dccp_ackvec_buflen(const struct dccp_ackvec *av);
-- 
cgit v1.2.3


From 18219463c884bfdb7954d298b9edb5194b14d621 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:25:36 +0100
Subject: dccp ccid-2: Consolidate Ack-Vector processing within main DCCP
 module

This aggregates Ack Vector processing (handling input and clearing old state)
into one function, for the following reasons and benefits:
 * all Ack Vector-specific processing is now in one place;
 * duplicated code is removed;
 * ensuring sanity: from an Ack Vector point of view, it is better to clear the
                    old state first before entering new state;
 * Ack Event handling happens mostly within the CCIDs, not the main DCCP module.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index f91cf5ada306..7d230d14ce22 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -160,13 +160,15 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 	dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
 }
 
-static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
 
-	if (dp->dccps_hc_rx_ackvec != NULL)
-		dccp_ackvec_clear_state(dp->dccps_hc_rx_ackvec,
-					DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	if (av == NULL)
+		return;
+	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+		dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
+	dccp_ackvec_input(av, skb);
 }
 
 static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
@@ -365,21 +367,13 @@ discard:
 int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 			 const struct dccp_hdr *dh, const unsigned len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-
 	if (dccp_check_seqno(sk, skb))
 		goto discard;
 
 	if (dccp_parse_options(sk, NULL, skb))
 		return 1;
 
-	if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-		dccp_event_ack_recv(sk, skb);
-
-	if (dp->dccps_hc_rx_ackvec != NULL &&
-	    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-			    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
-		goto discard;
+	dccp_handle_ackvec_processing(sk, skb);
 	dccp_deliver_input_to_ccids(sk, skb);
 
 	return __dccp_rcv_established(sk, skb, dh, len);
@@ -631,14 +625,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		if (dccp_parse_options(sk, NULL, skb))
 			return 1;
 
-		if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
-			dccp_event_ack_recv(sk, skb);
-
-		if (dp->dccps_hc_rx_ackvec != NULL &&
-		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
-				    DCCP_SKB_CB(skb)->dccpd_seq, DCCPAV_RECEIVED))
-			goto discard;
-
+		dccp_handle_ackvec_processing(sk, skb);
 		dccp_deliver_input_to_ccids(sk, skb);
 	}
 
-- 
cgit v1.2.3


From d83447f0944e73d690218d79c07762ffa4ceb9e4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:25:46 +0100
Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism

The problem with Ack Vectors is that
  i) their length is variable and can in principle grow quite large,
 ii) it is hard to predict exactly how large they will be.

Due to the second point it seems not a good idea to reduce the MPS; in
particular when on average there is enough room for the Ack Vector and an
increase in length is momentarily due to some burst loss, after which the
Ack Vector returns to its normal/average length.

The solution taken by this patch is to subtract a minimum-expected Ack Vector
length from the MPS, and to defer any larger Ack Vectors onto a separate
Sync - but only if indeed there is no space left on the skb.

This patch provides the infrastructure to schedule Sync-packets for transporting
(urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since
it does not need to wait for new application data.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 include/linux/dccp.h |  2 ++
 net/dccp/options.c   | 24 ++++++++++++++++++++----
 net/dccp/output.c    | 15 +++++++++++++++
 3 files changed, 37 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 749f01ccd26e..eed52bcd35d0 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -462,6 +462,7 @@ struct dccp_ackvec;
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
+ * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
  * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
  * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
@@ -503,6 +504,7 @@ struct dccp_sock {
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
 	__u8				dccps_server_timewait:1;
+	__u8				dccps_sync_scheduled:1;
 	struct tasklet_struct		dccps_xmitlet;
 	struct timer_list		dccps_xmit_timer;
 };
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 7743df00f5b1..dabd6ee34d45 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -427,6 +427,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
+	struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
 	const u16 buflen = dccp_ackvec_buflen(av);
 	/* Figure out how many options do we need to represent the ackvec */
 	const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
@@ -435,10 +436,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	const unsigned char *tail, *from;
 	unsigned char *to;
 
-	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+	if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+		DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
+			  dccp_packet_name(dcb->dccpd_type));
 		return -1;
-
-	DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+	}
+	/*
+	 * Since Ack Vectors are variable-length, we can not always predict
+	 * their size. To catch exception cases where the space is running out
+	 * on the skb, a separate Sync is scheduled to carry the Ack Vector.
+	 */
+	if (len > DCCPAV_MIN_OPTLEN &&
+	    len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
+		DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
+			  "MPS=%u ==> reduce payload size?\n", len, skb->len,
+			  dcb->dccpd_opt_len, dp->dccps_mss_cache);
+		dp->dccps_sync_scheduled = 1;
+		return 0;
+	}
+	dcb->dccpd_opt_len += len;
 
 	to   = skb_push(skb, len);
 	len  = buflen;
@@ -479,7 +495,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	/*
 	 * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
 	 */
-	if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce))
+	if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
 		return -ENOBUFS;
 	return 0;
 }
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 45b91853f5ae..d96dd9d362ae 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -283,6 +283,15 @@ static void dccp_xmit_packet(struct sock *sk)
 	 * any local drop will eventually be reported via receiver feedback.
 	 */
 	ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+	/*
+	 * If the CCID needs to transfer additional header options out-of-band
+	 * (e.g. Ack Vectors or feature-negotiation options), it activates this
+	 * flag to schedule a Sync. The Sync will automatically incorporate all
+	 * currently pending header options, thus clearing the backlog.
+	 */
+	if (dp->dccps_sync_scheduled)
+		dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
 }
 
 /**
@@ -636,6 +645,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno,
 	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
 	DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
 
+	/*
+	 * Clear the flag in case the Sync was scheduled for out-of-band data,
+	 * such as carrying a long Ack Vector.
+	 */
+	dccp_sk(sk)->dccps_sync_scheduled = 0;
+
 	dccp_transmit_skb(sk, skb);
 }
 
-- 
cgit v1.2.3


From 52394eecec4e6fa677a61af26f0bd35de665344e Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:26:02 +0100
Subject: dccp ccid-2: Remove old infrastructure

This removes
 * functions for which updates have been provided in the preceding patches and
 * the @av_vec_len field - it is no longer necessary since the buffer length is
   now always computed dynamically.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c | 251 ------------------------------------------------------
 net/dccp/ackvec.h |  14 ---
 2 files changed, 265 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index f7e647e608bb..66b8a51300c0 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -9,18 +9,10 @@
  *      under the terms of the GNU General Public License as published by the
  *      Free Software Foundation; version 2 of the License;
  */
-
-#include "ackvec.h"
 #include "dccp.h"
-
-#include <linux/init.h>
-#include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
-#include <net/sock.h>
-
 static struct kmem_cache *dccp_ackvec_slab;
 static struct kmem_cache *dccp_ackvec_record_slab;
 
@@ -281,249 +273,6 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
 	}
 }
 
-/*
- * If several packets are missing, the HC-Receiver may prefer to enter multiple
- * bytes with run length 0, rather than a single byte with a larger run length;
- * this simplifies table updates if one of the missing packets arrives.
- */
-static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
-						 const unsigned int packets,
-						 const unsigned char state)
-{
-	long gap;
-	long new_head;
-
-	if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
-		return -ENOBUFS;
-
-	gap	 = packets - 1;
-	new_head = av->av_buf_head - packets;
-
-	if (new_head < 0) {
-		if (gap > 0) {
-			memset(av->av_buf, DCCPAV_NOT_RECEIVED,
-			       gap + new_head + 1);
-			gap = -new_head;
-		}
-		new_head += DCCPAV_MAX_ACKVEC_LEN;
-	}
-
-	av->av_buf_head = new_head;
-
-	if (gap > 0)
-		memset(av->av_buf + av->av_buf_head + 1,
-		       DCCPAV_NOT_RECEIVED, gap);
-
-	av->av_buf[av->av_buf_head] = state;
-	av->av_vec_len += packets;
-	return 0;
-}
-
-/*
- * Implements the RFC 4340, Appendix A
- */
-int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-		    const u64 ackno, const u8 state)
-{
-	u8 *cur_head = av->av_buf + av->av_buf_head,
-	   *buf_end  = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
-	/*
-	 * Check at the right places if the buffer is full, if it is, tell the
-	 * caller to start dropping packets till the HC-Sender acks our ACK
-	 * vectors, when we will free up space in av_buf.
-	 *
-	 * We may well decide to do buffer compression, etc, but for now lets
-	 * just drop.
-	 *
-	 * From Appendix A.1.1 (`New Packets'):
-	 *
-	 *	Of course, the circular buffer may overflow, either when the
-	 *	HC-Sender is sending data at a very high rate, when the
-	 *	HC-Receiver's acknowledgements are not reaching the HC-Sender,
-	 *	or when the HC-Sender is forgetting to acknowledge those acks
-	 *	(so the HC-Receiver is unable to clean up old state). In this
-	 *	case, the HC-Receiver should either compress the buffer (by
-	 *	increasing run lengths when possible), transfer its state to
-	 *	a larger buffer, or, as a last resort, drop all received
-	 *	packets, without processing them whatsoever, until its buffer
-	 *	shrinks again.
-	 */
-
-	/* See if this is the first ackno being inserted */
-	if (av->av_vec_len == 0) {
-		*cur_head = state;
-		av->av_vec_len = 1;
-	} else if (after48(ackno, av->av_buf_ackno)) {
-		const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
-
-		/*
-		 * Look if the state of this packet is the same as the
-		 * previous ackno and if so if we can bump the head len.
-		 */
-		if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
-		    dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
-			*cur_head += 1;
-		else if (dccp_ackvec_set_buf_head_state(av, delta, state))
-			return -ENOBUFS;
-	} else {
-		/*
-		 * A.1.2.  Old Packets
-		 *
-		 *	When a packet with Sequence Number S <= buf_ackno
-		 *	arrives, the HC-Receiver will scan the table for
-		 *	the byte corresponding to S. (Indexing structures
-		 *	could reduce the complexity of this scan.)
-		 */
-		u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
-
-		while (1) {
-			const u8 len = dccp_ackvec_runlen(cur_head);
-			/*
-			 * valid packets not yet in av_buf have a reserved
-			 * entry, with a len equal to 0.
-			 */
-			if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
-				dccp_pr_debug("Found %llu reserved seat!\n",
-					      (unsigned long long)ackno);
-				*cur_head = state;
-				goto out;
-			}
-			/* len == 0 means one packet */
-			if (delta < len + 1)
-				goto out_duplicate;
-
-			delta -= len + 1;
-			if (++cur_head == buf_end)
-				cur_head = av->av_buf;
-		}
-	}
-
-	av->av_buf_ackno = ackno;
-out:
-	return 0;
-
-out_duplicate:
-	/* Duplicate packet */
-	dccp_pr_debug("Received a dup or already considered lost "
-		      "packet: %llu\n", (unsigned long long)ackno);
-	return -EILSEQ;
-}
-
-static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
-				     struct dccp_ackvec_record *avr)
-{
-	struct dccp_ackvec_record *next;
-
-	/* sort out vector length */
-	if (av->av_buf_head <= avr->avr_ack_ptr)
-		av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
-	else
-		av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
-				 av->av_buf_head + avr->avr_ack_ptr;
-
-	/* free records */
-	list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
-		list_del(&avr->avr_node);
-		kmem_cache_free(dccp_ackvec_record_slab, avr);
-	}
-}
-
-void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
-				 const u64 ackno)
-{
-	struct dccp_ackvec_record *avr;
-
-	/*
-	 * If we traverse backwards, it should be faster when we have large
-	 * windows. We will be receiving ACKs for stuff we sent a while back
-	 * -sorbo.
-	 */
-	list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
-		if (ackno == avr->avr_ack_seqno) {
-			dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
-				      "ack_ackno=%llu, ACKED!\n",
-				      dccp_role(sk), avr->avr_ack_runlen,
-				      (unsigned long long)avr->avr_ack_seqno,
-				      (unsigned long long)avr->avr_ack_ackno);
-			dccp_ackvec_throw_record(av, avr);
-			break;
-		} else if (avr->avr_ack_seqno > ackno)
-			break; /* old news */
-	}
-}
-
-static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
-					    struct sock *sk, u64 *ackno,
-					    const unsigned char len,
-					    const unsigned char *vector)
-{
-	unsigned char i;
-	struct dccp_ackvec_record *avr;
-
-	/* Check if we actually sent an ACK vector */
-	if (list_empty(&av->av_records))
-		return;
-
-	i = len;
-	/*
-	 * XXX
-	 * I think it might be more efficient to work backwards. See comment on
-	 * rcv_ackno. -sorbo.
-	 */
-	avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
-	while (i--) {
-		const u8 rl = dccp_ackvec_runlen(vector);
-		u64 ackno_end_rl;
-
-		dccp_set_seqno(&ackno_end_rl, *ackno - rl);
-
-		/*
-		 * If our AVR sequence number is greater than the ack, go
-		 * forward in the AVR list until it is not so.
-		 */
-		list_for_each_entry_from(avr, &av->av_records, avr_node) {
-			if (!after48(avr->avr_ack_seqno, *ackno))
-				goto found;
-		}
-		/* End of the av_records list, not found, exit */
-		break;
-found:
-		if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
-			if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
-				dccp_pr_debug("%s ACK vector 0, len=%d, "
-					      "ack_seqno=%llu, ack_ackno=%llu, "
-					      "ACKED!\n",
-					      dccp_role(sk), len,
-					      (unsigned long long)
-					      avr->avr_ack_seqno,
-					      (unsigned long long)
-					      avr->avr_ack_ackno);
-				dccp_ackvec_throw_record(av, avr);
-				break;
-			}
-			/*
-			 * If it wasn't received, continue scanning... we might
-			 * find another one.
-			 */
-		}
-
-		dccp_set_seqno(ackno, ackno_end_rl - 1);
-		++vector;
-	}
-}
-
-int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-		      u64 *ackno, const u8 opt, const u8 *value, const u8 len)
-{
-	if (len > DCCP_SINGLE_OPT_MAXLEN)
-		return -1;
-
-	/* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
-	dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
-					ackno, len, value);
-	return 0;
-}
-
 /**
  * dccp_ackvec_clear_state  -  Perform house-keeping / garbage-collection
  * This routine is called when the peer acknowledges the receipt of Ack Vectors
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 21c4212d1be0..e19b8d5ee05f 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -64,7 +64,6 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
  *		   %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
  * @av_overflow:   if 1 then buf_head == buf_tail indicates buffer wraparound
  * @av_records:	   list of %dccp_ackvec_record (Ack Vectors sent previously)
- * @av_veclen:	   length of the live portion of @av_buf
  */
 struct dccp_ackvec {
 	u8			av_buf[DCCPAV_MAX_ACKVEC_LEN];
@@ -75,7 +74,6 @@ struct dccp_ackvec {
 	bool			av_buf_nonce[DCCPAV_NUM_ACKVECS];
 	u8			av_overflow:1;
 	struct list_head	av_records;
-	u16			av_vec_len;
 };
 
 /** struct dccp_ackvec_record - Records information about sent Ack Vectors
@@ -101,24 +99,12 @@ struct dccp_ackvec_record {
 	u8		 avr_ack_nonce:1;
 };
 
-struct sock;
-struct sk_buff;
-
 extern int dccp_ackvec_init(void);
 extern void dccp_ackvec_exit(void);
 
 extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
 extern void dccp_ackvec_free(struct dccp_ackvec *av);
 
-extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
-			   const u64 ackno, const u8 state);
-
-extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
-					struct sock *sk, const u64 ackno);
-extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
-			     u64 *ackno, const u8 opt,
-			     const u8 *value, const u8 len);
-
 extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
 extern int  dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
 extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
-- 
cgit v1.2.3


From 7e87fe84303cc54ecf3c7b688cb08ca24322a41d Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 14 Nov 2010 17:26:13 +0100
Subject: dccp ccid-2: Separate option parsing from CCID processing

This patch replaces an almost identical replication of code: large parts
of dccp_parse_options() re-appeared as ccid2_ackvector() in ccid2.c.

Apart from the duplication, this caused two more problems:
 1. CCIDs should not need to be concerned with parsing header options;
 2. one can not assume that Ack Vectors appear as a contiguous area within an
    skb, it is legal to insert other options and/or padding in between. The
    current code would throw an error and stop reading in such a case.

Since Ack Vectors provide CCID-specific information, they are now processed
by the CCID directly, separating this functionality from the main DCCP code.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c      |  28 +++++++++++
 net/dccp/ackvec.h      |  19 +++++++
 net/dccp/ccids/ccid2.c | 134 +++++++++++++++----------------------------------
 net/dccp/ccids/ccid2.h |   2 +
 net/dccp/options.c     |  17 ++++---
 5 files changed, 100 insertions(+), 100 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 66b8a51300c0..41819848bdda 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -343,6 +343,34 @@ free_records:
 	}
 }
 
+/*
+ *	Routines to keep track of Ack Vectors received in an skb
+ */
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
+{
+	struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
+
+	if (new == NULL)
+		return -ENOBUFS;
+	new->vec   = vec;
+	new->len   = len;
+	new->nonce = nonce;
+
+	list_add_tail(&new->node, head);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
+
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
+{
+	struct dccp_ackvec_parsed *cur, *next;
+
+	list_for_each_entry_safe(cur, next, parsed_chunks, node)
+		kfree(cur);
+	INIT_LIST_HEAD(parsed_chunks);
+}
+EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
+
 int __init dccp_ackvec_init(void)
 {
 	dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index e19b8d5ee05f..e2ab0627a5ff 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -114,4 +114,23 @@ static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
 {
 	return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
 }
+
+/**
+ * struct dccp_ackvec_parsed  -  Record offsets of Ack Vectors in skb
+ * @vec:	start of vector (offset into skb)
+ * @len:	length of @vec
+ * @nonce:	whether @vec had an ECN nonce of 0 or 1
+ * @node:	FIFO - arranged in descending order of ack_ackno
+ * This structure is used by CCIDs to access Ack Vectors in a received skb.
+ */
+struct dccp_ackvec_parsed {
+	u8		 *vec,
+			 len,
+			 nonce:1;
+	struct list_head node;
+};
+
+extern int dccp_ackvec_parsed_add(struct list_head *head,
+				  u8 *vec, u8 len, u8 nonce);
+extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
 #endif /* _ACKVEC_H */
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index cb1b4a0d1877..e96d5e810039 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -246,68 +246,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 #endif
 }
 
-/* XXX Lame code duplication!
- * returns -1 if none was found.
- * else returns the next offset to use in the function call.
- */
-static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
-			   unsigned char **vec, unsigned char *veclen)
-{
-	const struct dccp_hdr *dh = dccp_hdr(skb);
-	unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
-	unsigned char *opt_ptr;
-	const unsigned char *opt_end = (unsigned char *)dh +
-					(dh->dccph_doff * 4);
-	unsigned char opt, len;
-	unsigned char *value;
-
-	BUG_ON(offset < 0);
-	options += offset;
-	opt_ptr = options;
-	if (opt_ptr >= opt_end)
-		return -1;
-
-	while (opt_ptr != opt_end) {
-		opt   = *opt_ptr++;
-		len   = 0;
-		value = NULL;
-
-		/* Check if this isn't a single byte option */
-		if (opt > DCCPO_MAX_RESERVED) {
-			if (opt_ptr == opt_end)
-				goto out_invalid_option;
-
-			len = *opt_ptr++;
-			if (len < 3)
-				goto out_invalid_option;
-			/*
-			 * Remove the type and len fields, leaving
-			 * just the value size
-			 */
-			len     -= 2;
-			value   = opt_ptr;
-			opt_ptr += len;
-
-			if (opt_ptr > opt_end)
-				goto out_invalid_option;
-		}
-
-		switch (opt) {
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			*vec	= value;
-			*veclen = len;
-			return offset + (opt_ptr - options);
-		}
-	}
-
-	return -1;
-
-out_invalid_option:
-	DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
-	return -1;
-}
-
 /**
  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
  * This code is almost identical with TCP's tcp_rtt_estimator(), since
@@ -432,16 +370,28 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 		ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
 }
 
+static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+				     u8 option, u8 *optval, u8 optlen)
+{
+	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+
+	switch (option) {
+	case DCCPO_ACK_VECTOR_0:
+	case DCCPO_ACK_VECTOR_1:
+		return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
+					      option - DCCPO_ACK_VECTOR_0);
+	}
+	return 0;
+}
+
 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 	const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
+	struct dccp_ackvec_parsed *avp;
 	u64 ackno, seqno;
 	struct ccid2_seq *seqp;
-	unsigned char *vector;
-	unsigned char veclen;
-	int offset = 0;
 	int done = 0;
 	unsigned int maxincr = 0;
 
@@ -475,17 +425,12 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* check forward path congestion */
-	/* still didn't send out new data packets */
-	if (hc->tx_seqh == hc->tx_seqt)
+	if (dccp_packet_without_ack(skb))
 		return;
 
-	switch (DCCP_SKB_CB(skb)->dccpd_type) {
-	case DCCP_PKT_ACK:
-	case DCCP_PKT_DATAACK:
-		break;
-	default:
-		return;
-	}
+	/* still didn't send out new data packets */
+	if (hc->tx_seqh == hc->tx_seqt)
+		goto done;
 
 	ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 	if (after48(ackno, hc->tx_high_ack))
@@ -509,15 +454,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 
 	/* go through all ack vectors */
-	while ((offset = ccid2_ackvector(sk, skb, offset,
-					 &vector, &veclen)) != -1) {
+	list_for_each_entry(avp, &hc->tx_av_chunks, node) {
 		/* go through this ack vector */
-		while (veclen--) {
-			u64 ackno_end_rl = SUB48(ackno, dccp_ackvec_runlen(vector));
+		for (; avp->len--; avp->vec++) {
+			u64 ackno_end_rl = SUB48(ackno,
+						 dccp_ackvec_runlen(avp->vec));
 
-			ccid2_pr_debug("ackvec start:%llu end:%llu\n",
+			ccid2_pr_debug("ackvec %llu |%u,%u|\n",
 				       (unsigned long long)ackno,
-				       (unsigned long long)ackno_end_rl);
+				       dccp_ackvec_state(avp->vec) >> 6,
+				       dccp_ackvec_runlen(avp->vec));
 			/* if the seqno we are analyzing is larger than the
 			 * current ackno, then move towards the tail of our
 			 * seqnos.
@@ -536,7 +482,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			 * run length
 			 */
 			while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
-				const u8 state = dccp_ackvec_state(vector);
+				const u8 state = dccp_ackvec_state(avp->vec);
 
 				/* new packet received or marked */
 				if (state != DCCPAV_NOT_RECEIVED &&
@@ -563,7 +509,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 				break;
 
 			ackno = SUB48(ackno_end_rl, 1);
-			vector++;
 		}
 		if (done)
 			break;
@@ -631,10 +576,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		sk_stop_timer(sk, &hc->tx_rtotimer);
 	else
 		sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
+done:
 	/* check if incoming Acks allow pending packets to be sent */
 	if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
 		tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
+	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -663,6 +609,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 	hc->tx_last_cong = ccid2_time_stamp;
 	setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 			(unsigned long)sk);
+	INIT_LIST_HEAD(&hc->tx_av_chunks);
 	return 0;
 }
 
@@ -696,16 +643,17 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 struct ccid_operations ccid2_ops = {
-	.ccid_id		= DCCPC_CCID2,
-	.ccid_name		= "TCP-like",
-	.ccid_hc_tx_obj_size	= sizeof(struct ccid2_hc_tx_sock),
-	.ccid_hc_tx_init	= ccid2_hc_tx_init,
-	.ccid_hc_tx_exit	= ccid2_hc_tx_exit,
-	.ccid_hc_tx_send_packet	= ccid2_hc_tx_send_packet,
-	.ccid_hc_tx_packet_sent	= ccid2_hc_tx_packet_sent,
-	.ccid_hc_tx_packet_recv	= ccid2_hc_tx_packet_recv,
-	.ccid_hc_rx_obj_size	= sizeof(struct ccid2_hc_rx_sock),
-	.ccid_hc_rx_packet_recv	= ccid2_hc_rx_packet_recv,
+	.ccid_id		  = DCCPC_CCID2,
+	.ccid_name		  = "TCP-like",
+	.ccid_hc_tx_obj_size	  = sizeof(struct ccid2_hc_tx_sock),
+	.ccid_hc_tx_init	  = ccid2_hc_tx_init,
+	.ccid_hc_tx_exit	  = ccid2_hc_tx_exit,
+	.ccid_hc_tx_send_packet	  = ccid2_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent	  = ccid2_hc_tx_packet_sent,
+	.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
+	.ccid_hc_tx_packet_recv	  = ccid2_hc_tx_packet_recv,
+	.ccid_hc_rx_obj_size	  = sizeof(struct ccid2_hc_rx_sock),
+	.ccid_hc_rx_packet_recv	  = ccid2_hc_rx_packet_recv,
 };
 
 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 25cb6b216eda..e9985dafc2c7 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -55,6 +55,7 @@ struct ccid2_seq {
  * @tx_rtt_seq:		     to decay RTTVAR at most once per flight
  * @tx_rpseq:		     last consecutive seqno
  * @tx_rpdupack:	     dupacks since rpseq
+ * @tx_av_chunks:	     list of Ack Vectors received on current skb
  */
 struct ccid2_hc_tx_sock {
 	u32			tx_cwnd;
@@ -79,6 +80,7 @@ struct ccid2_hc_tx_sock {
 	int			tx_rpdupack;
 	u32			tx_last_cong;
 	u64			tx_high_ack;
+	struct list_head	tx_av_chunks;
 };
 
 static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index dabd6ee34d45..f06ffcfc8d71 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -128,13 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 			if (rc)
 				goto out_featneg_failed;
 			break;
-		case DCCPO_ACK_VECTOR_0:
-		case DCCPO_ACK_VECTOR_1:
-			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
-				break;
-			dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk),
-				      len);
-			break;
 		case DCCPO_TIMESTAMP:
 			if (len != 4)
 				goto out_invalid_option;
@@ -224,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 						     pkt_type, opt, value, len))
 				goto out_invalid_option;
 			break;
+		case DCCPO_ACK_VECTOR_0:
+		case DCCPO_ACK_VECTOR_1:
+			if (dccp_packet_without_ack(skb))   /* RFC 4340, 11.4 */
+				break;
+			/*
+			 * Ack vectors are processed by the TX CCID if it is
+			 * interested. The RX CCID need not parse Ack Vectors,
+			 * since it is only interested in clearing old state.
+			 * Fall through.
+			 */
 		case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
 			if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
 						     pkt_type, opt, value, len))
-- 
cgit v1.2.3


From d9aa93804e53f2153260568024b75ad3d81784f9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 15 Nov 2010 08:52:02 -0800
Subject: ipv4: Fix build with multicast disabled.

net/ipv4/igmp.c: In function 'ip_mc_inc_group':
net/ipv4/igmp.c:1228: error: implicit declaration of function 'for_each_pmc_rtnl'
net/ipv4/igmp.c:1228: error: expected ';' before '{' token
net/ipv4/igmp.c: In function 'ip_mc_unmap':
net/ipv4/igmp.c:1333: error: expected ';' before 'igmp_group_dropped'
 ...

Move for_each_pmc_rcu and for_each_pmc_rtnl macro definitions
outside of multicast ifdef protection.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0f0e0f0279b8..a1bf2f49e716 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -163,6 +163,16 @@ static void ip_ma_put(struct ip_mc_list *im)
 	}
 }
 
+#define for_each_pmc_rcu(in_dev, pmc)				\
+	for (pmc = rcu_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next_rcu))
+
+#define for_each_pmc_rtnl(in_dev, pmc)				\
+	for (pmc = rtnl_dereference(in_dev->mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rtnl_dereference(pmc->next_rcu))
+
 #ifdef CONFIG_IP_MULTICAST
 
 /*
@@ -502,16 +512,6 @@ empty_source:
 	return skb;
 }
 
-#define for_each_pmc_rcu(in_dev, pmc)				\
-	for (pmc = rcu_dereference(in_dev->mc_list);		\
-	     pmc != NULL;					\
-	     pmc = rcu_dereference(pmc->next_rcu))
-
-#define for_each_pmc_rtnl(in_dev, pmc)				\
-	for (pmc = rtnl_dereference(in_dev->mc_list);		\
-	     pmc != NULL;					\
-	     pmc = rtnl_dereference(pmc->next_rcu))
-
 static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
 {
 	struct sk_buff *skb = NULL;
-- 
cgit v1.2.3


From e1e78db628b33c657944865e3bca01ee59cc5b80 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:53 +0000
Subject: offloading: Make scatter/gather more tolerant of vlans.

When checking if it is necessary to linearize a packet, we currently
use vlan_features if the packet contains either an in-band or out-
of-band vlan tag.  However, in-band tags aren't special in any way
for scatter/gather since they are part of the packet buffer and are
simply more data to DMA.  Therefore, only use vlan_features for out-
of-band tags, which could potentially have some interaction with
scatter/gather.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 5968c822c999..0b403d503311 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1976,15 +1976,20 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
-	int features = dev->features;
+	if (skb_is_nonlinear(skb)) {
+		int features = dev->features;
 
-	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
-		features &= dev->vlan_features;
+		if (vlan_tx_tag_present(skb))
+			features &= dev->vlan_features;
 
-	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
-		(skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb))));
+		return (skb_has_frag_list(skb) &&
+			!(features & NETIF_F_FRAGLIST)) ||
+			(skb_shinfo(skb)->nr_frags &&
+			(!(features & NETIF_F_SG) ||
+			illegal_highdma(dev, skb)));
+	}
+
+	return 0;
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-- 
cgit v1.2.3


From c8d5bcd1aff89199cde4bd82c5c40fb704c8bba4 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:54 +0000
Subject: offloading: Support multiple vlan tags in GSO.

We assume that hardware TSO can't support multiple levels of vlan tags
but we allow it to be done.  Therefore, enable GSO to parse these tags
so we can fallback to software.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0b403d503311..368930a988e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1794,16 +1794,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
 	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
 	int err;
 
-	if (type == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh;
+	while (type == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vh;
 
-		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
 			return ERR_PTR(-EINVAL);
 
-		veh = (struct vlan_ethhdr *)skb->data;
-		type = veh->h_vlan_encapsulated_proto;
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
 	}
 
 	skb_reset_mac_header(skb);
-- 
cgit v1.2.3


From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:55 +0000
Subject: offloading: Force software GSO for multiple vlan tags.

We currently use vlan_features to check for TSO support if there is
a vlan tag.  However, it's quite likely that the NIC is not able to
do TSO when there is an arbitrary number of tags.  Therefore if there
is more than one tag (in-band or out-of-band), fall back to software
emulation.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++----
 net/core/dev.c            | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 578debb801f4..6e4cfbc53d4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2239,6 +2239,8 @@ unsigned long netdev_fix_features(unsigned long features, const char *name);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev);
+
 static inline int net_gso_ok(int features, int gso_type)
 {
 	int feature = gso_type << NETIF_F_GSO_SHIFT;
@@ -2254,10 +2256,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	if (skb_is_gso(skb)) {
-		int features = dev->features;
-
-		if (skb->protocol == htons(ETH_P_8021Q) || skb->vlan_tci)
-			features &= dev->vlan_features;
+		int features = netif_get_vlan_features(skb, dev);
 
 		return (!skb_gso_ok(skb, features) ||
 			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/net/core/dev.c b/net/core/dev.c
index 368930a988e3..8b500c3e0297 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+{
+	__be16 protocol = skb->protocol;
+
+	if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	} else if (!skb->vlan_tci)
+		return dev->features;
+
+	if (protocol != htons(ETH_P_8021Q))
+		return dev->features & dev->vlan_features;
+	else
+		return 0;
+}
+
 /*
  * Returns true if either:
  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
-- 
cgit v1.2.3


From 029f5fc31cdb35d6c8a7fe9a54bf21556e175988 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Sat, 30 Oct 2010 14:22:32 +0000
Subject: 8021q: set hard_header_len when VLAN offload features are toggled

Toggling the vlan tx|rx hw offloads needs to set the hard_header_len
as well otherwise we end up using LL_RESERVED_SPACE incorrectly.
This results in pskb_expand_head() being used unnecessarily.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 52077ca22072..55d2135889fc 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -334,6 +334,12 @@ static void vlan_transfer_features(struct net_device *dev,
 	vlandev->features &= ~dev->vlan_features;
 	vlandev->features |= dev->features & dev->vlan_features;
 	vlandev->gso_max_size = dev->gso_max_size;
+
+	if (dev->features & NETIF_F_HW_VLAN_TX)
+		vlandev->hard_header_len = dev->hard_header_len;
+	else
+		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
+
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
-- 
cgit v1.2.3


From 8f5549f381ced6a255f2c7127b2b3b3b05fdfd6e Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Sat, 30 Oct 2010 14:22:37 +0000
Subject: net: remove check for headroom in vlan_dev_create

It is possible for the headroom to be smaller then the
hard_header_len for a short period of time after toggling
the vlan offload setting.

This is not a hard error and skb_cow_head is called in
__vlan_put_tag() to resolve this.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 14e3d1fa07a0..afb03c5a7adc 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -274,9 +274,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
-		return -ENOSPC;
-
 	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-- 
cgit v1.2.3


From 636e19a34275d7d6fda0fefa965b1e2a715e2b02 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Sat, 30 Oct 2010 14:22:42 +0000
Subject: net: consolidate 8021q tagging

Now that VLAN packets are tagged in dev_hard_start_xmit()
at the bottom of the stack we no longer need to tag them
in the 8021Q module (Except in the !VLAN_FLAG_REORDER_HDR
case).

This allows the accel path and non accel paths to be consolidated.
Here the vlan_tci in the skb is always set and we allow the
stack to add the actual tag in dev_hard_start_xmit().

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.h     |   4 --
 net/8021q/vlan_dev.c | 105 ++++-----------------------------------------------
 net/8021q/vlanproc.c |   4 --
 3 files changed, 7 insertions(+), 106 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index db01b3181fdc..4625ba64dfdc 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -45,8 +45,6 @@ struct vlan_rx_stats {
  *	@real_dev: underlying netdevice
  *	@real_dev_addr: address of underlying netdevice
  *	@dent: proc dir entry
- *	@cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX
- *	@cnt_encap_on_xmit: statistic - number of skb encapsulations on TX
  *	@vlan_rx_stats: ptr to percpu rx stats
  */
 struct vlan_dev_info {
@@ -62,8 +60,6 @@ struct vlan_dev_info {
 	unsigned char				real_dev_addr[ETH_ALEN];
 
 	struct proc_dir_entry			*dent;
-	unsigned long				cnt_inc_headroom_on_tx;
-	unsigned long				cnt_encap_on_xmit;
 	struct vlan_rx_stats __percpu		*vlan_rx_stats;
 };
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index afb03c5a7adc..f3c9552f6ba8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -323,24 +323,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
 	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
-		unsigned int orig_headroom = skb_headroom(skb);
 		u16 vlan_tci;
-
-		vlan_dev_info(dev)->cnt_encap_on_xmit++;
-
 		vlan_tci = vlan_dev_info(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_put_tag(skb, vlan_tci);
-		if (!skb) {
-			txq->tx_dropped++;
-			return NETDEV_TX_OK;
-		}
-
-		if (orig_headroom < VLAN_HLEN)
-			vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
+		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-
 	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
@@ -354,32 +342,6 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
-						    struct net_device *dev)
-{
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
-	u16 vlan_tci;
-	unsigned int len;
-	int ret;
-
-	vlan_tci = vlan_dev_info(dev)->vlan_id;
-	vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
-
-	skb->dev = vlan_dev_info(dev)->real_dev;
-	len = skb->len;
-	ret = dev_queue_xmit(skb);
-
-	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
-
-	return ret;
-}
-
 static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
 	struct net_device *rdev = vlan_dev_info(dev)->real_dev;
@@ -716,8 +678,7 @@ static const struct header_ops vlan_header_ops = {
 	.parse	 = eth_header_parse,
 };
 
-static const struct net_device_ops vlan_netdev_ops, vlan_netdev_accel_ops,
-		    vlan_netdev_ops_sq, vlan_netdev_accel_ops_sq;
+static const struct net_device_ops vlan_netdev_ops, vlan_netdev_ops_sq;
 
 static int vlan_dev_init(struct net_device *dev)
 {
@@ -752,19 +713,16 @@ static int vlan_dev_init(struct net_device *dev)
 	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
-		if (real_dev->netdev_ops->ndo_select_queue)
-			dev->netdev_ops = &vlan_netdev_accel_ops_sq;
-		else
-			dev->netdev_ops = &vlan_netdev_accel_ops;
 	} else {
 		dev->header_ops      = &vlan_header_ops;
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
-		if (real_dev->netdev_ops->ndo_select_queue)
-			dev->netdev_ops = &vlan_netdev_ops_sq;
-		else
-			dev->netdev_ops = &vlan_netdev_ops;
 	}
 
+	if (real_dev->netdev_ops->ndo_select_queue)
+		dev->netdev_ops = &vlan_netdev_ops_sq;
+	else
+		dev->netdev_ops = &vlan_netdev_ops;
+
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
 
@@ -905,30 +863,6 @@ static const struct net_device_ops vlan_netdev_ops = {
 #endif
 };
 
-static const struct net_device_ops vlan_netdev_accel_ops = {
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit =  vlan_dev_hwaccel_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
 static const struct net_device_ops vlan_netdev_ops_sq = {
 	.ndo_select_queue	= vlan_dev_select_queue,
 	.ndo_change_mtu		= vlan_dev_change_mtu,
@@ -954,31 +888,6 @@ static const struct net_device_ops vlan_netdev_ops_sq = {
 #endif
 };
 
-static const struct net_device_ops vlan_netdev_accel_ops_sq = {
-	.ndo_select_queue	= vlan_dev_select_queue,
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit =  vlan_dev_hwaccel_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
 void vlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 80e280f56686..8a64db19b8a5 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -299,10 +299,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_puts(seq, "\n");
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
-	seq_printf(seq, fmt, "total headroom inc",
-		   dev_info->cnt_inc_headroom_on_tx);
-	seq_printf(seq, fmt, "total encap on xmit",
-		   dev_info->cnt_encap_on_xmit);
 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
-- 
cgit v1.2.3


From 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 20 Oct 2010 10:16:58 -0700
Subject: rfkill: remove dead code

The following code is defined but never used.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/rfkill.h | 31 -------------------------------
 net/rfkill/core.c      | 14 --------------
 2 files changed, 45 deletions(-)

(limited to 'net')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 08c32e4f261a..c6c608482cba 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -354,37 +354,6 @@ static inline bool rfkill_blocked(struct rfkill *rfkill)
 }
 #endif /* RFKILL || RFKILL_MODULE */
 
-
-#ifdef CONFIG_RFKILL_LEDS
-/**
- * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED.
- * This function might return a NULL pointer if registering of the
- * LED trigger failed. Use this as "default_trigger" for the LED.
- */
-const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
-
-/**
- * rfkill_set_led_trigger_name -- set the LED trigger name
- * @rfkill: rfkill struct
- * @name: LED trigger name
- *
- * This function sets the LED trigger name of the radio LED
- * trigger that rfkill creates. It is optional, but if called
- * must be called before rfkill_register() to be effective.
- */
-void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name);
-#else
-static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-{
-	return NULL;
-}
-
-static inline void
-rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
-{
-}
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif /* RFKILL_H */
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 04f599089e6d..0198191b756d 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -149,20 +149,6 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
 	rfkill_led_trigger_event(rfkill);
 }
 
-const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-{
-	return rfkill->led_trigger.name;
-}
-EXPORT_SYMBOL(rfkill_get_led_trigger_name);
-
-void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
-{
-	BUG_ON(!rfkill);
-
-	rfkill->ledtrigname = name;
-}
-EXPORT_SYMBOL(rfkill_set_led_trigger_name);
-
 static int rfkill_led_trigger_register(struct rfkill *rfkill)
 {
 	rfkill->led_trigger.name = rfkill->ledtrigname
-- 
cgit v1.2.3


From 7ca43d03b1291481bdf894bbaec5d580e7684e7d Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:53 -0700
Subject: cfg80211: pass the reg hint initiator to helpers

This is required later.

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Cc: stable@kernel.org
signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4b9f8912526c..b64596fe7363 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -720,7 +720,9 @@ EXPORT_SYMBOL(freq_reg_info);
  * on the wiphy with the target_bw specified. Then we can simply use
  * that below for the desired_bw_khz below.
  */
-static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
+static void handle_channel(struct wiphy *wiphy,
+			   enum nl80211_reg_initiator initiator,
+			   enum ieee80211_band band,
 			   unsigned int chan_idx)
 {
 	int r;
@@ -784,7 +786,9 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 		chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
 }
 
-static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
+static void handle_band(struct wiphy *wiphy,
+			enum ieee80211_band band,
+			enum nl80211_reg_initiator initiator)
 {
 	unsigned int i;
 	struct ieee80211_supported_band *sband;
@@ -793,7 +797,7 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
 	sband = wiphy->bands[band];
 
 	for (i = 0; i < sband->n_channels; i++)
-		handle_channel(wiphy, band, i);
+		handle_channel(wiphy, initiator, band, i);
 }
 
 static bool ignore_reg_update(struct wiphy *wiphy,
@@ -1030,7 +1034,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
 		goto out;
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
-			handle_band(wiphy, band);
+			handle_band(wiphy, band, initiator);
 	}
 out:
 	reg_process_beacons(wiphy);
-- 
cgit v1.2.3


From 749b527b21465fb079796c03ffb4302584dc31c1 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:54 -0700
Subject: cfg80211: fix allowing country IEs for WIPHY_FLAG_STRICT_REGULATORY

We should be enabling country IE hints for WIPHY_FLAG_STRICT_REGULATORY
even if we haven't yet recieved regulatory domain hint for the driver
if it needed one. Without this Country IEs are not passed on to drivers
that have set WIPHY_FLAG_STRICT_REGULATORY, today this is just all
Atheros chipset drivers: ath5k, ath9k, ar9170, carl9170.

This was part of the original design, however it was completely
overlooked...

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 15 ++++++++-------
 net/wireless/reg.c     |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2a7936d7851d..e5702f5ac57c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1321,13 +1321,14 @@ struct cfg80211_ops {
  * 	initiator is %REGDOM_SET_BY_CORE).
  * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will
  *	ignore regulatory domain settings until it gets its own regulatory
- *	domain via its regulatory_hint(). After its gets its own regulatory
- *	domain it will only allow further regulatory domain settings to
- *	further enhance compliance. For example if channel 13 and 14 are
- *	disabled by this regulatory domain no user regulatory domain can
- *	enable these channels at a later time. This can be used for devices
- *	which do not have calibration information gauranteed for frequencies
- *	or settings outside of its regulatory domain.
+ *	domain via its regulatory_hint() unless the regulatory hint is
+ *	from a country IE. After its gets its own regulatory domain it will
+ *	only allow further regulatory domain settings to further enhance
+ *	compliance. For example if channel 13 and 14 are disabled by this
+ *	regulatory domain no user regulatory domain can enable these channels
+ *	at a later time. This can be used for devices which do not have
+ *	calibration information guaranteed for frequencies or settings
+ *	outside of its regulatory domain.
  * @WIPHY_FLAG_DISABLE_BEACON_HINTS: enable this if your driver needs to ensure
  *	that passive scan flags and beaconing flags may not be lifted by
  *	cfg80211 due to regulatory beacon hints. For more information on beacon
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b64596fe7363..1bc8131a5185 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -813,6 +813,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 	 * desired regulatory domain set
 	 */
 	if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd &&
+	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    !is_world_regdom(last_request->alpha2))
 		return true;
 	return false;
-- 
cgit v1.2.3


From ca4ffe8f2848169a8ded0ea8a60b2d81925564c9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:55 -0700
Subject: cfg80211: fix disabling channels based on hints

After a module loads you will have loaded the world roaming regulatory
domain or a custom regulatory domain. Further regulatory hints are
welcomed and should be respected unless the regulatory hint is coming
from a country IE as the IEEE spec allows for a country IE to be a subset
of what is allowed by the local regulatory agencies.

So disable all channels that do not fit a regulatory domain sent
from a unless the hint is from a country IE and the country IE had
no information about the band we are currently processing.

This fixes a few regulatory issues, for example for drivers that depend
on CRDA and had no 5 GHz freqencies allowed were not properly disabling
5 GHz at all, furthermore it also allows users to restrict devices
further as was intended.

If you recieve a country IE upon association we will also disable the
channels that are not allowed if the country IE had at least one
channel on the respective band we are procesing.

This was the original intention behind this design but it was
completely overlooked...

Cc: David Quan <david.quan@atheros.com>
Cc: Jouni Malinen <jouni.malinen@atheros.com>
cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Cc: stable@kernel.org
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  6 +++++-
 net/wireless/reg.c      | 20 +++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 0edb2566c14c..fb877b5621b7 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1307,7 +1307,11 @@ enum nl80211_bitrate_attr {
  * 	wireless core it thinks its knows the regulatory domain we should be in.
  * @NL80211_REGDOM_SET_BY_COUNTRY_IE: the wireless core has received an
  * 	802.11 country information element with regulatory information it
- * 	thinks we should consider.
+ * 	thinks we should consider. cfg80211 only processes the country
+ *	code from the IE, and relies on the regulatory domain information
+ *	structure pased by userspace (CRDA) from our wireless-regdb.
+ *	If a channel is enabled but the country code indicates it should
+ *	be disabled we disable the channel and re-enable it upon disassociation.
  */
 enum nl80211_reg_initiator {
 	NL80211_REGDOM_SET_BY_CORE,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1bc8131a5185..8ab65f2afe70 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -750,8 +750,26 @@ static void handle_channel(struct wiphy *wiphy,
 			  desired_bw_khz,
 			  &reg_rule);
 
-	if (r)
+	if (r) {
+		/*
+		 * We will disable all channels that do not match our
+		 * recieved regulatory rule unless the hint is coming
+		 * from a Country IE and the Country IE had no information
+		 * about a band. The IEEE 802.11 spec allows for an AP
+		 * to send only a subset of the regulatory rules allowed,
+		 * so an AP in the US that only supports 2.4 GHz may only send
+		 * a country IE with information for the 2.4 GHz band
+		 * while 5 GHz is still supported.
+		 */
+		if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+		    r == -ERANGE)
+			return;
+
+		REG_DBG_PRINT("cfg80211: Disabling freq %d MHz\n",
+			      chan->center_freq);
+		chan->flags = IEEE80211_CHAN_DISABLED;
 		return;
+	}
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
-- 
cgit v1.2.3


From 926a0a094d2b9052db3f7f37438c3d305cea4be7 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 21 Oct 2010 19:17:03 +0530
Subject: cfg80211: add debug prints for when we ignore regulatory hints

This can help with debugging issues. You will only see
these with CONFIG_CFG80211_REG_DEBUG enabled.

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: Senthil Balasubramanian <senthilkumar@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 8ab65f2afe70..7bff1c1d6c8f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -711,6 +711,25 @@ int freq_reg_info(struct wiphy *wiphy,
 }
 EXPORT_SYMBOL(freq_reg_info);
 
+#ifdef CONFIG_CFG80211_REG_DEBUG
+static const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
+{
+	switch (initiator) {
+	case NL80211_REGDOM_SET_BY_CORE:
+		return "Set by core";
+	case NL80211_REGDOM_SET_BY_USER:
+		return "Set by user";
+	case NL80211_REGDOM_SET_BY_DRIVER:
+		return "Set by driver";
+	case NL80211_REGDOM_SET_BY_COUNTRY_IE:
+		return "Set by country IE";
+	default:
+		WARN_ON(1);
+		return "Set by bug";
+	}
+}
+#endif
+
 /*
  * Note that right now we assume the desired channel bandwidth
  * is always 20 MHz for each individual channel (HT40 uses 20 MHz
@@ -821,19 +840,36 @@ static void handle_band(struct wiphy *wiphy,
 static bool ignore_reg_update(struct wiphy *wiphy,
 			      enum nl80211_reg_initiator initiator)
 {
-	if (!last_request)
+	if (!last_request) {
+		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s since "
+			      "last_request is not set\n",
+			      reg_initiator_name(initiator));
 		return true;
+	}
+
 	if (initiator == NL80211_REGDOM_SET_BY_CORE &&
-	    wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY)
+	    wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) {
+		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s "
+			      "since the driver uses its own custom "
+			      "regulatory domain ",
+			      reg_initiator_name(initiator));
 		return true;
+	}
+
 	/*
 	 * wiphy->regd will be set once the device has its own
 	 * desired regulatory domain set
 	 */
 	if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd &&
 	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
-	    !is_world_regdom(last_request->alpha2))
+	    !is_world_regdom(last_request->alpha2)) {
+		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s "
+			      "since the driver requires its own regulaotry "
+			      "domain to be set first",
+			      reg_initiator_name(initiator));
 		return true;
+	}
+
 	return false;
 }
 
-- 
cgit v1.2.3


From a65185367f9f876448f0f12ac09a673d20371efc Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:57 -0700
Subject: cfg80211: add debug print when disabling a channel on a custom regd

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 7bff1c1d6c8f..6e7a9d853191 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1125,6 +1125,11 @@ static void handle_channel_custom(struct wiphy *wiphy,
 			       regd);
 
 	if (r) {
+		REG_DBG_PRINT("cfg80211: Disabling freq %d MHz as custom "
+			      "regd has no rule that fits a %d MHz "
+			      "wide channel\n",
+			      chan->center_freq,
+			      KHZ_TO_MHZ(desired_bw_khz));
 		chan->flags = IEEE80211_CHAN_DISABLED;
 		return;
 	}
-- 
cgit v1.2.3


From e702d3cf29143327679ce2e2a60775eaf829f377 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 21 Oct 2010 19:17:04 +0530
Subject: cfg80211: add debug print when processing a channel

In the worst case you are seeing really odd things you want
more information than what is provided right now, for those
that insist and want debug info through CONFIG_CFG80211_REG_DEBUG
provide a print of when we are processing a channel and with what
regulatory rule.

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: Senthil Balasubramanian <senthilkumar@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6e7a9d853191..5556c5fe489a 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -728,6 +728,41 @@ static const char *reg_initiator_name(enum nl80211_reg_initiator initiator)
 		return "Set by bug";
 	}
 }
+
+static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+				    u32 desired_bw_khz,
+				    const struct ieee80211_reg_rule *reg_rule)
+{
+	const struct ieee80211_power_rule *power_rule;
+	const struct ieee80211_freq_range *freq_range;
+	char max_antenna_gain[32];
+
+	power_rule = &reg_rule->power_rule;
+	freq_range = &reg_rule->freq_range;
+
+	if (!power_rule->max_antenna_gain)
+		snprintf(max_antenna_gain, 32, "N/A");
+	else
+		snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
+
+	REG_DBG_PRINT("cfg80211: Updating information on frequency %d MHz "
+		      "for %d a MHz width channel with regulatory rule:\n",
+		      chan->center_freq,
+		      KHZ_TO_MHZ(desired_bw_khz));
+
+	REG_DBG_PRINT("cfg80211: %d KHz - %d KHz @  KHz), (%s mBi, %d mBm)\n",
+		      freq_range->start_freq_khz,
+		      freq_range->end_freq_khz,
+		      max_antenna_gain,
+		      power_rule->max_eirp);
+}
+#else
+static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
+				    u32 desired_bw_khz,
+				    const struct ieee80211_reg_rule *reg_rule)
+{
+	return;
+}
 #endif
 
 /*
@@ -790,6 +825,8 @@ static void handle_channel(struct wiphy *wiphy,
 		return;
 	}
 
+	chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule);
+
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
@@ -1134,6 +1171,8 @@ static void handle_channel_custom(struct wiphy *wiphy,
 		return;
 	}
 
+	chan_reg_rule_print_dbg(chan, desired_bw_khz, reg_rule);
+
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
 
-- 
cgit v1.2.3


From d91e41b690f795c04af4eb6fe28d2cafd3291051 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 20 Oct 2010 10:18:59 -0700
Subject: cfg80211: prefix REG_DBG_PRINT() with cfg80211

Everyone's doing it, its the cool thing.

Cc: Easwar Krishnan <easwar.krishnan@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5556c5fe489a..3be18d9a944f 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -48,7 +48,7 @@
 #ifdef CONFIG_CFG80211_REG_DEBUG
 #define REG_DBG_PRINT(format, args...) \
 	do { \
-		printk(KERN_DEBUG format , ## args); \
+		printk(KERN_DEBUG "cfg80211: " format , ## args); \
 	} while (0)
 #else
 #define REG_DBG_PRINT(args...)
@@ -745,12 +745,12 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
 	else
 		snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
 
-	REG_DBG_PRINT("cfg80211: Updating information on frequency %d MHz "
+	REG_DBG_PRINT("Updating information on frequency %d MHz "
 		      "for %d a MHz width channel with regulatory rule:\n",
 		      chan->center_freq,
 		      KHZ_TO_MHZ(desired_bw_khz));
 
-	REG_DBG_PRINT("cfg80211: %d KHz - %d KHz @  KHz), (%s mBi, %d mBm)\n",
+	REG_DBG_PRINT("%d KHz - %d KHz @  KHz), (%s mBi, %d mBm)\n",
 		      freq_range->start_freq_khz,
 		      freq_range->end_freq_khz,
 		      max_antenna_gain,
@@ -819,8 +819,7 @@ static void handle_channel(struct wiphy *wiphy,
 		    r == -ERANGE)
 			return;
 
-		REG_DBG_PRINT("cfg80211: Disabling freq %d MHz\n",
-			      chan->center_freq);
+		REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq);
 		chan->flags = IEEE80211_CHAN_DISABLED;
 		return;
 	}
@@ -878,7 +877,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 			      enum nl80211_reg_initiator initiator)
 {
 	if (!last_request) {
-		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s since "
+		REG_DBG_PRINT("Ignoring regulatory request %s since "
 			      "last_request is not set\n",
 			      reg_initiator_name(initiator));
 		return true;
@@ -886,7 +885,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 
 	if (initiator == NL80211_REGDOM_SET_BY_CORE &&
 	    wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) {
-		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s "
+		REG_DBG_PRINT("Ignoring regulatory request %s "
 			      "since the driver uses its own custom "
 			      "regulatory domain ",
 			      reg_initiator_name(initiator));
@@ -900,7 +899,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 	if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd &&
 	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    !is_world_regdom(last_request->alpha2)) {
-		REG_DBG_PRINT("cfg80211: Ignoring regulatory request %s "
+		REG_DBG_PRINT("Ignoring regulatory request %s "
 			      "since the driver requires its own regulaotry "
 			      "domain to be set first",
 			      reg_initiator_name(initiator));
@@ -1162,7 +1161,7 @@ static void handle_channel_custom(struct wiphy *wiphy,
 			       regd);
 
 	if (r) {
-		REG_DBG_PRINT("cfg80211: Disabling freq %d MHz as custom "
+		REG_DBG_PRINT("Disabling freq %d MHz as custom "
 			      "regd has no rule that fits a %d MHz "
 			      "wide channel\n",
 			      chan->center_freq,
@@ -1662,7 +1661,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
 	if (is_user_regdom_saved()) {
 		/* Unless we're asked to ignore it and reset it */
 		if (reset_user) {
-			REG_DBG_PRINT("cfg80211: Restoring regulatory settings "
+			REG_DBG_PRINT("Restoring regulatory settings "
 			       "including user preference\n");
 			user_alpha2[0] = '9';
 			user_alpha2[1] = '7';
@@ -1673,7 +1672,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
 			 * back as they were for a full restore.
 			 */
 			if (!is_world_regdom(ieee80211_regdom)) {
-				REG_DBG_PRINT("cfg80211: Keeping preference on "
+				REG_DBG_PRINT("Keeping preference on "
 				       "module parameter ieee80211_regdom: %c%c\n",
 				       ieee80211_regdom[0],
 				       ieee80211_regdom[1]);
@@ -1681,7 +1680,7 @@ static void restore_alpha2(char *alpha2, bool reset_user)
 				alpha2[1] = ieee80211_regdom[1];
 			}
 		} else {
-			REG_DBG_PRINT("cfg80211: Restoring regulatory settings "
+			REG_DBG_PRINT("Restoring regulatory settings "
 			       "while preserving user preference for: %c%c\n",
 			       user_alpha2[0],
 			       user_alpha2[1]);
@@ -1689,14 +1688,14 @@ static void restore_alpha2(char *alpha2, bool reset_user)
 			alpha2[1] = user_alpha2[1];
 		}
 	} else if (!is_world_regdom(ieee80211_regdom)) {
-		REG_DBG_PRINT("cfg80211: Keeping preference on "
+		REG_DBG_PRINT("Keeping preference on "
 		       "module parameter ieee80211_regdom: %c%c\n",
 		       ieee80211_regdom[0],
 		       ieee80211_regdom[1]);
 		alpha2[0] = ieee80211_regdom[0];
 		alpha2[1] = ieee80211_regdom[1];
 	} else
-		REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n");
+		REG_DBG_PRINT("Restoring regulatory settings\n");
 }
 
 /*
@@ -1764,7 +1763,7 @@ static void restore_regulatory_settings(bool reset_user)
 
 void regulatory_hint_disconnect(void)
 {
-	REG_DBG_PRINT("cfg80211: All devices are disconnected, going to "
+	REG_DBG_PRINT("All devices are disconnected, going to "
 		      "restore regulatory settings\n");
 	restore_regulatory_settings(false);
 }
@@ -1794,7 +1793,7 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
 	if (!reg_beacon)
 		return -ENOMEM;
 
-	REG_DBG_PRINT("cfg80211: Found new beacon on "
+	REG_DBG_PRINT("Found new beacon on "
 		      "frequency: %d MHz (Ch %d) on %s\n",
 		      beacon_chan->center_freq,
 		      ieee80211_frequency_to_channel(beacon_chan->center_freq),
-- 
cgit v1.2.3


From c7317e41df30c7e04dca46360e5ebb0cb36dda45 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 21 Oct 2010 02:47:25 +0200
Subject: mac80211: minstrel_ht - reduce the overhead of rate sampling

- reduce the number of retransmission attempts for sample rates
- sample lower rates less often
- do not use RTS/CTS for sampling frames
- increase the time between sampling attempts

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 2a18d6602d4a..2d6f0259e0c6 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -407,8 +407,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
 	mi->ampdu_len += info->status.ampdu_len;
 
 	if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
-		mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
-		mi->sample_tries = 3;
+		mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
+		mi->sample_tries = 2;
 		mi->sample_count--;
 	}
 
@@ -506,7 +506,9 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 	if (!mr->retry_updated)
 		minstrel_calc_retransmit(mp, mi, index);
 
-	if (mr->probability < MINSTREL_FRAC(20, 100))
+	if (sample)
+		rate->count = 1;
+	else if (mr->probability < MINSTREL_FRAC(20, 100))
 		rate->count = 2;
 	else if (rtscts)
 		rate->count = mr->retry_count_rtscts;
@@ -562,7 +564,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 	 */
 	if (minstrel_get_duration(sample_idx) >
 	    minstrel_get_duration(mi->max_tp_rate)) {
-		if (mr->sample_skipped < 10)
+		if (mr->sample_skipped < 20)
 			goto next;
 
 		if (mi->sample_slow++ > 2)
@@ -586,6 +588,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
 	struct minstrel_ht_sta *mi = &msp->ht;
 	struct minstrel_priv *mp = priv;
 	int sample_idx;
+	bool sample = false;
 
 	if (rate_control_send_low(sta, priv_sta, txrc))
 		return;
@@ -596,10 +599,11 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
 	info->flags |= mi->tx_flags;
 	sample_idx = minstrel_get_sample_rate(mp, mi);
 	if (sample_idx >= 0) {
+		sample = true;
 		minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
 			txrc, true, false);
 		minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
-			txrc, false, true);
+			txrc, false, false);
 		info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 	} else {
 		minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
@@ -607,7 +611,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
 		minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
 			txrc, false, true);
 	}
-	minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true);
+	minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample);
 
 	ar[3].count = 0;
 	ar[3].idx = -1;
-- 
cgit v1.2.3


From 07caf9d6c9135ae25a760867f37aab90c1008380 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 27 Oct 2010 14:58:29 +0200
Subject: mac80211: refactor debugfs function generation code

refactor mac80211 debugfs code by using a format&copy function, instead of
duplicating the code for each generated function.

this change reduces about 600B from mac80211.ko

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs.c     | 60 ++++++++++++++++++++++++----------------------
 net/mac80211/debugfs.h     |  2 ++
 net/mac80211/debugfs_key.c | 19 +++++++--------
 net/mac80211/debugfs_sta.c | 26 ++++++++------------
 4 files changed, 52 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 18260aa99c56..1f02e599a318 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -21,16 +21,30 @@ int mac80211_open_file_generic(struct inode *inode, struct file *file)
 	return 0;
 }
 
-#define DEBUGFS_READONLY_FILE(name, buflen, fmt, value...)		\
+#define DEBUGFS_FORMAT_BUFFER_SIZE 100
+
+int mac80211_format_buffer(char __user *userbuf, size_t count,
+				  loff_t *ppos, char *fmt, ...)
+{
+	va_list args;
+	char buf[DEBUGFS_FORMAT_BUFFER_SIZE];
+	int res;
+
+	va_start(args, fmt);
+	res = vscnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+}
+
+#define DEBUGFS_READONLY_FILE(name, fmt, value...)			\
 static ssize_t name## _read(struct file *file, char __user *userbuf,	\
 			    size_t count, loff_t *ppos)			\
 {									\
 	struct ieee80211_local *local = file->private_data;		\
-	char buf[buflen];						\
-	int res;							\
 									\
-	res = scnprintf(buf, buflen, fmt "\n", ##value);		\
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);	\
+	return mac80211_format_buffer(userbuf, count, ppos, 		\
+				      fmt "\n", ##value);		\
 }									\
 									\
 static const struct file_operations name## _ops = {			\
@@ -46,13 +60,13 @@ static const struct file_operations name## _ops = {			\
 	debugfs_create_file(#name, mode, phyd, local, &name## _ops);
 
 
-DEBUGFS_READONLY_FILE(frequency, 20, "%d",
+DEBUGFS_READONLY_FILE(frequency, "%d",
 		      local->hw.conf.channel->center_freq);
-DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
+DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
 		      local->total_ps_buffered);
-DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x",
+DEBUGFS_READONLY_FILE(wep_iv, "%#08x",
 		      local->wep_iv & 0xffffff);
-DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s",
+DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s",
 	local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver");
 
 static ssize_t tsf_read(struct file *file, char __user *user_buf,
@@ -60,13 +74,11 @@ static ssize_t tsf_read(struct file *file, char __user *user_buf,
 {
 	struct ieee80211_local *local = file->private_data;
 	u64 tsf;
-	char buf[100];
 
 	tsf = drv_get_tsf(local);
 
-	snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, 19);
+	return mac80211_format_buffer(user_buf, count, ppos, "0x%016llx\n",
+				      (unsigned long long) tsf);
 }
 
 static ssize_t tsf_write(struct file *file,
@@ -131,12 +143,9 @@ static ssize_t noack_read(struct file *file, char __user *user_buf,
 			  size_t count, loff_t *ppos)
 {
 	struct ieee80211_local *local = file->private_data;
-	int res;
-	char buf[10];
 
-	res = scnprintf(buf, sizeof(buf), "%d\n", local->wifi_wme_noack_test);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, res);
+	return mac80211_format_buffer(user_buf, count, ppos, "%d\n",
+				      local->wifi_wme_noack_test);
 }
 
 static ssize_t noack_write(struct file *file,
@@ -168,12 +177,8 @@ static ssize_t uapsd_queues_read(struct file *file, char __user *user_buf,
 				 size_t count, loff_t *ppos)
 {
 	struct ieee80211_local *local = file->private_data;
-	int res;
-	char buf[10];
-
-	res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_queues);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, res);
+	return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n",
+				      local->uapsd_queues);
 }
 
 static ssize_t uapsd_queues_write(struct file *file,
@@ -215,12 +220,9 @@ static ssize_t uapsd_max_sp_len_read(struct file *file, char __user *user_buf,
 				     size_t count, loff_t *ppos)
 {
 	struct ieee80211_local *local = file->private_data;
-	int res;
-	char buf[10];
 
-	res = scnprintf(buf, sizeof(buf), "0x%x\n", local->uapsd_max_sp_len);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, res);
+	return mac80211_format_buffer(user_buf, count, ppos, "0x%x\n",
+				      local->uapsd_max_sp_len);
 }
 
 static ssize_t uapsd_max_sp_len_write(struct file *file,
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 09cc9be34796..7c87529630f5 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -4,6 +4,8 @@
 #ifdef CONFIG_MAC80211_DEBUGFS
 extern void debugfs_hw_add(struct ieee80211_local *local);
 extern int mac80211_open_file_generic(struct inode *inode, struct file *file);
+extern int mac80211_format_buffer(char __user *userbuf, size_t count,
+				  loff_t *ppos, char *fmt, ...);
 #else
 static inline void debugfs_hw_add(struct ieee80211_local *local)
 {
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 1243d1db5c59..5822a6ce7671 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -15,18 +15,17 @@
 #include "debugfs.h"
 #include "debugfs_key.h"
 
-#define KEY_READ(name, prop, buflen, format_string)			\
+#define KEY_READ(name, prop, format_string)				\
 static ssize_t key_##name##_read(struct file *file,			\
 				 char __user *userbuf,			\
 				 size_t count, loff_t *ppos)		\
 {									\
-	char buf[buflen];						\
 	struct ieee80211_key *key = file->private_data;			\
-	int res = scnprintf(buf, buflen, format_string, key->prop);	\
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);	\
+	return mac80211_format_buffer(userbuf, count, ppos, 		\
+				      format_string, key->prop);	\
 }
-#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n")
-#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n")
+#define KEY_READ_D(name) KEY_READ(name, name, "%d\n")
+#define KEY_READ_X(name) KEY_READ(name, name, "0x%x\n")
 
 #define KEY_OPS(name)							\
 static const struct file_operations key_ ##name## _ops = {		\
@@ -39,9 +38,9 @@ static const struct file_operations key_ ##name## _ops = {		\
 		 KEY_READ_##format(name)				\
 		 KEY_OPS(name)
 
-#define KEY_CONF_READ(name, buflen, format_string)			\
-	KEY_READ(conf_##name, conf.name, buflen, format_string)
-#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n")
+#define KEY_CONF_READ(name, format_string)				\
+	KEY_READ(conf_##name, conf.name, format_string)
+#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, "%d\n")
 
 #define KEY_CONF_OPS(name)						\
 static const struct file_operations key_ ##name## _ops = {		\
@@ -59,7 +58,7 @@ KEY_CONF_FILE(keyidx, D);
 KEY_CONF_FILE(hw_key_idx, D);
 KEY_FILE(flags, X);
 KEY_FILE(tx_rx_count, D);
-KEY_READ(ifindex, sdata->name, IFNAMSIZ + 2, "%s\n");
+KEY_READ(ifindex, sdata->name, "%s\n");
 KEY_OPS(ifindex);
 
 static ssize_t key_algorithm_read(struct file *file,
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 4601fea1784d..f0fce37f4069 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -17,20 +17,18 @@
 
 /* sta attributtes */
 
-#define STA_READ(name, buflen, field, format_string)			\
+#define STA_READ(name, field, format_string)				\
 static ssize_t sta_ ##name## _read(struct file *file,			\
 				   char __user *userbuf,		\
 				   size_t count, loff_t *ppos)		\
 {									\
-	int res;							\
 	struct sta_info *sta = file->private_data;			\
-	char buf[buflen];						\
-	res = scnprintf(buf, buflen, format_string, sta->field);	\
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);	\
+	return mac80211_format_buffer(userbuf, count, ppos, 		\
+				      format_string, sta->field);	\
 }
-#define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n")
-#define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n")
-#define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n")
+#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
+#define STA_READ_U(name, field) STA_READ(name, field, "%u\n")
+#define STA_READ_S(name, field) STA_READ(name, field, "%s\n")
 
 #define STA_OPS(name)							\
 static const struct file_operations sta_ ##name## _ops = {		\
@@ -79,22 +77,18 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file,
 					  char __user *userbuf,
 					  size_t count, loff_t *ppos)
 {
-	char buf[20];
 	struct sta_info *sta = file->private_data;
-	int res = scnprintf(buf, sizeof(buf), "%u\n",
-			    skb_queue_len(&sta->ps_tx_buf));
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+	return mac80211_format_buffer(userbuf, count, ppos, "%u\n",
+				      skb_queue_len(&sta->ps_tx_buf));
 }
 STA_OPS(num_ps_buf_frames);
 
 static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
 				    size_t count, loff_t *ppos)
 {
-	char buf[20];
 	struct sta_info *sta = file->private_data;
-	int res = scnprintf(buf, sizeof(buf), "%d\n",
-			    jiffies_to_msecs(jiffies - sta->last_rx));
-	return simple_read_from_buffer(userbuf, count, ppos, buf, res);
+	return mac80211_format_buffer(userbuf, count, ppos, "%d\n",
+				      jiffies_to_msecs(jiffies - sta->last_rx));
 }
 STA_OPS(inactive_ms);
 
-- 
cgit v1.2.3


From ffa56e540c3949c4560dcce45eca247819e183c1 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Thu, 4 Nov 2010 22:59:56 +0100
Subject: mac80211: Remove redundant checks for NULL before calls to
 crypto_free_cipher()

crypto_free_cipher() is a wrapper around crypto_free_tfm() which is a
wrapper around crypto_destroy_tfm() and the latter can handle being passed
a NULL pointer, so checking for NULL in the
ieee80211_aes_key_free()/ieee80211_aes_cmac_key_free() wrappers around
crypto_free_cipher() is pointless and just increase object code size
needlesly and makes us execute extra test/branch instructions that we
don't need.
Btw; don't we have to many wrappers around wrappers ad nauseam here?
Anyway, this patch removes the redundant conditionals.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/aes_ccm.c  | 3 +--
 net/mac80211/aes_cmac.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index d2b03e0851ef..4bd6ef0be380 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -147,6 +147,5 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
 
 void ieee80211_aes_key_free(struct crypto_cipher *tfm)
 {
-	if (tfm)
-		crypto_free_cipher(tfm);
+	crypto_free_cipher(tfm);
 }
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index b4d66cca76d6..d502b2684a66 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -128,6 +128,5 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
 
 void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
 {
-	if (tfm)
-		crypto_free_cipher(tfm);
+	crypto_free_cipher(tfm);
 }
-- 
cgit v1.2.3


From e1f2d8c2cc61d2b9472efe44e8a2b098336914b4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 15 Nov 2010 10:37:30 -0800
Subject: vlan: Fix build warning in vlandev_seq_show()

net/8021q/vlanproc.c: In function 'vlandev_seq_show':
net/8021q/vlanproc.c:283:20: warning: unused variable 'fmt'

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlanproc.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 8a64db19b8a5..d1314cf18adf 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -280,7 +280,6 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
-	static const char fmt[] = "%30s %12lu\n";
 	static const char fmt64[] = "%30s %12llu\n";
 	int i;
 
-- 
cgit v1.2.3


From cc9ff19da9bf76a2f70bcb80225a1c587c162e52 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 3 Nov 2010 04:41:38 +0000
Subject: xfrm: use gre key as flow upper protocol info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GRE Key field is intended to be used for identifying an individual
traffic flow within a tunnel. It is useful to be able to have XFRM
policy selector matches to have different policies for different
GRE tunnels.

Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h      |  2 ++
 include/net/xfrm.h      |  6 ++++++
 net/ipv4/ip_gre.c       | 12 +++++++-----
 net/ipv4/xfrm4_policy.c | 15 +++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 0ac3fb5e0973..7196e6864b8d 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -67,6 +67,7 @@ struct flowi {
 		} dnports;
 
 		__be32		spi;
+		__be32		gre_key;
 
 		struct {
 			__u8	type;
@@ -78,6 +79,7 @@ struct flowi {
 #define fl_icmp_code	uli_u.icmpt.code
 #define fl_ipsec_spi	uli_u.spi
 #define fl_mh_type	uli_u.mht.type
+#define fl_gre_key	uli_u.gre_key
 	__u32           secid;	/* used by xfrm; see secid.txt */
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index bcfb6b24b019..54b283229488 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -805,6 +805,9 @@ __be16 xfrm_flowi_sport(struct flowi *fl)
 	case IPPROTO_MH:
 		port = htons(fl->fl_mh_type);
 		break;
+	case IPPROTO_GRE:
+		port = htonl(fl->fl_gre_key) >> 16;
+		break;
 	default:
 		port = 0;	/*XXX*/
 	}
@@ -826,6 +829,9 @@ __be16 xfrm_flowi_dport(struct flowi *fl)
 	case IPPROTO_ICMPV6:
 		port = htons(fl->fl_icmp_code);
 		break;
+	case IPPROTO_GRE:
+		port = htonl(fl->fl_gre_key) & 0xffff;
+		break;
 	default:
 		port = 0;	/*XXX*/
 	}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index cab2057d5430..aace653710f6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -779,9 +779,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 					.tos = RT_TOS(tos)
 				}
 			},
-			.proto = IPPROTO_GRE
-		}
-;
+			.proto = IPPROTO_GRE,
+			.fl_gre_key = tunnel->parms.o_key
+		};
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
 			dev->stats.tx_carrier_errors++;
 			goto tx_error;
@@ -958,7 +958,8 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 					.tos = RT_TOS(iph->tos)
 				}
 			},
-			.proto = IPPROTO_GRE
+			.proto = IPPROTO_GRE,
+			.fl_gre_key = tunnel->parms.o_key
 		};
 		struct rtable *rt;
 
@@ -1223,7 +1224,8 @@ static int ipgre_open(struct net_device *dev)
 					.tos = RT_TOS(t->parms.iph.tos)
 				}
 			},
-			.proto = IPPROTO_GRE
+			.proto = IPPROTO_GRE,
+			.fl_gre_key = t->parms.o_key
 		};
 		struct rtable *rt;
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index dd1fd8c473fc..4a8c5335770c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -11,6 +11,7 @@
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/inetdevice.h>
+#include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -154,6 +155,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 				fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 			}
 			break;
+
+		case IPPROTO_GRE:
+			if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
+				__be16 *greflags = (__be16 *)xprth;
+				__be32 *gre_hdr = (__be32 *)xprth;
+
+				if (greflags[0] & GRE_KEY) {
+					if (greflags[0] & GRE_CSUM)
+						gre_hdr++;
+					fl->fl_gre_key = gre_hdr[1];
+				}
+			}
+			break;
+
 		default:
 			fl->fl_ipsec_spi = 0;
 			break;
-- 
cgit v1.2.3


From ed9af2e839c06c18f721da2c768fbb444c4a10e5 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 9 Nov 2010 10:47:30 +0000
Subject: net: Move TX queue allocation to alloc_netdev_mq

TX queues are now allocated in alloc_netdev_mq and freed in
free_netdev.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8b500c3e0297..75490670e0a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5136,10 +5136,6 @@ int register_netdevice(struct net_device *dev)
 	if (ret)
 		goto out;
 
-	ret = netif_alloc_netdev_queues(dev);
-	if (ret)
-		goto out;
-
 	netdev_init_queues(dev);
 
 	/* Init, if this function is available */
@@ -5599,6 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
+	if (netif_alloc_netdev_queues(dev))
+		goto free_pcpu;
 
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = queue_count;
@@ -5619,6 +5617,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
+	kfree(dev->_tx);
 free_p:
 	kfree(p);
 	return NULL;
-- 
cgit v1.2.3


From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 9 Nov 2010 10:47:38 +0000
Subject: net: Simplify RX queue allocation

This patch move RX queue allocation to alloc_netdev_mq and freeing of
the queues to free_netdev (symmetric to TX queue allocation).  Each
kobject RX queue takes a reference to the queue's device so that the
device can't be freed before all the kobjects have been released-- this
obviates the need for reference counts specific to RX queues.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +--
 net/core/dev.c            | 19 ++++++++++---------
 net/core/net-sysfs.c      |  7 ++-----
 3 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6e4cfbc53d4c..fccb11f879e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -592,8 +592,7 @@ struct netdev_rx_queue {
 	struct rps_map __rcu		*rps_map;
 	struct rps_dev_flow_table __rcu	*rps_flow_table;
 	struct kobject			kobj;
-	struct netdev_rx_queue		*first;
-	atomic_t			count;
+	struct net_device		*dev;
 } ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RPS */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 75490670e0a9..8725d168d1f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 	}
 	dev->_rx = rx;
 
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
 	for (i = 0; i < count; i++)
-		rx[i].first = rx;
+		rx[i].dev = dev;
 #endif
 	return 0;
 }
@@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	ret = netif_alloc_rx_queues(dev);
-	if (ret)
-		goto out;
-
 	netdev_init_queues(dev);
 
 	/* Init, if this function is available */
@@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = queue_count;
 	dev->real_num_rx_queues = queue_count;
+	if (netif_alloc_rx_queues(dev))
+		goto free_pcpu;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
@@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
 free_p:
 	kfree(p);
 	return NULL;
@@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
 
 	kfree(rcu_dereference_raw(dev->ingress_queue));
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f376..3ba526b56fe3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -706,7 +706,6 @@ static struct attribute *rx_queue_default_attrs[] = {
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
-	struct netdev_rx_queue *first = queue->first;
 	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 
@@ -719,8 +718,7 @@ static void rx_queue_release(struct kobject *kobj)
 	if (flow_table)
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
 
-	if (atomic_dec_and_test(&first->count))
-		kfree(first);
+	dev_put(queue->dev);
 }
 
 static struct kobj_type rx_queue_ktype = {
@@ -732,7 +730,6 @@ static struct kobj_type rx_queue_ktype = {
 static int rx_queue_add_kobject(struct net_device *net, int index)
 {
 	struct netdev_rx_queue *queue = net->_rx + index;
-	struct netdev_rx_queue *first = queue->first;
 	struct kobject *kobj = &queue->kobj;
 	int error = 0;
 
@@ -745,7 +742,7 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
 	}
 
 	kobject_uevent(kobj, KOBJ_ADD);
-	atomic_inc(&first->count);
+	dev_hold(queue->dev);
 
 	return error;
 }
-- 
cgit v1.2.3


From 8a22c99a80b0926585cfcbcc423ee2c49c1fd820 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 14 Nov 2010 17:05:00 +0000
Subject: net/ipv6/mcast.c: Remove unnecessary semicolons

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1444b95ad7e..9c5074528a71 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -257,7 +257,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 		return NULL;
 	idev = __in6_dev_get(dev);
 	if (!idev)
-		return NULL;;
+		return NULL;
 	read_lock_bh(&idev->lock);
 	if (idev->dead) {
 		read_unlock_bh(&idev->lock);
-- 
cgit v1.2.3


From e80516880019aa1f7c5c410276edfea9575ec89f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 15 Nov 2010 06:38:10 +0000
Subject: bridge: add RCU annotation to bridge multicast table

Add modern __rcu annotatations to bridge multicast table.
Use newer hlist macros to avoid direct access to hlist internals.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c   |  4 +--
 net/bridge/br_multicast.c | 78 +++++++++++++++++++++++++++++++----------------
 net/bridge/br_private.h   |  6 ++--
 3 files changed, 56 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index cbfe87f0f34a..2bd11ec6d166 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -223,7 +223,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 	struct net_bridge_port_group *p;
 	struct hlist_node *rp;
 
-	rp = rcu_dereference(br->router_list.first);
+	rp = rcu_dereference(hlist_first_rcu(&br->router_list));
 	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
 		struct net_bridge_port *port, *lport, *rport;
@@ -242,7 +242,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = rcu_dereference(p->next);
 		if ((unsigned long)rport >= (unsigned long)port)
-			rp = rcu_dereference(rp->next);
+			rp = rcu_dereference(hlist_next_rcu(rp));
 	}
 
 	if (!prev)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eb5b256ffc88..326e599f83fb 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -33,6 +33,9 @@
 
 #include "br_private.h"
 
+#define mlock_dereference(X, br) \
+	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
 {
@@ -135,7 +138,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
 	struct br_ip ip;
 
 	if (br->multicast_disabled)
@@ -235,7 +238,8 @@ static void br_multicast_group_expired(unsigned long data)
 	if (mp->ports)
 		goto out;
 
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
+
 	hlist_del_rcu(&mp->hlist[mdb->ver]);
 	mdb->size--;
 
@@ -249,16 +253,20 @@ out:
 static void br_multicast_del_pg(struct net_bridge *br,
 				struct net_bridge_port_group *pg)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
+
+	mdb = mlock_dereference(br->mdb, br);
 
 	mp = br_mdb_ip_get(mdb, &pg->addr);
 	if (WARN_ON(!mp))
 		return;
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (p != pg)
 			continue;
 
@@ -294,10 +302,10 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max,
+static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
 			 int elasticity)
 {
-	struct net_bridge_mdb_htable *old = *mdbp;
+	struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
 	struct net_bridge_mdb_htable *mdb;
 	int err;
 
@@ -569,7 +577,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 	struct net_bridge *br, struct net_bridge_port *port,
 	struct br_ip *group, int hash)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	struct hlist_node *p;
 	unsigned count = 0;
@@ -577,6 +585,7 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 	int elasticity;
 	int err;
 
+	mdb = rcu_dereference_protected(br->mdb, 1);
 	hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
 		count++;
 		if (unlikely(br_ip_equal(group, &mp->addr)))
@@ -642,10 +651,11 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
 	struct net_bridge *br, struct net_bridge_port *port,
 	struct br_ip *group)
 {
-	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	int hash;
 
+	mdb = rcu_dereference_protected(br->mdb, 1);
 	if (!mdb) {
 		if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
 			return NULL;
@@ -660,7 +670,7 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
 
 	case -EAGAIN:
 rehash:
-		mdb = br->mdb;
+		mdb = rcu_dereference_protected(br->mdb, 1);
 		hash = br_ip_hash(mdb, group);
 		break;
 
@@ -692,7 +702,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long now = jiffies;
 	int err;
 
@@ -712,7 +722,9 @@ static int br_multicast_add_group(struct net_bridge *br,
 		goto out;
 	}
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (p->port == port)
 			goto found;
 		if ((unsigned long)p->port < (unsigned long)port)
@@ -1106,7 +1118,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	struct net_bridge_mdb_entry *mp;
 	struct igmpv3_query *ih3;
 	struct net_bridge_port_group *p;
-	struct net_bridge_port_group **pp;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	__be32 group;
@@ -1145,7 +1157,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip4_get(br->mdb, group);
+	mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group);
 	if (!mp)
 		goto out;
 
@@ -1157,7 +1169,9 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	     try_to_del_timer_sync(&mp->timer) >= 0))
 		mod_timer(&mp->timer, now + max_delay);
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (timer_pending(&p->timer) ?
 		    time_after(p->timer.expires, now + max_delay) :
 		    try_to_del_timer_sync(&p->timer) >= 0)
@@ -1178,7 +1192,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
 	struct net_bridge_mdb_entry *mp;
 	struct mld2_query *mld2q;
-	struct net_bridge_port_group *p, **pp;
+	struct net_bridge_port_group *p;
+	struct net_bridge_port_group __rcu **pp;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
 	struct in6_addr *group = NULL;
@@ -1214,7 +1229,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip6_get(br->mdb, group);
+	mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group);
 	if (!mp)
 		goto out;
 
@@ -1225,7 +1240,9 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	     try_to_del_timer_sync(&mp->timer) >= 0))
 		mod_timer(&mp->timer, now + max_delay);
 
-	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+	for (pp = &mp->ports;
+	     (p = mlock_dereference(*pp, br)) != NULL;
+	     pp = &p->next) {
 		if (timer_pending(&p->timer) ?
 		    time_after(p->timer.expires, now + max_delay) :
 		    try_to_del_timer_sync(&p->timer) >= 0)
@@ -1254,7 +1271,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
 	    timer_pending(&br->multicast_querier_timer))
 		goto out;
 
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
 	mp = br_mdb_ip_get(mdb, group);
 	if (!mp)
 		goto out;
@@ -1277,7 +1294,9 @@ static void br_multicast_leave_group(struct net_bridge *br,
 		goto out;
 	}
 
-	for (p = mp->ports; p; p = p->next) {
+	for (p = mlock_dereference(mp->ports, br);
+	     p != NULL;
+	     p = mlock_dereference(p->next, br)) {
 		if (p->port != port)
 			continue;
 
@@ -1625,7 +1644,7 @@ void br_multicast_stop(struct net_bridge *br)
 	del_timer_sync(&br->multicast_query_timer);
 
 	spin_lock_bh(&br->multicast_lock);
-	mdb = br->mdb;
+	mdb = mlock_dereference(br->mdb, br);
 	if (!mdb)
 		goto out;
 
@@ -1729,6 +1748,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 {
 	struct net_bridge_port *port;
 	int err = 0;
+	struct net_bridge_mdb_htable *mdb;
 
 	spin_lock(&br->multicast_lock);
 	if (br->multicast_disabled == !val)
@@ -1741,15 +1761,16 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 	if (!netif_running(br->dev))
 		goto unlock;
 
-	if (br->mdb) {
-		if (br->mdb->old) {
+	mdb = mlock_dereference(br->mdb, br);
+	if (mdb) {
+		if (mdb->old) {
 			err = -EEXIST;
 rollback:
 			br->multicast_disabled = !!val;
 			goto unlock;
 		}
 
-		err = br_mdb_rehash(&br->mdb, br->mdb->max,
+		err = br_mdb_rehash(&br->mdb, mdb->max,
 				    br->hash_elasticity);
 		if (err)
 			goto rollback;
@@ -1774,6 +1795,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 {
 	int err = -ENOENT;
 	u32 old;
+	struct net_bridge_mdb_htable *mdb;
 
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev))
@@ -1782,7 +1804,9 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 	err = -EINVAL;
 	if (!is_power_of_2(val))
 		goto unlock;
-	if (br->mdb && val < br->mdb->size)
+
+	mdb = mlock_dereference(br->mdb, br);
+	if (mdb && val < mdb->size)
 		goto unlock;
 
 	err = 0;
@@ -1790,8 +1814,8 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
 	old = br->hash_max;
 	br->hash_max = val;
 
-	if (br->mdb) {
-		if (br->mdb->old) {
+	if (mdb) {
+		if (mdb->old) {
 			err = -EEXIST;
 rollback:
 			br->hash_max = old;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 75c90edaf7db..b862071bf601 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -72,7 +72,7 @@ struct net_bridge_fdb_entry
 
 struct net_bridge_port_group {
 	struct net_bridge_port		*port;
-	struct net_bridge_port_group	*next;
+	struct net_bridge_port_group __rcu *next;
 	struct hlist_node		mglist;
 	struct rcu_head			rcu;
 	struct timer_list		timer;
@@ -86,7 +86,7 @@ struct net_bridge_mdb_entry
 	struct hlist_node		hlist[2];
 	struct hlist_node		mglist;
 	struct net_bridge		*br;
-	struct net_bridge_port_group	*ports;
+	struct net_bridge_port_group __rcu *ports;
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct timer_list		query_timer;
@@ -227,7 +227,7 @@ struct net_bridge
 	unsigned long			multicast_startup_query_interval;
 
 	spinlock_t			multicast_lock;
-	struct net_bridge_mdb_htable	*mdb;
+	struct net_bridge_mdb_htable __rcu *mdb;
 	struct hlist_head		router_list;
 	struct hlist_head		mglist;
 
-- 
cgit v1.2.3


From a386f99025f13b32502fe5dedf223c20d7283826 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 15 Nov 2010 06:38:11 +0000
Subject: bridge: add proper RCU annotation to should_route_hook

Add br_should_route_hook_t typedef, this is the only way we can
get a clean RCU implementation for function pointer.

Move route_hook to location where it is used.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h             |  4 +++-
 net/bridge/br.c                       |  4 ----
 net/bridge/br_input.c                 | 10 +++++++---
 net/bridge/netfilter/ebtable_broute.c |  3 ++-
 4 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 0d241a5c4909..f7e73c338c40 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,7 +102,9 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern int (*br_should_route_hook)(struct sk_buff *skb);
+
+typedef int (*br_should_route_hook_t)(struct sk_buff *skb);
+extern br_should_route_hook_t __rcu *br_should_route_hook;
 
 #endif
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index c8436fa31344..84bbb82599b2 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -22,8 +22,6 @@
 
 #include "br_private.h"
 
-int (*br_should_route_hook)(struct sk_buff *skb);
-
 static const struct stp_proto br_stp_proto = {
 	.rcv	= br_stp_rcv,
 };
@@ -102,8 +100,6 @@ static void __exit br_deinit(void)
 	br_fdb_fini();
 }
 
-EXPORT_SYMBOL(br_should_route_hook);
-
 module_init(br_init)
 module_exit(br_deinit)
 MODULE_LICENSE("GPL");
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 25207a1f182b..6f6d8e1b776f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -21,6 +21,10 @@
 /* Bridge group multicast address 802.1d (pg 51). */
 const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 
+/* Hook for brouter */
+br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
+EXPORT_SYMBOL(br_should_route_hook);
+
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
@@ -139,7 +143,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
 	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	int (*rhook)(struct sk_buff *skb);
+	br_should_route_hook_t *rhook;
 
 	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
 		return skb;
@@ -173,8 +177,8 @@ forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
 		rhook = rcu_dereference(br_should_route_hook);
-		if (rhook != NULL) {
-			if (rhook(skb))
+		if (rhook) {
+			if ((*rhook)(skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index ae3f106c3908..1bcaf36ad612 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -87,7 +87,8 @@ static int __init ebtable_broute_init(void)
 	if (ret < 0)
 		return ret;
 	/* see br_input.c */
-	rcu_assign_pointer(br_should_route_hook, ebt_broute);
+	rcu_assign_pointer(br_should_route_hook,
+			   (br_should_route_hook_t *)ebt_broute);
 	return 0;
 }
 
-- 
cgit v1.2.3


From b5ed54e94d324f17c97852296d61a143f01b227a Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 15 Nov 2010 06:38:13 +0000
Subject: bridge: fix RCU races with bridge port

The macro br_port_exists() is not enough protection when only
RCU is being used. There is a tiny race where other CPU has cleared port
handler hook, but is bridge port flag might still be set.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c             | 15 +++++++++------
 net/bridge/br_if.c              |  5 +----
 net/bridge/br_netfilter.c       | 13 +++++++------
 net/bridge/br_netlink.c         | 10 ++++++----
 net/bridge/br_notify.c          |  2 +-
 net/bridge/br_private.h         | 14 +++++++++++---
 net/bridge/br_stp_bpdu.c        |  8 ++++----
 net/bridge/netfilter/ebtables.c | 11 +++++------
 8 files changed, 44 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 90512ccfd3e9..2872393b2939 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -238,15 +238,18 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
 int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 {
 	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge_port *port;
 	int ret;
 
-	if (!br_port_exists(dev))
-		return 0;
-
 	rcu_read_lock();
-	fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
-	ret = fdb && fdb->dst->dev != dev &&
-		fdb->dst->state == BR_STATE_FORWARDING;
+	port = br_port_get_rcu(dev);
+	if (!port)
+		ret = 0;
+	else {
+		fdb = __br_fdb_get(port->br, addr);
+		ret = fdb && fdb->dst->dev != dev &&
+			fdb->dst->state == BR_STATE_FORWARDING;
+	}
 	rcu_read_unlock();
 
 	return ret;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 89ad25a76202..427f90a8ab7b 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -475,11 +475,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 {
 	struct net_bridge_port *p;
 
-	if (!br_port_exists(dev))
-		return -EINVAL;
-
 	p = br_port_get(dev);
-	if (p->br != br)
+	if (!p || p->br != br)
 		return -EINVAL;
 
 	del_nbp(p);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 865fd7634b67..ce8b2eed4e73 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -131,17 +131,18 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
-	if (!br_port_exists(dev))
-		return NULL;
-	return &br_port_get_rcu(dev)->br->fake_rtable;
+	struct net_bridge_port *port;
+
+	port = br_port_get_rcu(dev);
+	return port ? &port->br->fake_rtable : NULL;
 }
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
 {
-	if (!br_port_exists(dev))
-		return NULL;
+	struct net_bridge_port *port;
 
-	return br_port_get_rcu(dev)->br->dev;
+	port = br_port_get_rcu(dev);
+	return port ? port->br->dev : NULL;
 }
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4a6a378c84e3..e3de0a428f5d 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,11 +119,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	idx = 0;
 	for_each_netdev(net, dev) {
+		struct net_bridge_port *port = br_port_get(dev);
+
 		/* not a bridge port */
-		if (!br_port_exists(dev) || idx < cb->args[0])
+		if (!port || idx < cb->args[0])
 			goto skip;
 
-		if (br_fill_ifinfo(skb, br_port_get(dev),
+		if (br_fill_ifinfo(skb, port,
 				   NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
@@ -169,9 +171,9 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (!dev)
 		return -ENODEV;
 
-	if (!br_port_exists(dev))
-		return -EINVAL;
 	p = br_port_get(dev);
+	if (!p)
+		return -EINVAL;
 
 	/* if kernel STP is running, don't allow changes */
 	if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 404d4e14c6a7..ef2175c8b91d 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,7 +32,7 @@ struct notifier_block br_device_notifier = {
 static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct net_bridge_port *p = br_port_get(dev);
+	struct net_bridge_port *p;
 	struct net_bridge *br;
 	int err;
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b862071bf601..46e0bec1d7c5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -151,11 +151,19 @@ struct net_bridge_port
 #endif
 };
 
-#define br_port_get_rcu(dev) \
-	((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
-#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
 
+static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
+{
+	struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data);
+	return br_port_exists(dev) ? port : NULL;
+}
+
+static inline struct net_bridge_port *br_port_get(struct net_device *dev)
+{
+	return br_port_exists(dev) ? dev->rx_handler_data : NULL;
+}
+
 struct br_cpu_netstats {
 	u64			rx_packets;
 	u64			rx_bytes;
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 35cf27087b56..3d9a55d3822f 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -141,10 +141,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (!br_port_exists(dev))
-		goto err;
-	p = br_port_get_rcu(dev);
-
 	if (!pskb_may_pull(skb, 4))
 		goto err;
 
@@ -153,6 +149,10 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 	if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0)
 		goto err;
 
+	p = br_port_get_rcu(dev);
+	if (!p)
+		goto err;
+
 	br = p->br;
 	spin_lock(&br->lock);
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a1dcf83f0d58..cbc9f395ab1e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -128,6 +128,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
                 const struct net_device *in, const struct net_device *out)
 {
 	const struct ethhdr *h = eth_hdr(skb);
+	const struct net_bridge_port *p;
 	__be16 ethproto;
 	int verdict, i;
 
@@ -148,13 +149,11 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
 	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
 		return 1;
 	/* rcu_read_lock()ed by nf_hook_slow */
-	if (in && br_port_exists(in) &&
-	    FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
-		   EBT_ILOGICALIN))
+	if (in && (p = br_port_get_rcu(in)) != NULL &&
+	    FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN))
 		return 1;
-	if (out && br_port_exists(out) &&
-	    FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
-		   EBT_ILOGICALOUT))
+	if (out && (p = br_port_get_rcu(out)) != NULL &&
+	    FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
-- 
cgit v1.2.3


From ec1e5610c00c7f5bc530d2aadd47faa473b90a30 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 15 Nov 2010 06:38:14 +0000
Subject: bridge: add RCU annotations to bridge port lookup

br_port_get() renamed to br_port_get_rtnl() to make clear RTNL is held.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c      | 2 +-
 net/bridge/br_netlink.c | 4 ++--
 net/bridge/br_notify.c  | 4 ++--
 net/bridge/br_private.h | 5 +++--
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 427f90a8ab7b..d9d1e2bac1d6 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -475,7 +475,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 {
 	struct net_bridge_port *p;
 
-	p = br_port_get(dev);
+	p = br_port_get_rtnl(dev);
 	if (!p || p->br != br)
 		return -EINVAL;
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e3de0a428f5d..f8bf4c7f842c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -119,7 +119,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 
 	idx = 0;
 	for_each_netdev(net, dev) {
-		struct net_bridge_port *port = br_port_get(dev);
+		struct net_bridge_port *port = br_port_get_rtnl(dev);
 
 		/* not a bridge port */
 		if (!port || idx < cb->args[0])
@@ -171,7 +171,7 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (!dev)
 		return -ENODEV;
 
-	p = br_port_get(dev);
+	p = br_port_get_rtnl(dev);
 	if (!p)
 		return -EINVAL;
 
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index ef2175c8b91d..7d337c9b6082 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -37,10 +37,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	int err;
 
 	/* not a port of a bridge */
-	if (!br_port_exists(dev))
+	p = br_port_get_rtnl(dev);
+	if (!p)
 		return NOTIFY_DONE;
 
-	p = br_port_get(dev);
 	br = p->br;
 
 	switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 46e0bec1d7c5..84aac7734bfc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -159,9 +159,10 @@ static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *d
 	return br_port_exists(dev) ? port : NULL;
 }
 
-static inline struct net_bridge_port *br_port_get(struct net_device *dev)
+static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
 {
-	return br_port_exists(dev) ? dev->rx_handler_data : NULL;
+	return br_port_exists(dev) ?
+		rtnl_dereference(dev->rx_handler_data) : NULL;
 }
 
 struct br_cpu_netstats {
-- 
cgit v1.2.3


From 6b35308850e1679741e8b646cfb7bb3ab5369888 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 15 Nov 2010 20:15:03 -0800
Subject: net: Export netif_get_vlan_features().

ERROR: "netif_get_vlan_features" [drivers/net/xen-netfront.ko] undefined!

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8725d168d1f5..381b8e280162 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1983,6 +1983,7 @@ int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
 	else
 		return 0;
 }
+EXPORT_SYMBOL(netif_get_vlan_features);
 
 /*
  * Returns true if either:
-- 
cgit v1.2.3


From 9d82ca98f71fd686ef2f3017c5e3e6a4871b6e46 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Mon, 15 Nov 2010 20:29:21 +0000
Subject: ipv6: fix missing in6_ifa_put in addrconf

Fix ref count bug introduced by

commit 2de795707294972f6c34bae9de713e502c431296
Author: Lorenzo Colitti <lorenzo@google.com>
Date:   Wed Oct 27 18:16:49 2010 +0000

ipv6: addrconf: don't remove address state on ifdown if the address
is being kept

Fix logic so that addrconf_ifdown() decrements the inet6_ifaddr
refcnt correctly with in6_ifa_put().

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b41ce0f0d514..aaa3ca448d08 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2754,13 +2754,13 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			ifa->state = INET6_IFADDR_STATE_DEAD;
 			spin_unlock_bh(&ifa->state_lock);
 
-			if (state == INET6_IFADDR_STATE_DEAD) {
-				in6_ifa_put(ifa);
-			} else {
+			if (state != INET6_IFADDR_STATE_DEAD) {
 				__ipv6_ifa_notify(RTM_DELADDR, ifa);
 				atomic_notifier_call_chain(&inet6addr_chain,
 							   NETDEV_DOWN, ifa);
 			}
+
+			in6_ifa_put(ifa);
 			write_lock_bh(&idev->lock);
 		}
 	}
-- 
cgit v1.2.3


From 0e3125c755445664f00ad036e4fc2cd32fd52877 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 16 Nov 2010 10:26:47 -0800
Subject: packet: Enhance AF_PACKET implementation to not require high order
 contiguous memory allocation (v4) MIME-Version: 1.0 Content-Type: text/plain;
 charset=UTF-8 Content-Transfer-Encoding: 8bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Version 4 of this patch.

Change notes:
1) Removed extra memset.  Didn't think kcalloc added a GFP_ZERO the way kzalloc did :)

Summary:
It was shown to me recently that systems under high load were driven very deep
into swap when tcpdump was run.  The reason this happened was because the
AF_PACKET protocol has a SET_RINGBUFFER socket option that allows the user space
application to specify how many entries an AF_PACKET socket will have and how
large each entry will be.  It seems the default setting for tcpdump is to set
the ring buffer to 32 entries of 64 Kb each, which implies 32 order 5
allocation.  Thats difficult under good circumstances, and horrid under memory
pressure.

I thought it would be good to make that a bit more usable.  I was going to do a
simple conversion of the ring buffer from contigous pages to iovecs, but
unfortunately, the metadata which AF_PACKET places in these buffers can easily
span a page boundary, and given that these buffers get mapped into user space,
and the data layout doesn't easily allow for a change to padding between frames
to avoid that, a simple iovec change is just going to break user space ABI
consistency.

So I've done this, I've added a three tiered mechanism to the af_packet set_ring
socket option.  It attempts to allocate memory in the following order:

1) Using __get_free_pages with GFP_NORETRY set, so as to fail quickly without
digging into swap

2) Using vmalloc

3) Using __get_free_pages with GFP_NORETRY clear, causing us to try as hard as
needed to get the memory

The effect is that we don't disturb the system as much when we're under load,
while still being able to conduct tcpdumps effectively.

Tested successfully by me.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Maciej Żenczykowski <zenczykowski@gmail.com>
Reported-by: Maciej Żenczykowski <zenczykowski@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 85 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8298e676f5a0..20964560a0ed 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -61,6 +61,7 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -163,8 +164,14 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring);
 
+#define PGV_FROM_VMALLOC 1
+struct pgv {
+	char *buffer;
+	unsigned char flags;
+};
+
 struct packet_ring_buffer {
-	char			**pg_vec;
+	struct pgv		*pg_vec;
 	unsigned int		head;
 	unsigned int		frames_per_block;
 	unsigned int		frame_size;
@@ -283,7 +290,8 @@ static void *packet_lookup_frame(struct packet_sock *po,
 	pg_vec_pos = position / rb->frames_per_block;
 	frame_offset = position % rb->frames_per_block;
 
-	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
+	h.raw = rb->pg_vec[pg_vec_pos].buffer +
+		(frame_offset * rb->frame_size);
 
 	if (status != __packet_get_status(po, h.raw))
 		return NULL;
@@ -2325,37 +2333,74 @@ static const struct vm_operations_struct packet_mmap_ops = {
 	.close	=	packet_mm_close,
 };
 
-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
+			unsigned int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
-		if (likely(pg_vec[i]))
-			free_pages((unsigned long) pg_vec[i], order);
+		if (likely(pg_vec[i].buffer)) {
+			if (pg_vec[i].flags & PGV_FROM_VMALLOC)
+				vfree(pg_vec[i].buffer);
+			else
+				free_pages((unsigned long)pg_vec[i].buffer,
+					   order);
+			pg_vec[i].buffer = NULL;
+		}
 	}
 	kfree(pg_vec);
 }
 
-static inline char *alloc_one_pg_vec_page(unsigned long order)
+static inline char *alloc_one_pg_vec_page(unsigned long order,
+					  unsigned char *flags)
 {
-	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
+	char *buffer = NULL;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
+			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+	buffer = (char *) __get_free_pages(gfp_flags, order);
+
+	if (buffer)
+		return buffer;
+
+	/*
+	 * __get_free_pages failed, fall back to vmalloc
+	 */
+	*flags |= PGV_FROM_VMALLOC;
+	buffer = vmalloc((1 << order) * PAGE_SIZE);
 
-	return (char *) __get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/*
+	 * vmalloc failed, lets dig into swap here
+	 */
+	*flags = 0;
+	gfp_flags &= ~__GFP_NORETRY;
+	buffer = (char *)__get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/*
+	 * complete and utter failure
+	 */
+	return NULL;
 }
 
-static char **alloc_pg_vec(struct tpacket_req *req, int order)
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
 {
 	unsigned int block_nr = req->tp_block_nr;
-	char **pg_vec;
+	struct pgv *pg_vec;
 	int i;
 
-	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
 	if (unlikely(!pg_vec))
 		goto out;
 
 	for (i = 0; i < block_nr; i++) {
-		pg_vec[i] = alloc_one_pg_vec_page(order);
-		if (unlikely(!pg_vec[i]))
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order,
+							 &pg_vec[i].flags);
+		if (unlikely(!pg_vec[i].buffer))
 			goto out_free_pgvec;
 	}
 
@@ -2364,6 +2409,7 @@ out:
 
 out_free_pgvec:
 	free_pg_vec(pg_vec, order, block_nr);
+	kfree(pg_vec);
 	pg_vec = NULL;
 	goto out;
 }
@@ -2371,7 +2417,7 @@ out_free_pgvec:
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring)
 {
-	char **pg_vec = NULL;
+	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, order = 0;
 	struct packet_ring_buffer *rb;
@@ -2533,15 +2579,22 @@ static int packet_mmap(struct file *file, struct socket *sock,
 			continue;
 
 		for (i = 0; i < rb->pg_vec_len; i++) {
-			struct page *page = virt_to_page(rb->pg_vec[i]);
+			struct page *page;
+			void *kaddr = rb->pg_vec[i].buffer;
 			int pg_num;
 
 			for (pg_num = 0; pg_num < rb->pg_vec_pages;
-					pg_num++, page++) {
+					pg_num++) {
+				if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
+					page = vmalloc_to_page(kaddr);
+				else
+					page = virt_to_page(kaddr);
+
 				err = vm_insert_page(vma, start, page);
 				if (unlikely(err))
 					goto out;
 				start += PAGE_SIZE;
+				kaddr += PAGE_SIZE;
 			}
 		}
 	}
-- 
cgit v1.2.3


From 4af429d29b341bb1735f04c2fb960178ed5d52e7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 10 Nov 2010 23:42:00 +0000
Subject: vlan: lockless transmit path

vlan is a stacked device, like tunnels. We should use the lockless
mechanism we are using in tunnels and loopback.

This patch completely removes locking in TX path.

tx stat counters are added into existing percpu stat structure, renamed
from vlan_rx_stats to vlan_pcpu_stats.

Note : this partially reverts commit 2e59af3dcbdf (vlan: multiqueue vlan
device)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c         |  4 +---
 net/8021q/vlan.h         | 18 +++++++++-----
 net/8021q/vlan_core.c    |  4 ++--
 net/8021q/vlan_dev.c     | 61 +++++++++++++++++++++++++++---------------------
 net/8021q/vlan_netlink.c | 20 ----------------
 5 files changed, 50 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 55d2135889fc..dc1071327d87 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -272,13 +272,11 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev_mq(sizeof(struct vlan_dev_info), name,
-				  vlan_setup, real_dev->num_tx_queues);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
 
-	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 4625ba64dfdc..5687c9b95f33 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -19,19 +19,25 @@ struct vlan_priority_tci_mapping {
 
 
 /**
- *	struct vlan_rx_stats - VLAN percpu rx stats
+ *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
  *	@rx_multicast: number of received multicast packets
+ *	@tx_packets: number of transmitted packets
+ *	@tx_bytes: number of transmitted bytes
  *	@syncp: synchronization point for 64bit counters
- *	@rx_errors: number of errors
+ *	@rx_errors: number of rx errors
+ *	@tx_dropped: number of tx drops
  */
-struct vlan_rx_stats {
+struct vlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
-	unsigned long		rx_errors;
+	u32			rx_errors;
+	u32			tx_dropped;
 };
 
 /**
@@ -45,7 +51,7 @@ struct vlan_rx_stats {
  *	@real_dev: underlying netdevice
  *	@real_dev_addr: address of underlying netdevice
  *	@dent: proc dir entry
- *	@vlan_rx_stats: ptr to percpu rx stats
+ *	@vlan_pcpu_stats: ptr to percpu rx stats
  */
 struct vlan_dev_info {
 	unsigned int				nr_ingress_mappings;
@@ -60,7 +66,7 @@ struct vlan_dev_info {
 	unsigned char				real_dev_addr[ETH_ALEN];
 
 	struct proc_dir_entry			*dent;
-	struct vlan_rx_stats __percpu		*vlan_rx_stats;
+	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
 };
 
 static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 69b2f79800a5..ce8e3ab3e7a5 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	struct sk_buff *skb = *skbp;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 
 	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
 	if (!vlan_dev) {
@@ -26,7 +26,7 @@ bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f3c9552f6ba8..2fa3f4a3f60f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -141,7 +141,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev)
 {
 	struct vlan_hdr *vhdr;
-	struct vlan_rx_stats *rx_stats;
+	struct vlan_pcpu_stats *rx_stats;
 	struct net_device *vlan_dev;
 	u16 vlan_id;
 	u16 vlan_tci;
@@ -177,7 +177,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;
 
-		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_pcpu_stats);
 
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
@@ -310,8 +310,6 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
 	unsigned int len;
 	int ret;
@@ -334,10 +332,16 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
-	} else
-		txq->tx_dropped++;
+		struct vlan_pcpu_stats *stats;
+
+		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += len;
+		u64_stats_update_begin(&stats->syncp);
+	} else {
+		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+	}
 
 	return ret;
 }
@@ -696,6 +700,7 @@ static int vlan_dev_init(struct net_device *dev)
 		      (1<<__LINK_STATE_PRESENT);
 
 	dev->features |= real_dev->features & real_dev->vlan_features;
+	dev->features |= NETIF_F_LLTX;
 	dev->gso_max_size = real_dev->gso_max_size;
 
 	/* ipv6 shared card related stuff */
@@ -728,8 +733,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	vlan_dev_set_lockdep_class(dev, subclass);
 
-	vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats);
-	if (!vlan_dev_info(dev)->vlan_rx_stats)
+	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -741,8 +746,8 @@ static void vlan_dev_uninit(struct net_device *dev)
 	struct vlan_dev_info *vlan = vlan_dev_info(dev);
 	int i;
 
-	free_percpu(vlan->vlan_rx_stats);
-	vlan->vlan_rx_stats = NULL;
+	free_percpu(vlan->vlan_pcpu_stats);
+	vlan->vlan_pcpu_stats = NULL;
 	for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
 		while ((pm = vlan->egress_priority_map[i]) != NULL) {
 			vlan->egress_priority_map[i] = pm->next;
@@ -780,33 +785,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	dev_txq_stats_fold(dev, stats);
 
-	if (vlan_dev_info(dev)->vlan_rx_stats) {
-		struct vlan_rx_stats *p, accum = {0};
+	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+		struct vlan_pcpu_stats *p;
+		u32 rx_errors = 0, tx_dropped = 0;
 		int i;
 
 		for_each_possible_cpu(i) {
-			u64 rxpackets, rxbytes, rxmulticast;
+			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
 			unsigned int start;
 
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
+			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rxpackets	= p->rx_packets;
 				rxbytes		= p->rx_bytes;
 				rxmulticast	= p->rx_multicast;
+				txpackets	= p->tx_packets;
+				txbytes		= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets += rxpackets;
-			accum.rx_bytes   += rxbytes;
-			accum.rx_multicast += rxmulticast;
-			/* rx_errors is ulong, not protected by syncp */
-			accum.rx_errors  += p->rx_errors;
+
+			stats->rx_packets	+= rxpackets;
+			stats->rx_bytes		+= rxbytes;
+			stats->multicast	+= rxmulticast;
+			stats->tx_packets	+= txpackets;
+			stats->tx_bytes		+= txbytes;
+			/* rx_errors & tx_dropped are u32 */
+			rx_errors	+= p->rx_errors;
+			tx_dropped	+= p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_errors  = rx_errors;
+		stats->tx_dropped = tx_dropped;
 	}
 	return stats;
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index ddc105734af7..be9a5c19a775 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -101,25 +101,6 @@ static int vlan_changelink(struct net_device *dev,
 	return 0;
 }
 
-static int vlan_get_tx_queues(struct net *net,
-			      struct nlattr *tb[],
-			      unsigned int *num_tx_queues,
-			      unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
@@ -237,7 +218,6 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
 	.priv_size	= sizeof(struct vlan_dev_info),
-	.get_tx_queues  = vlan_get_tx_queues,
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
-- 
cgit v1.2.3


From 213b15ca818adf7766cd7162c2159a6ecdd3bab8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 11 Nov 2010 09:42:45 +0000
Subject: vlan: remove ndo_select_queue() logic

Now vlan are lockless, we dont need special ndo_select_queue() logic.
dev_pick_tx() will do the multiqueue stuff on the real device transmit.

Suggested-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 40 ++--------------------------------------
 1 file changed, 2 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2fa3f4a3f60f..be737539f34d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -346,14 +346,6 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
-static u16 vlan_dev_select_queue(struct net_device *dev, struct sk_buff *skb)
-{
-	struct net_device *rdev = vlan_dev_info(dev)->real_dev;
-	const struct net_device_ops *ops = rdev->netdev_ops;
-
-	return ops->ndo_select_queue(rdev, skb);
-}
-
 static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* TODO: gotta make sure the underlying layer can handle it,
@@ -682,7 +674,7 @@ static const struct header_ops vlan_header_ops = {
 	.parse	 = eth_header_parse,
 };
 
-static const struct net_device_ops vlan_netdev_ops, vlan_netdev_ops_sq;
+static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
@@ -723,10 +715,7 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
 	}
 
-	if (real_dev->netdev_ops->ndo_select_queue)
-		dev->netdev_ops = &vlan_netdev_ops_sq;
-	else
-		dev->netdev_ops = &vlan_netdev_ops;
+	dev->netdev_ops = &vlan_netdev_ops;
 
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
@@ -872,31 +861,6 @@ static const struct net_device_ops vlan_netdev_ops = {
 #endif
 };
 
-static const struct net_device_ops vlan_netdev_ops_sq = {
-	.ndo_select_queue	= vlan_dev_select_queue,
-	.ndo_change_mtu		= vlan_dev_change_mtu,
-	.ndo_init		= vlan_dev_init,
-	.ndo_uninit		= vlan_dev_uninit,
-	.ndo_open		= vlan_dev_open,
-	.ndo_stop		= vlan_dev_stop,
-	.ndo_start_xmit =  vlan_dev_hard_start_xmit,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= vlan_dev_set_mac_address,
-	.ndo_set_rx_mode	= vlan_dev_set_rx_mode,
-	.ndo_set_multicast_list	= vlan_dev_set_rx_mode,
-	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
-	.ndo_do_ioctl		= vlan_dev_ioctl,
-	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats64	= vlan_dev_get_stats64,
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
-	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
-	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-	.ndo_fcoe_enable	= vlan_dev_fcoe_enable,
-	.ndo_fcoe_disable	= vlan_dev_fcoe_disable,
-	.ndo_fcoe_get_wwn	= vlan_dev_fcoe_get_wwn,
-#endif
-};
-
 void vlan_setup(struct net_device *dev)
 {
 	ether_setup(dev);
-- 
cgit v1.2.3


From c31504dc0d1dc853dcee509d9999169a9097a717 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 15 Nov 2010 19:58:26 +0000
Subject: udp: use atomic_inc_not_zero_hint

UDP sockets refcount is usually 2, unless an incoming frame is going to
be queued in receive or backlog queue.

Using atomic_inc_not_zero_hint() permits to reduce latency, because
processor issues less memory transactions.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 net/ipv4/udp.c     | 4 ++--
 net/ipv6/udp.c     | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index a6338d039857..eb0c1f504678 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -57,7 +57,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/poll.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5e0a3a582a59..491ecd3f7a01 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -430,7 +430,7 @@ begin:
 
 	if (result) {
 exact_match:
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score2(result, net, saddr, sport,
 				  daddr, hnum, dif) < badness)) {
@@ -500,7 +500,7 @@ begin:
 		goto begin;
 
 	if (result) {
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
 				  daddr, dport, dif) < badness)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91def93bec85..b541a4e009fb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -227,7 +227,7 @@ begin:
 
 	if (result) {
 exact_match:
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score2(result, net, saddr, sport,
 				  daddr, hnum, dif) < badness)) {
@@ -294,7 +294,7 @@ begin:
 		goto begin;
 
 	if (result) {
-		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
 			result = NULL;
 		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
 					daddr, dport, dif) < badness)) {
-- 
cgit v1.2.3


From f23a478075659db8a4fd62fa6e264a8bb052cc5b Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Mon, 8 Nov 2010 11:51:06 +0200
Subject: mac80211: support hardware TX fragmentation offload

The lower driver is notified when the fragmentation threshold changes
and upon a reconfig of the interface.

If the driver supports hardware TX fragmentation, don't fragment
packets in the stack.

Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      |  6 ++++++
 net/mac80211/cfg.c          |  7 +++++++
 net/mac80211/driver-ops.h   | 14 ++++++++++++++
 net/mac80211/driver-trace.h | 21 +++++++++++++++++++++
 net/mac80211/tx.c           | 11 +++++++++--
 net/mac80211/util.c         |  3 +++
 6 files changed, 60 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9fdf982d1286..6122e8a3297e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1652,6 +1652,11 @@ enum ieee80211_ampdu_mlme_action {
  *	and IV16) for the given key from hardware.
  *	The callback must be atomic.
  *
+ * @set_frag_threshold: Configuration of fragmentation threshold. Assign this
+ *	if the device does fragmentation by itself; if this callback is
+ *	implemented then the stack will not do fragmentation.
+ *	The callback can sleep.
+ *
  * @set_rts_threshold: Configuration of RTS threshold (if device needs it)
  *	The callback can sleep.
  *
@@ -1765,6 +1770,7 @@ struct ieee80211_ops {
 			 struct ieee80211_low_level_stats *stats);
 	void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx,
 			     u32 *iv32, u16 *iv16);
+	int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value);
 	int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value);
 	int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 18bd0e550600..3df12f7d0cfe 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1299,6 +1299,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	int err;
 
+	if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
+		err = drv_set_frag_threshold(local, wiphy->frag_threshold);
+
+		if (err)
+			return err;
+	}
+
 	if (changed & WIPHY_PARAM_COVERAGE_CLASS) {
 		err = drv_set_coverage_class(local, wiphy->coverage_class);
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 16983825f8e8..79019f94f621 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -233,6 +233,20 @@ static inline void drv_get_tkip_seq(struct ieee80211_local *local,
 	trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16);
 }
 
+static inline int drv_set_frag_threshold(struct ieee80211_local *local,
+					u32 value)
+{
+	int ret = 0;
+
+	might_sleep();
+
+	trace_drv_set_frag_threshold(local, value);
+	if (local->ops->set_frag_threshold)
+		ret = local->ops->set_frag_threshold(&local->hw, value);
+	trace_drv_return_int(local, ret);
+	return ret;
+}
+
 static inline int drv_set_rts_threshold(struct ieee80211_local *local,
 					u32 value)
 {
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 6831fb1641c8..431d65500d6a 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -531,6 +531,27 @@ TRACE_EVENT(drv_get_tkip_seq,
 	)
 );
 
+TRACE_EVENT(drv_set_frag_threshold,
+	TP_PROTO(struct ieee80211_local *local, u32 value),
+
+	TP_ARGS(local, value),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, value)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->value = value;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " value:%d",
+		LOCAL_PR_ARG, __entry->value
+	)
+);
+
 TRACE_EVENT(drv_set_rts_threshold,
 	TP_PROTO(struct ieee80211_local *local, u32 value),
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 96c594309506..b392876af7d8 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1033,6 +1033,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 	struct ieee80211_radiotap_header *rthdr =
 		(struct ieee80211_radiotap_header *) skb->data;
 	struct ieee80211_supported_band *sband;
+	bool hw_frag;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
 						   NULL);
@@ -1042,6 +1043,9 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 	info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
 	tx->flags &= ~IEEE80211_TX_FRAGMENTED;
 
+	/* packet is fragmented in HW if we have a non-NULL driver callback */
+	hw_frag = (tx->local->ops->set_frag_threshold != NULL);
+
 	/*
 	 * for every radiotap entry that is present
 	 * (ieee80211_radiotap_iterator_next returns -ENOENT when no more
@@ -1078,7 +1082,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
 			}
 			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP)
 				info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT;
-			if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG)
+			if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) &&
+								!hw_frag)
 				tx->flags |= IEEE80211_TX_FRAGMENTED;
 			break;
 
@@ -1181,8 +1186,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 	/*
 	 * Set this flag (used below to indicate "automatic fragmentation"),
 	 * it will be cleared/left by radiotap as desired.
+	 * Only valid when fragmentation is done by the stack.
 	 */
-	tx->flags |= IEEE80211_TX_FRAGMENTED;
+	if (!local->ops->set_frag_threshold)
+		tx->flags |= IEEE80211_TX_FRAGMENTED;
 
 	/* process and remove the injection radiotap header */
 	if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) {
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0b6fc92bc0d7..e486286ebf1a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1152,6 +1152,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	}
 	mutex_unlock(&local->sta_mtx);
 
+	/* setup fragmentation threshold */
+	drv_set_frag_threshold(local, hw->wiphy->frag_threshold);
+
 	/* setup RTS threshold */
 	drv_set_rts_threshold(local, hw->wiphy->rts_threshold);
 
-- 
cgit v1.2.3


From afe0cbf87500f0585d217deb8c6fd329793a7957 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 10 Nov 2010 12:50:50 +0900
Subject: cfg80211: Add nl80211 antenna configuration

Allow setting of TX and RX antennas configuration via nl80211.

The antenna configuration is defined as a bitmap of allowed antennas to use.
This API can be used to mask out antennas which are not attached or should not
be used for other reasons like regulatory concerns or special setups.

Separate bitmaps are used for RX and TX to allow configuring different antennas
for receiving and transmitting. Each bitmap is 32 bit long, each bit
representing one antenna, starting with antenna 1 at the first bit. If an
antenna bit is set, this means the driver is allowed to use this antenna for RX
or TX respectively; if the bit is not set the hardware is not allowed to use
this antenna.

Using bitmaps has the benefit of allowing for a flexible configuration
interface which can support many different configurations and which can be used
for 802.11n as well as non-802.11n devices. Instead of relying on some hardware
specific assumptions, drivers can use this information to know which antennas
are actually attached to the system and derive their capabilities based on
that.

802.11n devices should enable or disable chains, based on which antennas are
present (If all antennas belonging to a particular chain are disabled, the
entire chain should be disabled). HT capabilities (like STBC, TX Beamforming,
Antenna selection) should be calculated based on the available chains after
applying the antenna masks. Should a 802.11n device have diversity antennas
attached to one of their chains, diversity can be enabled or disabled based on
the antenna information.

Non-802.11n drivers can use the antenna masks to select RX and TX antennas and
to enable or disable antenna diversity.

While covering chainmasks for 802.11n and the standard "legacy" modes "fixed
antenna 1", "fixed antenna 2" and "diversity" this API also allows more rare,
but useful configurations as follows:

1) Send on antenna 1, receive on antenna 2 (or vice versa). This can be used to
have a low gain antenna for TX in order to keep within the regulatory
constraints and a high gain antenna for RX in order to receive weaker signals
("speak softly, but listen harder"). This can be useful for building long-shot
outdoor links. Another usage of this setup is having a low-noise pre-amplifier
on antenna 1 and a power amplifier on the other antenna. This way transmit
noise is mostly kept out of the low noise receive channel.
(This would be bitmaps: tx 1 rx 2).

2) Another similar setup is: Use RX diversity on both antennas, but always send
on antenna 1. Again that would allow us to benefit from a higher gain RX
antenna, while staying within the legal limits.
(This would be: tx 0 rx 3).

3) And finally there can be special experimental setups in research and
development even with pre 802.11n hardware where more than 2 antennas are
available. It's good to keep the API simple, yet flexible.

Signed-off-by: Bruno Randolf <br1@einfach.org>

--
v7:	Made bitmasks 32 bit wide and rebased to latest wireless-testing.
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 25 +++++++++++++++++++++++++
 include/net/cfg80211.h  |  3 +++
 net/wireless/nl80211.c  | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index fb877b5621b7..17c5c8849250 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -804,6 +804,28 @@ enum nl80211_commands {
  * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly
  *	means support for per-station GTKs.
  *
+ * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting.
+ *	This can be used to mask out antennas which are not attached or should
+ *	not be used for transmitting. If an antenna is not selected in this
+ *	bitmap the hardware is not allowed to transmit on this antenna.
+ *
+ *	Each bit represents one antenna, starting with antenna 1 at the first
+ *	bit. Depending on which antennas are selected in the bitmap, 802.11n
+ *	drivers can derive which chainmasks to use (if all antennas belonging to
+ *	a particular chain are disabled this chain should be disabled) and if
+ *	a chain has diversity antennas wether diversity should be used or not.
+ *	HT capabilities (STBC, TX Beamforming, Antenna selection) can be
+ *	derived from the available chains after applying the antenna mask.
+ *	Non-802.11n drivers can derive wether to use diversity or not.
+ *	Drivers may reject configurations or RX/TX mask combinations they cannot
+ *	support by returning -EINVAL.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving.
+ *	This can be used to mask out antennas which are not attached or should
+ *	not be used for receiving. If an antenna is not selected in this bitmap
+ *	the hardware should not be configured to receive on this antenna.
+ *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -973,6 +995,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SUPPORT_IBSS_RSN,
 
+	NL80211_ATTR_WIPHY_ANTENNA_TX,
+	NL80211_ATTR_WIPHY_ANTENNA_RX,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e5702f5ac57c..07425e648a09 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1304,6 +1304,9 @@ struct cfg80211_ops {
 	void	(*mgmt_frame_register)(struct wiphy *wiphy,
 				       struct net_device *dev,
 				       u16 frame_type, bool reg);
+
+	int	(*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant);
+	int	(*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant);
 };
 
 /*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c506241f8637..5e4dda4c0fd3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -166,7 +166,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 
 	[NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
+
 	[NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
+
+	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -526,7 +530,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		    dev->wiphy.rts_threshold);
 	NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
 		    dev->wiphy.coverage_class);
-
 	NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
 		   dev->wiphy.max_scan_ssids);
 	NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
@@ -545,6 +548,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
+	if (dev->ops->get_antenna) {
+		u32 tx_ant = 0, rx_ant = 0;
+		int res;
+		res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant);
+		if (!res) {
+			NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant);
+			NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant);
+		}
+	}
+
 	nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
 	if (!nl_modes)
 		goto nla_put_failure;
@@ -1024,6 +1037,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 			goto bad_res;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
+	    info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
+		u32 tx_ant, rx_ant;
+		if (!rdev->ops->set_antenna) {
+			result = -EOPNOTSUPP;
+			goto bad_res;
+		}
+
+		tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]);
+		rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
+
+		result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
+		if (result)
+			goto bad_res;
+	}
+
 	changed = 0;
 
 	if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) {
-- 
cgit v1.2.3


From 15d967532148a5fcda075282b82a271b6595a386 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 10 Nov 2010 12:50:56 +0900
Subject: mac80211: Add antenna configuration

Allow antenna configuration by calling driver's function for it.

We disallow antenna configuration if the wiphy is already running, mainly to
make life easier for 802.11n drivers which need to recalculate HT capabilites.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      |  2 ++
 net/mac80211/cfg.c          | 19 +++++++++++++++++
 net/mac80211/driver-ops.h   | 23 +++++++++++++++++++++
 net/mac80211/driver-trace.h | 50 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6122e8a3297e..a7323eca08d1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1799,6 +1799,8 @@ struct ieee80211_ops {
 	void (*channel_switch)(struct ieee80211_hw *hw,
 			       struct ieee80211_channel_switch *ch_switch);
 	int (*napi_poll)(struct ieee80211_hw *hw, int budget);
+	int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
+	int (*get_antenna)(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant);
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 3df12f7d0cfe..0c544074479e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1628,6 +1628,23 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
 	ieee80211_queue_work(&local->hw, &local->reconfig_filter);
 }
 
+static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	if (local->started)
+		return -EOPNOTSUPP;
+
+	return drv_set_antenna(local, tx_ant, rx_ant);
+}
+
+static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
+{
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+
+	return drv_get_antenna(local, tx_ant, rx_ant);
+}
+
 struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -1680,4 +1697,6 @@ struct cfg80211_ops mac80211_config_ops = {
 	.mgmt_tx = ieee80211_mgmt_tx,
 	.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
 	.mgmt_frame_register = ieee80211_mgmt_frame_register,
+	.set_antenna = ieee80211_set_antenna,
+	.get_antenna = ieee80211_get_antenna,
 };
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 79019f94f621..4244554d218a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -442,4 +442,27 @@ static inline void drv_channel_switch(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+
+static inline int drv_set_antenna(struct ieee80211_local *local,
+				  u32 tx_ant, u32 rx_ant)
+{
+	int ret = -EOPNOTSUPP;
+	might_sleep();
+	if (local->ops->set_antenna)
+		ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant);
+	trace_drv_set_antenna(local, tx_ant, rx_ant, ret);
+	return ret;
+}
+
+static inline int drv_get_antenna(struct ieee80211_local *local,
+				  u32 *tx_ant, u32 *rx_ant)
+{
+	int ret = -EOPNOTSUPP;
+	might_sleep();
+	if (local->ops->get_antenna)
+		ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant);
+	trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret);
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 431d65500d6a..c2772f23ac9c 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -883,6 +883,56 @@ TRACE_EVENT(drv_channel_switch,
 	)
 );
 
+TRACE_EVENT(drv_set_antenna,
+	TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
+
+	TP_ARGS(local, tx_ant, rx_ant, ret),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, tx_ant)
+		__field(u32, rx_ant)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->tx_ant = tx_ant;
+		__entry->rx_ant = rx_ant;
+		__entry->ret = ret;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
+		LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
+	)
+);
+
+TRACE_EVENT(drv_get_antenna,
+	TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
+
+	TP_ARGS(local, tx_ant, rx_ant, ret),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, tx_ant)
+		__field(u32, rx_ant)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->tx_ant = tx_ant;
+		__entry->rx_ant = rx_ant;
+		__entry->ret = ret;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
+		LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
-- 
cgit v1.2.3


From a619a4c0e1fd4e8c360c63d0df3fa0a401107d69 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Thu, 11 Nov 2010 08:50:18 +0200
Subject: mac80211: Add function to get probe request template for current AP

Chipsets with hardware based connection monitoring need to autonomically
send directed probe-request frames to the AP (in the event of beacon loss,
for example.)

For the hardware to be able to do this, it requires a template for the frame
to transmit to the AP, filled in with the BSSID and SSID of the AP, but also
the supported rate IE's.

This patch adds a function to mac80211, which allows the hardware driver to
fetch this template after association, so it can be configured to the hardware.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     | 15 +++++++++++++++
 net/mac80211/ieee80211_i.h |  4 ++++
 net/mac80211/mlme.c        | 24 ++++++++++++++++++++++++
 net/mac80211/util.c        | 23 ++++++++++++++++++-----
 4 files changed, 61 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a7323eca08d1..af7e84199e62 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2508,6 +2508,21 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
 void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 			       struct ieee80211_sta *pubsta, bool block);
 
+/**
+ * ieee80211_ap_probereq_get - retrieve a Probe Request template
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * Creates a Probe Request template which can, for example, be uploaded to
+ * hardware. The template is filled with bssid, ssid and supported rate
+ * information. This function must only be called from within the
+ * .bss_info_changed callback function and only in managed mode. The function
+ * is only useful when the interface is associated, otherwise it will return
+ * NULL.
+ */
+struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
+					  struct ieee80211_vif *vif);
+
 /**
  * ieee80211_beacon_loss - inform hardware does not receive beacons
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b80c38689927..59a1d38212fd 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1287,6 +1287,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     const u8 *ie, size_t ie_len,
 			     enum ieee80211_band band, u32 rate_mask,
 			     u8 channel);
+struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
+					  u8 *dst,
+					  const u8 *ssid, size_t ssid_len,
+					  const u8 *ie, size_t ie_len);
 void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 			      const u8 *ssid, size_t ssid_len,
 			      const u8 *ie, size_t ie_len);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a3a9421555af..dfc4a316ac1c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1108,6 +1108,30 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&ifmgd->mtx);
 }
 
+struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
+					  struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct sk_buff *skb;
+	const u8 *ssid;
+
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
+		return NULL;
+
+	ASSERT_MGD_MTX(ifmgd);
+
+	if (!ifmgd->associated)
+		return NULL;
+
+	ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
+	skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid,
+					ssid + 2, ssid[1], NULL, 0);
+
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_ap_probereq_get);
+
 static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e486286ebf1a..68d0518254dd 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1011,9 +1011,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 	return pos - buffer;
 }
 
-void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
-			      const u8 *ssid, size_t ssid_len,
-			      const u8 *ie, size_t ie_len)
+struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
+					  u8 *dst,
+					  const u8 *ssid, size_t ssid_len,
+					  const u8 *ie, size_t ie_len)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
@@ -1027,7 +1028,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	if (!buf) {
 		printk(KERN_DEBUG "%s: failed to allocate temporary IE "
 		       "buffer\n", sdata->name);
-		return;
+		return NULL;
 	}
 
 	chan = ieee80211_frequency_to_channel(
@@ -1050,8 +1051,20 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
 	}
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
-	ieee80211_tx_skb(sdata, skb);
 	kfree(buf);
+
+	return skb;
+}
+
+void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
+			      const u8 *ssid, size_t ssid_len,
+			      const u8 *ie, size_t ie_len)
+{
+	struct sk_buff *skb;
+
+	skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len);
+	if (skb)
+		ieee80211_tx_skb(sdata, skb);
 }
 
 u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
-- 
cgit v1.2.3


From 885a46d0f7942d76c2f3860acb45f75237d3bb42 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 11 Nov 2010 15:07:22 +0100
Subject: cfg80211: add support for setting the ad-hoc multicast rate

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 4 ++++
 include/net/cfg80211.h  | 2 ++
 net/wireless/nl80211.c  | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 17c5c8849250..037b4e498890 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -826,6 +826,8 @@ enum nl80211_commands {
  *	the hardware should not be configured to receive on this antenna.
  *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
  *
+ * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -998,6 +1000,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_WIPHY_ANTENNA_TX,
 	NL80211_ATTR_WIPHY_ANTENNA_RX,
 
+	NL80211_ATTR_MCAST_RATE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 07425e648a09..8fd9eebd0cc9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -923,6 +923,7 @@ struct cfg80211_disassoc_request {
  * @privacy: this is a protected network, keys will be configured
  *	after joining
  * @basic_rates: bitmap of basic rates to use when creating the IBSS
+ * @mcast_rate: multicast tx rate (in 100 kbps)
  */
 struct cfg80211_ibss_params {
 	u8 *ssid;
@@ -934,6 +935,7 @@ struct cfg80211_ibss_params {
 	u32 basic_rates;
 	bool channel_fixed;
 	bool privacy;
+	int mcast_rate;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5e4dda4c0fd3..605553842226 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -171,6 +171,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 
 	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
+
+	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -3681,6 +3683,9 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 				return -EINVAL;
 		}
 	}
+	if (info->attrs[NL80211_ATTR_MCAST_RATE])
+		ibss.mcast_rate =
+			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]);
 
 	if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev,
-- 
cgit v1.2.3


From 8f0729b16ae354f9db89394fc1d2d65003455d56 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 11 Nov 2010 15:07:23 +0100
Subject: mac80211: add support for setting the ad-hoc multicast rate

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  6 ++++--
 net/mac80211/ibss.c    |  1 +
 net/mac80211/rate.c    | 19 +++++++++++++++----
 net/mac80211/tx.c      |  5 +++--
 4 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index af7e84199e62..1248369a7c30 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -205,6 +205,7 @@ enum ieee80211_bss_change {
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
  *	the current band.
+ * @mcast_rate: multicast rate for AP and Ad-Hoc (in 100 kbps)
  * @bssid: The BSSID for this BSS
  * @enable_beacon: whether beaconing should be enabled or not
  * @channel_type: Channel type for this BSS -- the hardware might be
@@ -244,6 +245,7 @@ struct ieee80211_bss_conf {
 	u16 assoc_capability;
 	u64 timestamp;
 	u32 basic_rates;
+	u32 mcast_rate;
 	u16 ht_operation_mode;
 	s32 cqm_rssi_thold;
 	u32 cqm_rssi_hyst;
@@ -2663,7 +2665,7 @@ enum rate_control_changed {
  * @rate_idx_mask: user-requested rate mask (not MCS for now)
  * @skb: the skb that will be transmitted, the control information in it needs
  *	to be filled in
- * @ap: whether this frame is sent out in AP mode
+ * @bss: whether this frame is sent out in AP or IBSS mode
  */
 struct ieee80211_tx_rate_control {
 	struct ieee80211_hw *hw;
@@ -2674,7 +2676,7 @@ struct ieee80211_tx_rate_control {
 	bool rts, short_preamble;
 	u8 max_rate_idx;
 	u32 rate_idx_mask;
-	bool ap;
+	bool bss;
 };
 
 struct rate_control_ops {
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 239c4836a946..6fe6837dc134 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -915,6 +915,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 
 	sdata->u.ibss.privacy = params->privacy;
 	sdata->u.ibss.basic_rates = params->basic_rates;
+	sdata->vif.bss_conf.mcast_rate = params->mcast_rate;
 
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
 
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 33f76993da08..76de4f8d9327 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -211,10 +211,20 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
 	return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
 }
 
-static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
+static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u32 mcast_rate,
+				  struct ieee80211_supported_band *sband)
 {
 	u8 i;
 
+	if (mcast_rate) {
+		for (i = 0; i < sband->n_bitrates; i++) {
+			if (sband->bitrates[i].bitrate == mcast_rate) {
+				*idx = i;
+				return;
+			}
+		}
+	}
+
 	if (basic_rates == 0)
 		return; /* assume basic rates unknown and accept rate */
 	if (*idx < 0)
@@ -222,7 +232,7 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
 	if (basic_rates & (1 << *idx))
 		return; /* selected rate is a basic rate */
 
-	for (i = *idx + 1; i <= max_rate_idx; i++) {
+	for (i = *idx + 1; i <= sband->n_bitrates; i++) {
 		if (basic_rates & (1 << i)) {
 			*idx = i;
 			return;
@@ -243,10 +253,11 @@ bool rate_control_send_low(struct ieee80211_sta *sta,
 		info->control.rates[0].count =
 			(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
 			1 : txrc->hw->max_rate_tries;
-		if (!sta && txrc->ap)
+		if (!sta && txrc->bss)
 			rc_send_low_broadcast(&info->control.rates[0].idx,
 					      txrc->bss_conf->basic_rates,
-					      txrc->sband->n_bitrates);
+					      txrc->bss_conf->mcast_rate,
+					      txrc->sband);
 		return true;
 	}
 	return false;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b392876af7d8..e69483647f33 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -622,7 +622,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		txrc.max_rate_idx = -1;
 	else
 		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
-	txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP;
+	txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
+		    tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);
 
 	/* set up RTS protection if desired */
 	if (len > tx->local->hw.wiphy->rts_threshold) {
@@ -2308,7 +2309,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		txrc.max_rate_idx = -1;
 	else
 		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
-	txrc.ap = true;
+	txrc.bss = true;
 	rate_control_get_rate(sdata, NULL, &txrc);
 
 	info->control.vif = vif;
-- 
cgit v1.2.3


From 37d668004289d202f71dc5bfdadf6c18b34577a2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 15 Nov 2010 11:12:33 +0000
Subject: net/atm: Remove unnecessary casts of netdev_priv

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 2 +-
 net/atm/lec.c    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ad2b232a2055..fce2eae8d476 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -97,7 +97,7 @@ static LIST_HEAD(br2684_devs);
 
 static inline struct br2684_dev *BRPRIV(const struct net_device *net_dev)
 {
-	return (struct br2684_dev *)netdev_priv(net_dev);
+	return netdev_priv(net_dev);
 }
 
 static inline struct net_device *list_entry_brdev(const struct list_head *le)
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 181d70c73d70..179e04bc99dd 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -816,8 +816,7 @@ static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
 	if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
 		return -EINVAL;
 	vcc->proto_data = dev_lec[arg];
-	return lec_mcast_make((struct lec_priv *)netdev_priv(dev_lec[arg]),
-				vcc);
+	return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
 }
 
 /* Initialize device. */
-- 
cgit v1.2.3


From da6836500414ae734cd9873c2d553db594f831e9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 16 Nov 2010 11:52:38 +0000
Subject: netfilter: allow hooks to pass error code back up the stack

SELinux would like to pass certain fatal errors back up the stack.  This patch
implements the generic netfilter support for this functionality.

Based-on-patch-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 ++
 net/netfilter/core.c      | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 03317c8d4077..1893837b3966 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -33,6 +33,8 @@
 
 #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
 
+#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+
 /* only for userspace compatibility */
 #ifndef __KERNEL__
 /* Generic cache responses from hook functions.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 85dabb86be6f..32fcbe290c04 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -173,9 +173,11 @@ next_hook:
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
-	} else if (verdict == NF_DROP) {
+	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
 		kfree_skb(skb);
-		ret = -EPERM;
+		ret = -(verdict >> NF_VERDICT_BITS);
+		if (ret == 0)
+			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
 		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
-- 
cgit v1.2.3


From ee58681195bf243bafc44ca53f3c24429d096cce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 16 Nov 2010 11:52:49 +0000
Subject: network: tcp_connect should return certain errors up the stack

The current tcp_connect code completely ignores errors from sending an skb.
This makes sense in many situations (like -ENOBUFFS) but I want to be able to
immediately fail connections if they are denied by the SELinux netfilter hook.
Netfilter does not normally return ECONNREFUSED when it drops a packet so we
respect that error code as a final and fatal error that can not be recovered.

Based-on-patch-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 05b1ecf36763..bb8f547fc7d2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2592,6 +2592,7 @@ int tcp_connect(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
+	int err;
 
 	tcp_connect_init(sk);
 
@@ -2614,7 +2615,9 @@ int tcp_connect(struct sock *sk)
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	if (err == -ECONNREFUSED)
+		return err;
 
 	/* We change tp->snd_nxt after the tcp_transmit_skb() call
 	 * in order to make this packet get counted in tcpOutSegs.
-- 
cgit v1.2.3


From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:30:14 +0000
Subject: rtnetlink: Link address family API

Each net_device contains address family specific data such as
per device settings and statistics. We already expose this data
via procfs/sysfs and partially netlink.

The netlink method requires the requester to send one RTM_GETLINK
request for each address family it wishes to receive data of
and then merge this data itself.

This patch implements a new API which combines all address family
specific link data in a new netlink attribute IFLA_AF_SPEC.
IFLA_AF_SPEC contains a sequence of nested attributes, one for each
address family which in turn defines the structure of its own
attribute. Example:

   [IFLA_AF_SPEC] = {
       [AF_INET] = {
           [IFLA_INET_CONF] = ...,
       },
       [AF_INET6] = {
           [IFLA_INET6_FLAGS] = ...,
           [IFLA_INET6_CONF] = ...,
       }
   }

The API also allows for address families to implement a function
which parses the IFLA_AF_SPEC attribute sent by userspace to
implement address family specific link options.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  19 +++++++
 include/net/rtnetlink.h |  31 ++++++++++
 net/core/rtnetlink.c    | 147 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 195 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 2fc66dd783ee..443d04a66a79 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -80,6 +80,24 @@ struct rtnl_link_ifmap {
 	__u8	port;
 };
 
+/*
+ * IFLA_AF_SPEC
+ *   Contains nested attributes for address family specific attributes.
+ *   Each address family may create a attribute with the address family
+ *   number as type and create its own attribute structure in it.
+ *
+ *   Example:
+ *   [IFLA_AF_SPEC] = {
+ *       [AF_INET] = {
+ *           [IFLA_INET_CONF] = ...,
+ *       },
+ *       [AF_INET6] = {
+ *           [IFLA_INET6_FLAGS] = ...,
+ *           [IFLA_INET6_CONF] = ...,
+ *       }
+ *   }
+ */
+
 enum {
 	IFLA_UNSPEC,
 	IFLA_ADDRESS,
@@ -116,6 +134,7 @@ enum {
 	IFLA_STATS64,
 	IFLA_VF_PORTS,
 	IFLA_PORT_SELF,
+	IFLA_AF_SPEC,
 	__IFLA_MAX
 };
 
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e013c68bfb00..35be0bbcd7da 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -83,6 +83,37 @@ extern void	__rtnl_link_unregister(struct rtnl_link_ops *ops);
 extern int	rtnl_link_register(struct rtnl_link_ops *ops);
 extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
 
+/**
+ * 	struct rtnl_af_ops - rtnetlink address family operations
+ *
+ *	@list: Used internally
+ * 	@family: Address family
+ * 	@fill_link_af: Function to fill IFLA_AF_SPEC with address family
+ * 		       specific netlink attributes.
+ * 	@get_link_af_size: Function to calculate size of address family specific
+ * 			   netlink attributes exlusive the container attribute.
+ * 	@parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
+ *			net_device accordingly.
+ */
+struct rtnl_af_ops {
+	struct list_head	list;
+	int			family;
+
+	int			(*fill_link_af)(struct sk_buff *skb,
+						const struct net_device *dev);
+	size_t			(*get_link_af_size)(const struct net_device *dev);
+
+	int			(*parse_link_af)(struct net_device *dev,
+						 const struct nlattr *attr);
+};
+
+extern int	__rtnl_af_register(struct rtnl_af_ops *ops);
+extern void	__rtnl_af_unregister(struct rtnl_af_ops *ops);
+
+extern int	rtnl_af_register(struct rtnl_af_ops *ops);
+extern void	rtnl_af_unregister(struct rtnl_af_ops *ops);
+
+
 extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]);
 extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 	char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 841c287ef40a..bf69e5871b1a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	return size;
 }
 
+static LIST_HEAD(rtnl_af_ops);
+
+static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
+{
+	const struct rtnl_af_ops *ops;
+
+	list_for_each_entry(ops, &rtnl_af_ops, list) {
+		if (ops->family == family)
+			return ops;
+	}
+
+	return NULL;
+}
+
+/**
+ * __rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * The caller must hold the rtnl_mutex.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	list_add_tail(&ops->list, &rtnl_af_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_register);
+
+/**
+ * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
+ * @ops: struct rtnl_af_ops * to register
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_af_register(struct rtnl_af_ops *ops)
+{
+	int err;
+
+	rtnl_lock();
+	err = __rtnl_af_register(ops);
+	rtnl_unlock();
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtnl_af_register);
+
+/**
+ * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+void __rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	list_del(&ops->list);
+}
+EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
+
+/**
+ * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
+ * @ops: struct rtnl_af_ops * to unregister
+ */
+void rtnl_af_unregister(struct rtnl_af_ops *ops)
+{
+	rtnl_lock();
+	__rtnl_af_unregister(ops);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(rtnl_af_unregister);
+
+static size_t rtnl_link_get_af_size(const struct net_device *dev)
+{
+	struct rtnl_af_ops *af_ops;
+	size_t size;
+
+	/* IFLA_AF_SPEC */
+	size = nla_total_size(sizeof(struct nlattr));
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->get_link_af_size) {
+			/* AF_* + nested data */
+			size += nla_total_size(sizeof(struct nlattr)) +
+				af_ops->get_link_af_size(dev);
+		}
+	}
+
+	return size;
+}
+
 static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
 {
 	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
@@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(4) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
-	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+	       + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	struct nlmsghdr *nlh;
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr;
+	struct nlattr *attr, *af_spec;
+	struct rtnl_af_ops *af_ops;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
 	if (nlh == NULL)
@@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			goto nla_put_failure;
 	}
 
+	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+		goto nla_put_failure;
+
+	list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+		if (af_ops->fill_link_af) {
+			struct nlattr *af;
+			int err;
+
+			if (!(af = nla_nest_start(skb, af_ops->family)))
+				goto nla_put_failure;
+
+			err = af_ops->fill_link_af(skb, dev);
+
+			/*
+			 * Caller may return ENODATA to indicate that there
+			 * was no data to be dumped. This is not an error, it
+			 * means we should trim the attribute header and
+			 * continue.
+			 */
+			if (err == -ENODATA)
+				nla_nest_cancel(skb, af);
+			else if (err < 0)
+				goto nla_put_failure;
+
+			nla_nest_end(skb, af);
+		}
+	}
+
+	nla_nest_end(skb, af_spec);
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
+	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			goto errout;
 		modified = 1;
 	}
+
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				continue;
+
+			if (!af_ops->parse_link_af)
+				continue;
+
+			err = af_ops->parse_link_af(dev, af);
+			if (err < 0)
+				goto errout;
+
+			modified = 1;
+		}
+	}
 	err = 0;
 
 errout:
-- 
cgit v1.2.3


From 9f0f7272ac9506f4c8c05cc597b7e376b0b9f3e4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:32:48 +0000
Subject: ipv4: AF_INET link address family

Implements the AF_INET link address family exposing the per
device configuration settings via netlink using the attribute
IFLA_INET_CONF.

The format of IFLA_INET_CONF differs depending on the direction
the attribute is sent. The attribute sent by the kernel consists
of a u32 array, basically a 1:1 copy of in_device->cnf.data[].
The attribute expected by the kernel must consist of a sequence
of nested u32 attributes, each representing a change request,
e.g.
	[IFLA_INET_CONF] = {
		[IPV4_DEVCONF_FORWARDING] = 1,
		[IPV4_DEVCONF_NOXFRM] = 0,
	}

libnl userspace API documentation and example available from:
http://www.infradead.org/~tgr/libnl/doc-git/group__link__inet.html

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  8 ++++++
 net/ipv4/devinet.c      | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'net')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 443d04a66a79..2e02e4d7b11e 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -147,6 +147,14 @@ enum {
 #define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
 #endif
 
+enum {
+	IFLA_INET_UNSPEC,
+	IFLA_INET_CONF,
+	__IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
 /* ifi_flags.
 
    IFF_* flags.
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dc94b0316b78..71afc26c2df8 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1256,6 +1256,72 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
 }
 
+static size_t inet_get_link_af_size(const struct net_device *dev)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+
+	if (!in_dev)
+		return 0;
+
+	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
+}
+
+static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *nla;
+	int i;
+
+	if (!in_dev)
+		return -ENODATA;
+
+	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
+		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
+
+	return 0;
+}
+
+static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
+	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
+};
+
+static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *a, *tb[IFLA_INET_MAX+1];
+	int err, rem;
+
+	if (!in_dev)
+		return -EOPNOTSUPP;
+
+	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[IFLA_INET_CONF]) {
+		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
+			int cfgid = nla_type(a);
+
+			if (nla_len(a) < 4)
+				return -EINVAL;
+
+			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
+				return -EINVAL;
+		}
+	}
+
+	if (tb[IFLA_INET_CONF]) {
+		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
+			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_SYSCTL
 
 static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -1619,6 +1685,13 @@ static __net_initdata struct pernet_operations devinet_ops = {
 	.exit = devinet_exit_net,
 };
 
+static struct rtnl_af_ops inet_af_ops = {
+	.family		  = AF_INET,
+	.fill_link_af	  = inet_fill_link_af,
+	.get_link_af_size = inet_get_link_af_size,
+	.parse_link_af	  = inet_parse_link_af,
+};
+
 void __init devinet_init(void)
 {
 	register_pernet_subsys(&devinet_ops);
@@ -1626,6 +1699,8 @@ void __init devinet_init(void)
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
 
+	rtnl_af_register(&inet_af_ops);
+
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
-- 
cgit v1.2.3


From b382b191ea9e9ccefc437433d23befe91f4a8925 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Tue, 16 Nov 2010 04:33:57 +0000
Subject: ipv6: AF_INET6 link address family

IPv6 already exposes some address family data via netlink in the
IFLA_PROTINFO attribute if RTM_GETLINK request is sent with the
address family set to AF_INET6. We take over this format and
reuse all the code.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 122 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 89 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index aaa3ca448d08..470e7acb91df 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3831,6 +3831,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
 }
 
+static inline size_t inet6_ifla6_size(void)
+{
+	return nla_total_size(4) /* IFLA_INET6_FLAGS */
+	     + nla_total_size(sizeof(struct ifla_cacheinfo))
+	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+	     + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+}
+
 static inline size_t inet6_if_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -3838,13 +3847,7 @@ static inline size_t inet6_if_nlmsg_size(void)
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
 	       + nla_total_size(4) /* IFLA_MTU */
 	       + nla_total_size(4) /* IFLA_LINK */
-	       + nla_total_size( /* IFLA_PROTINFO */
-			nla_total_size(4) /* IFLA_INET6_FLAGS */
-			+ nla_total_size(sizeof(struct ifla_cacheinfo))
-			+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
-			+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-			+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-		 );
+	       + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
 }
 
 static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
@@ -3891,15 +3894,76 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 	}
 }
 
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+	struct nlattr *nla;
+	struct ifla_cacheinfo ci;
+
+	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+
+	ci.max_reasm_len = IPV6_MAXPLEN;
+	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
+		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+	ci.reachable_time = idev->nd_parms->reachable_time;
+	ci.retrans_time = idev->nd_parms->retrans_time;
+	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+	if (nla == NULL)
+		goto nla_put_failure;
+	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+	/* XXX - MC not implemented */
+
+	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+	if (!__in6_dev_get(dev))
+		return 0;
+
+	return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
+	if (!idev)
+		return -ENODATA;
+
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	return -EOPNOTSUPP;
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
-	struct nlattr *nla;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	void *protoinfo;
-	struct ifla_cacheinfo ci;
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
 	if (nlh == NULL)
@@ -3926,31 +3990,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	if (protoinfo == NULL)
 		goto nla_put_failure;
 
-	NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
-
-	ci.max_reasm_len = IPV6_MAXPLEN;
-	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-	ci.reachable_time = idev->nd_parms->reachable_time;
-	ci.retrans_time = idev->nd_parms->retrans_time;
-	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
-	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-	if (nla == NULL)
-		goto nla_put_failure;
-	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
-
-	/* XXX - MC not implemented */
-
-	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
-	if (nla == NULL)
-		goto nla_put_failure;
-	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
-	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
-	if (nla == NULL)
+	if (inet6_fill_ifla6_attrs(skb, idev) < 0)
 		goto nla_put_failure;
-	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
 	nla_nest_end(skb, protoinfo);
 	return nlmsg_end(skb, nlh);
@@ -4621,6 +4662,13 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
+static struct rtnl_af_ops inet6_ops = {
+	.family		  = AF_INET6,
+	.fill_link_af	  = inet6_fill_link_af,
+	.get_link_af_size = inet6_get_link_af_size,
+	.parse_link_af	  = inet6_parse_link_af,
+};
+
 /*
  *	Init / cleanup code
  */
@@ -4672,6 +4720,10 @@ int __init addrconf_init(void)
 
 	addrconf_verify(0);
 
+	err = rtnl_af_register(&inet6_ops);
+	if (err < 0)
+		goto errout_af;
+
 	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
 	if (err < 0)
 		goto errout;
@@ -4687,6 +4739,8 @@ int __init addrconf_init(void)
 
 	return 0;
 errout:
+	rtnl_af_unregister(&inet6_ops);
+errout_af:
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 errlo:
 	unregister_pernet_subsys(&addrconf_ops);
@@ -4707,6 +4761,8 @@ void addrconf_cleanup(void)
 
 	rtnl_lock();
 
+	__rtnl_af_unregister(&inet6_ops);
+
 	/* clean dev list */
 	for_each_netdev(&init_net, dev) {
 		if (__in6_dev_get(dev) == NULL)
-- 
cgit v1.2.3


From 5811662b15db018c740c57d037523683fd3e6123 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 12 Nov 2010 18:43:55 +0000
Subject: net: use the macros defined for the members of flowi

Use the macros defined for the members of flowi to clean the code up.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h             | 12 +++++-------
 net/atm/clip.c                  |  3 ++-
 net/bridge/br_netfilter.c       |  9 ++-------
 net/dccp/ipv4.c                 | 13 +++++--------
 net/decnet/dn_route.c           | 22 +++++++++------------
 net/decnet/dn_rules.c           |  2 +-
 net/ipv4/af_inet.c              | 18 +++++------------
 net/ipv4/arp.c                  | 12 ++++++------
 net/ipv4/fib_frontend.c         | 28 ++++++++-------------------
 net/ipv4/fib_semantics.c        |  8 ++------
 net/ipv4/icmp.c                 | 28 +++++++++------------------
 net/ipv4/igmp.c                 |  8 +++-----
 net/ipv4/inet_connection_sock.c | 15 ++++++--------
 net/ipv4/ip_gre.c               | 31 +++++++++--------------------
 net/ipv4/ip_output.c            | 25 ++++++++++--------------
 net/ipv4/ipip.c                 | 20 ++++++-------------
 net/ipv4/ipmr.c                 | 18 +++++------------
 net/ipv4/netfilter.c            |  8 ++++----
 net/ipv4/raw.c                  |  7 +++----
 net/ipv4/route.c                | 43 ++++++++++++++++-------------------------
 net/ipv4/syncookies.c           | 15 ++++++--------
 net/ipv4/udp.c                  | 12 +++++-------
 net/ipv4/xfrm4_policy.c         |  8 ++------
 net/ipv6/ip6mr.c                |  4 +---
 net/ipv6/netfilter.c            |  6 ++----
 net/ipv6/route.c                | 24 ++++++-----------------
 net/ipv6/sit.c                  | 14 ++++++--------
 net/l2tp/l2tp_ip.c              | 12 +++++-------
 net/netfilter/ipvs/ip_vs_ctl.c  |  6 ++----
 net/netfilter/ipvs/ip_vs_xmit.c | 34 ++++++++------------------------
 net/netfilter/xt_TEE.c          | 12 ++++++------
 net/rxrpc/ar-peer.c             | 10 +++++-----
 32 files changed, 171 insertions(+), 316 deletions(-)

(limited to 'net')

diff --git a/include/net/route.h b/include/net/route.h
index 5cd46d1c0e14..b8c1f7703fc6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -169,14 +169,12 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst,
 {
 	struct flowi fl = { .oif = oif,
 			    .mark = sk->sk_mark,
-			    .nl_u = { .ip4_u = { .daddr = dst,
-						 .saddr = src,
-						 .tos   = tos } },
+			    .fl4_dst = dst,
+			    .fl4_src = src,
+			    .fl4_tos = tos,
 			    .proto = protocol,
-			    .uli_u = { .ports =
-				       { .sport = sport,
-					 .dport = dport } } };
-
+			    .fl_ip_sport = sport,
+			    .fl_ip_dport = dport };
 	int err;
 	struct net *net = sock_net(sk);
 
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ff956d1115bc..d257da50fcfb 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	struct atmarp_entry *entry;
 	int error;
 	struct clip_vcc *clip_vcc;
-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} };
+	struct flowi fl = { .fl4_dst = ip,
+			    .fl4_tos = 1 };
 	struct rtable *rt;
 
 	if (vcc->push != clip_push) {
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ce8b2eed4e73..6e1392093911 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -413,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 	if (dnat_took_place(skb)) {
 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
 			struct flowi fl = {
-				.nl_u = {
-					.ip4_u = {
-						 .daddr = iph->daddr,
-						 .saddr = 0,
-						 .tos = RT_TOS(iph->tos) },
-				},
-				.proto = 0,
+				.fl4_dst = iph->daddr,
+				.fl4_tos = RT_TOS(iph->tos),
 			};
 			struct in_device *in_dev = __in_dev_get_rcu(dev);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f69ea114829..45a434f94169 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 {
 	struct rtable *rt;
 	struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
-			    .nl_u = { .ip4_u =
-				      { .daddr = ip_hdr(skb)->saddr,
-					.saddr = ip_hdr(skb)->daddr,
-					.tos = RT_CONN_FLAGS(sk) } },
+			    .fl4_dst = ip_hdr(skb)->saddr,
+			    .fl4_src = ip_hdr(skb)->daddr,
+			    .fl4_tos = RT_CONN_FLAGS(sk),
 			    .proto = sk->sk_protocol,
-			    .uli_u = { .ports =
-				       { .sport = dccp_hdr(skb)->dccph_dport,
-					 .dport = dccp_hdr(skb)->dccph_sport }
-				     }
+			    .fl_ip_sport = dccp_hdr(skb)->dccph_dport,
+			    .fl_ip_dport = dccp_hdr(skb)->dccph_sport
 			  };
 
 	security_skb_classify_flow(skb, &fl);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 474d54dd08c2..8280e43c8861 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -271,10 +271,10 @@ static void dn_dst_link_failure(struct sk_buff *skb)
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) |
-		(fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) |
+	return ((fl1->fld_dst ^ fl2->fld_dst) |
+		(fl1->fld_src ^ fl2->fld_src) |
 		(fl1->mark ^ fl2->mark) |
-		(fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) |
+		(fl1->fld_scope ^ fl2->fld_scope) |
 		(fl1->oif ^ fl2->oif) |
 		(fl1->iif ^ fl2->iif)) == 0;
 }
@@ -882,11 +882,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re
 
 static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
 {
-	struct flowi fl = { .nl_u = { .dn_u =
-				      { .daddr = oldflp->fld_dst,
-					.saddr = oldflp->fld_src,
-					.scope = RT_SCOPE_UNIVERSE,
-				     } },
+	struct flowi fl = { .fld_dst = oldflp->fld_dst,
+			    .fld_src = oldflp->fld_src,
+			    .fld_scope = RT_SCOPE_UNIVERSE,
 			    .mark = oldflp->mark,
 			    .iif = init_net.loopback_dev->ifindex,
 			    .oif = oldflp->oif };
@@ -1230,11 +1228,9 @@ static int dn_route_input_slow(struct sk_buff *skb)
 	int flags = 0;
 	__le16 gateway = 0;
 	__le16 local_src = 0;
-	struct flowi fl = { .nl_u = { .dn_u =
-				     { .daddr = cb->dst,
-				       .saddr = cb->src,
-				       .scope = RT_SCOPE_UNIVERSE,
-				    } },
+	struct flowi fl = { .fld_dst = cb->dst,
+			    .fld_src = cb->src,
+			    .fld_scope = RT_SCOPE_UNIVERSE,
 			    .mark = skb->mark,
 			    .iif = skb->dev->ifindex };
 	struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 48fdf10be7a1..6eb91df3c550 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 
 unsigned dnet_addr_type(__le16 addr)
 {
-	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
+	struct flowi fl = { .fld_dst = addr };
 	struct dn_fib_res res;
 	unsigned ret = RTN_UNICAST;
 	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f581f77d1097..f2b61107df6c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk)
 	struct flowi fl = {
 		.oif = sk->sk_bound_dev_if,
 		.mark = sk->sk_mark,
-		.nl_u = {
-			.ip4_u = {
-				.daddr	= daddr,
-				.saddr	= inet->inet_saddr,
-				.tos	= RT_CONN_FLAGS(sk),
-			},
-		},
+		.fl4_dst = daddr,
+		.fl4_src = inet->inet_saddr,
+		.fl4_tos = RT_CONN_FLAGS(sk),
 		.proto = sk->sk_protocol,
 		.flags = inet_sk_flowi_flags(sk),
-		.uli_u = {
-			.ports = {
-				.sport = inet->inet_sport,
-				.dport = inet->inet_dport,
-			},
-		},
+		.fl_ip_sport = inet->inet_sport,
+		.fl_ip_dport = inet->inet_dport,
 	};
 
 	security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index d8e540c5b071..7833f17b648a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
 
 static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 {
-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
-						 .saddr = tip } } };
+	struct flowi fl = { .fl4_dst = sip,
+			    .fl4_src = tip };
 	struct rtable *rt;
 	int flag = 0;
 	/*unsigned long now; */
@@ -1061,8 +1061,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 	if (r->arp_flags & ATF_PERM)
 		r->arp_flags |= ATF_COM;
 	if (dev == NULL) {
-		struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
-						    .tos = RTO_ONLINK } };
+		struct flowi fl = { .fl4_dst = ip,
+				    .fl4_tos = RTO_ONLINK };
 		struct rtable *rt;
 		err = ip_route_output_key(net, &rt, &fl);
 		if (err != 0)
@@ -1169,8 +1169,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 
 	ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
 	if (dev == NULL) {
-		struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
-						    .tos = RTO_ONLINK } };
+		struct flowi fl = { .fl4_dst = ip,
+				    .fl4_tos = RTO_ONLINK };
 		struct rtable *rt;
 		err = ip_route_output_key(net, &rt, &fl);
 		if (err != 0)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb6f69a8f27a..d3a1112b9d9c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -158,11 +158,7 @@ static void fib_flush(struct net *net)
 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 {
 	struct flowi fl = {
-		.nl_u = {
-			.ip4_u = {
-				.daddr = addr
-			}
-		},
+		.fl4_dst = addr,
 		.flags = FLOWI_FLAG_MATCH_ANY_IIF
 	};
 	struct fib_result res = { 0 };
@@ -193,7 +189,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
 					    const struct net_device *dev,
 					    __be32 addr)
 {
-	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+	struct flowi		fl = { .fl4_dst = addr };
 	struct fib_result	res;
 	unsigned ret = RTN_BROADCAST;
 	struct fib_table *local_table;
@@ -247,13 +243,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 {
 	struct in_device *in_dev;
 	struct flowi fl = {
-		.nl_u = {
-			.ip4_u = {
-				.daddr = src,
-				.saddr = dst,
-				.tos = tos
-			}
-		},
+		.fl4_dst = src,
+		.fl4_src = dst,
+		.fl4_tos = tos,
 		.mark = mark,
 		.iif = oif
 	};
@@ -853,13 +845,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
 	struct fib_result       res;
 	struct flowi            fl = {
 		.mark = frn->fl_mark,
-		.nl_u = {
-			.ip4_u = {
-				.daddr = frn->fl_addr,
-				.tos = frn->fl_tos,
-				.scope = frn->fl_scope
-			}
-		}
+		.fl4_dst = frn->fl_addr,
+		.fl4_tos = frn->fl_tos,
+		.fl4_scope = frn->fl_scope,
 	};
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3e0da3ef6116..12d3dc3df1b7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 		rcu_read_lock();
 		{
 			struct flowi fl = {
-				.nl_u = {
-					.ip4_u = {
-						.daddr = nh->nh_gw,
-						.scope = cfg->fc_scope + 1,
-					},
-				},
+				.fl4_dst = nh->nh_gw,
+				.fl4_scope = cfg->fc_scope + 1,
 				.oif = nh->nh_oif,
 			};
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c6e2affafbd3..4daebd17b6ed 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 			daddr = icmp_param->replyopts.faddr;
 	}
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(ip_hdr(skb)->tos) } },
+		struct flowi fl = { .fl4_dst= daddr,
+				    .fl4_src = rt->rt_spec_dst,
+				    .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(net, &rt, &fl))
@@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
 	{
 		struct flowi fl = {
-			.nl_u = {
-				.ip4_u = {
-					.daddr = icmp_param.replyopts.srr ?
-						icmp_param.replyopts.faddr :
-						iph->saddr,
-					.saddr = saddr,
-					.tos = RT_TOS(tos)
-				}
-			},
+			.fl4_dst = icmp_param.replyopts.srr ?
+				   icmp_param.replyopts.faddr : iph->saddr,
+			.fl4_src = saddr,
+			.fl4_tos = RT_TOS(tos),
 			.proto = IPPROTO_ICMP,
-			.uli_u = {
-				.icmpt = {
-					.type = type,
-					.code = code
-				}
-			}
+			.fl_icmp_type = type,
+			.fl_icmp_code = code,
 		};
 		int err;
 		struct rtable *rt2;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a1bf2f49e716..afb1e82a59f9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,8 +314,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 	{
 		struct flowi fl = { .oif = dev->ifindex,
-				    .nl_u = { .ip4_u = {
-				    .daddr = IGMPV3_ALL_MCR } },
+				    .fl4_dst = IGMPV3_ALL_MCR,
 				    .proto = IPPROTO_IGMP };
 		if (ip_route_output_key(net, &rt, &fl)) {
 			kfree_skb(skb);
@@ -660,7 +659,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 
 	{
 		struct flowi fl = { .oif = dev->ifindex,
-				    .nl_u = { .ip4_u = { .daddr = dst } },
+				    .fl4_dst = dst,
 				    .proto = IPPROTO_IGMP };
 		if (ip_route_output_key(net, &rt, &fl))
 			return -1;
@@ -1425,8 +1424,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 /* RTNL is locked */
 static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 {
-	struct flowi fl = { .nl_u = { .ip4_u =
-				      { .daddr = imr->imr_multiaddr.s_addr } } };
+	struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr };
 	struct rtable *rt;
 	struct net_device *dev = NULL;
 	struct in_device *idev = NULL;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7174370b1195..06f5f8f482f0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,17 +358,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	struct ip_options *opt = inet_rsk(req)->opt;
 	struct flowi fl = { .oif = sk->sk_bound_dev_if,
 			    .mark = sk->sk_mark,
-			    .nl_u = { .ip4_u =
-				      { .daddr = ((opt && opt->srr) ?
-						  opt->faddr :
-						  ireq->rmt_addr),
-					.saddr = ireq->loc_addr,
-					.tos = RT_CONN_FLAGS(sk) } },
+			    .fl4_dst = ((opt && opt->srr) ?
+					  opt->faddr : ireq->rmt_addr),
+			    .fl4_src = ireq->loc_addr,
+			    .fl4_tos = RT_CONN_FLAGS(sk),
 			    .proto = sk->sk_protocol,
 			    .flags = inet_sk_flowi_flags(sk),
-			    .uli_u = { .ports =
-				       { .sport = inet_sk(sk)->inet_sport,
-					 .dport = ireq->rmt_port } } };
+			    .fl_ip_sport = inet_sk(sk)->inet_sport,
+			    .fl_ip_dport = ireq->rmt_port };
 	struct net *net = sock_net(sk);
 
 	security_req_classify_flow(req, &fl);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index aace653710f6..897210adaa77 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -772,14 +772,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	{
 		struct flowi fl = {
 			.oif = tunnel->parms.link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = dst,
-					.saddr = tiph->saddr,
-					.tos = RT_TOS(tos)
-				}
-			},
-			.proto = IPPROTO_GRE,
+			.fl4_dst = dst,
+			.fl4_src = tiph->saddr,
+			.fl4_tos = RT_TOS(tos),
 			.fl_gre_key = tunnel->parms.o_key
 		};
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -951,13 +946,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 	if (iph->daddr) {
 		struct flowi fl = {
 			.oif = tunnel->parms.link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = iph->daddr,
-					.saddr = iph->saddr,
-					.tos = RT_TOS(iph->tos)
-				}
-			},
+			.fl4_dst = iph->daddr,
+			.fl4_src = iph->saddr,
+			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_GRE,
 			.fl_gre_key = tunnel->parms.o_key
 		};
@@ -1217,13 +1208,9 @@ static int ipgre_open(struct net_device *dev)
 	if (ipv4_is_multicast(t->parms.iph.daddr)) {
 		struct flowi fl = {
 			.oif = t->parms.link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = t->parms.iph.daddr,
-					.saddr = t->parms.iph.saddr,
-					.tos = RT_TOS(t->parms.iph.tos)
-				}
-			},
+			.fl4_dst = t->parms.iph.daddr,
+			.fl4_src = t->parms.iph.saddr,
+			.fl4_tos = RT_TOS(t->parms.iph.tos),
 			.proto = IPPROTO_GRE,
 			.fl_gre_key = t->parms.o_key
 		};
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 439d2a34ee44..5090c7ff525e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -341,15 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb)
 		{
 			struct flowi fl = { .oif = sk->sk_bound_dev_if,
 					    .mark = sk->sk_mark,
-					    .nl_u = { .ip4_u =
-						      { .daddr = daddr,
-							.saddr = inet->inet_saddr,
-							.tos = RT_CONN_FLAGS(sk) } },
+					    .fl4_dst = daddr,
+					    .fl4_src = inet->inet_saddr,
+					    .fl4_tos = RT_CONN_FLAGS(sk),
 					    .proto = sk->sk_protocol,
 					    .flags = inet_sk_flowi_flags(sk),
-					    .uli_u = { .ports =
-						       { .sport = inet->inet_sport,
-							 .dport = inet->inet_dport } } };
+					    .fl_ip_sport = inet->inet_sport,
+					    .fl_ip_dport = inet->inet_dport };
 
 			/* If this fails, retransmit mechanism of transport layer will
 			 * keep trying until route appears or the connection times
@@ -1404,14 +1402,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 
 	{
 		struct flowi fl = { .oif = arg->bound_dev_if,
-				    .nl_u = { .ip4_u =
-					      { .daddr = daddr,
-						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(ip_hdr(skb)->tos) } },
-				    /* Not quite clean, but right. */
-				    .uli_u = { .ports =
-					       { .sport = tcp_hdr(skb)->dest,
-						 .dport = tcp_hdr(skb)->source } },
+				    .fl4_dst = daddr,
+				    .fl4_src = rt->rt_spec_dst,
+				    .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
+				    .fl_ip_sport = tcp_hdr(skb)->dest,
+				    .fl_ip_dport = tcp_hdr(skb)->source,
 				    .proto = sk->sk_protocol,
 				    .flags = ip_reply_arg_flowi_flags(arg) };
 		security_skb_classify_flow(skb, &fl);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index cd300aaee78f..e70ad581398e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	{
 		struct flowi fl = {
 			.oif = tunnel->parms.link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = dst,
-					.saddr = tiph->saddr,
-					.tos = RT_TOS(tos)
-				}
-			},
+			.fl4_dst = dst,
+			.fl4_src= tiph->saddr,
+			.fl4_tos = RT_TOS(tos),
 			.proto = IPPROTO_IPIP
 		};
 
@@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 	if (iph->daddr) {
 		struct flowi fl = {
 			.oif = tunnel->parms.link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = iph->daddr,
-					.saddr = iph->saddr,
-					.tos = RT_TOS(iph->tos)
-				}
-			},
+			.fl4_dst = iph->daddr,
+			.fl4_src = iph->saddr,
+			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
 		struct rtable *rt;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ef2b0089e0ea..3f3a9afd73e0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	if (vif->flags & VIFF_TUNNEL) {
 		struct flowi fl = {
 			.oif = vif->link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = vif->remote,
-					.saddr = vif->local,
-					.tos = RT_TOS(iph->tos)
-				}
-			},
+			.fl4_dst = vif->remote,
+			.fl4_src = vif->local,
+			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
 
@@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	} else {
 		struct flowi fl = {
 			.oif = vif->link,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = iph->daddr,
-					.tos = RT_TOS(iph->tos)
-				}
-			},
+			.fl4_dst = iph->daddr,
+			.fl4_tos = RT_TOS(iph->tos),
 			.proto = IPPROTO_IPIP
 		};
 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index d88a46c54fd1..994a1f29ebbc 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
 	 */
 	if (addr_type == RTN_LOCAL) {
-		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.fl4_dst = iph->daddr;
 		if (type == RTN_LOCAL)
-			fl.nl_u.ip4_u.saddr = iph->saddr;
-		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+			fl.fl4_src = iph->saddr;
+		fl.fl4_tos = RT_TOS(iph->tos);
 		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
 		fl.mark = skb->mark;
 		fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
@@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
-		fl.nl_u.ip4_u.daddr = iph->saddr;
+		fl.fl4_dst = iph->saddr;
 		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1f85ef289895..a3d5ab786e81 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	{
 		struct flowi fl = { .oif = ipc.oif,
 				    .mark = sk->sk_mark,
-				    .nl_u = { .ip4_u =
-					      { .daddr = daddr,
-						.saddr = saddr,
-						.tos = tos } },
+				    .fl4_dst = daddr,
+				    .fl4_src = saddr,
+				    .fl4_tos = tos,
 				    .proto = inet->hdrincl ? IPPROTO_RAW :
 							     sk->sk_protocol,
 				  };
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 66610ea3c87b..ec2333fb637e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -684,17 +684,17 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
 					const struct flowi *fl2)
 {
-	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+	return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
 		(fl1->iif ^ fl2->iif)) == 0);
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+	return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+		((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
 		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+		(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
 		(fl1->oif ^ fl2->oif) |
 		(fl1->iif ^ fl2->iif)) == 0;
 }
@@ -2089,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	struct fib_result res;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
-	struct flowi fl = { .nl_u = { .ip4_u =
-				      { .daddr = daddr,
-					.saddr = saddr,
-					.tos = tos,
-					.scope = RT_SCOPE_UNIVERSE,
-				      } },
+	struct flowi fl = { .fl4_dst	= daddr,
+			    .fl4_src	= saddr,
+			    .fl4_tos	= tos,
+			    .fl4_scope	= RT_SCOPE_UNIVERSE,
 			    .mark = skb->mark,
 			    .iif = dev->ifindex };
 	unsigned	flags = 0;
@@ -2480,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 				const struct flowi *oldflp)
 {
 	u32 tos	= RT_FL_TOS(oldflp);
-	struct flowi fl = { .nl_u = { .ip4_u =
-				      { .daddr = oldflp->fl4_dst,
-					.saddr = oldflp->fl4_src,
-					.tos = tos & IPTOS_RT_MASK,
-					.scope = ((tos & RTO_ONLINK) ?
-						  RT_SCOPE_LINK :
-						  RT_SCOPE_UNIVERSE),
-				      } },
+	struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+			    .fl4_src = oldflp->fl4_src,
+			    .fl4_tos = tos & IPTOS_RT_MASK,
+			    .fl4_scope = ((tos & RTO_ONLINK) ?
+					  RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
 			    .mark = oldflp->mark,
 			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
@@ -2944,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
-			.nl_u = {
-				.ip4_u = {
-					.daddr = dst,
-					.saddr = src,
-					.tos = rtm->rtm_tos,
-				},
-			},
+			.fl4_dst = dst,
+			.fl4_src = src,
+			.fl4_tos = rtm->rtm_tos,
 			.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
 			.mark = mark,
 		};
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650cace2180d..47519205a014 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 */
 	{
 		struct flowi fl = { .mark = sk->sk_mark,
-				    .nl_u = { .ip4_u =
-					      { .daddr = ((opt && opt->srr) ?
-							  opt->faddr :
-							  ireq->rmt_addr),
-						.saddr = ireq->loc_addr,
-						.tos = RT_CONN_FLAGS(sk) } },
+				    .fl4_dst = ((opt && opt->srr) ?
+						opt->faddr : ireq->rmt_addr),
+				    .fl4_src = ireq->loc_addr,
+				    .fl4_tos = RT_CONN_FLAGS(sk),
 				    .proto = IPPROTO_TCP,
 				    .flags = inet_sk_flowi_flags(sk),
-				    .uli_u = { .ports =
-					       { .sport = th->dest,
-						 .dport = th->source } } };
+				    .fl_ip_sport = th->dest,
+				    .fl_ip_dport = th->source };
 		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
 			reqsk_free(req);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 491ecd3f7a01..b37181da487c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rt == NULL) {
 		struct flowi fl = { .oif = ipc.oif,
 				    .mark = sk->sk_mark,
-				    .nl_u = { .ip4_u =
-					      { .daddr = faddr,
-						.saddr = saddr,
-						.tos = tos } },
+				    .fl4_dst = faddr,
+				    .fl4_src = saddr,
+				    .fl4_tos = tos,
 				    .proto = sk->sk_protocol,
 				    .flags = inet_sk_flowi_flags(sk),
-				    .uli_u = { .ports =
-					       { .sport = inet->inet_sport,
-						 .dport = dport } } };
+				    .fl_ip_sport = inet->inet_sport,
+				    .fl_ip_dport = dport };
 		struct net *net = sock_net(sk);
 
 		security_sk_classify_flow(sk, &fl);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 4a8c5335770c..b057d40addec 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -23,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 					  xfrm_address_t *daddr)
 {
 	struct flowi fl = {
-		.nl_u = {
-			.ip4_u = {
-				.tos = tos,
-				.daddr = daddr->a4,
-			},
-		},
+		.fl4_dst = daddr->a4,
+		.fl4_tos = tos,
 	};
 	struct dst_entry *dst;
 	struct rtable *rt;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6f32ffce7022..9fab274019c0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
 
 	fl = (struct flowi) {
 		.oif = vif->link,
-		.nl_u = { .ip6_u =
-				{ .daddr = ipv6h->daddr, }
-		}
+		.fl6_dst = ipv6h->daddr,
 	};
 
 	dst = ip6_route_output(net, NULL, &fl);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 7155b2451d7c..35915e8617f0 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	struct flowi fl = {
 		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
 		.mark = skb->mark,
-		.nl_u =
-		{ .ip6_u =
-		  { .daddr = iph->daddr,
-		    .saddr = iph->saddr, } },
+		.fl6_dst = iph->daddr,
+		.fl6_src = iph->saddr,
 	};
 
 	dst = ip6_route_output(net, skb->sk, &fl);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 96455ffb76fb..c346ccf66ae1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -558,11 +558,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 {
 	struct flowi fl = {
 		.oif = oif,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *daddr,
-			},
-		},
+		.fl6_dst = *daddr,
 	};
 	struct dst_entry *dst;
 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
@@ -778,13 +774,9 @@ void ip6_route_input(struct sk_buff *skb)
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct flowi fl = {
 		.iif = skb->dev->ifindex,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = iph->daddr,
-				.saddr = iph->saddr,
-				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
-			},
-		},
+		.fl6_dst = iph->daddr,
+		.fl6_src = iph->saddr,
+		.fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 		.mark = skb->mark,
 		.proto = iph->nexthdr,
 	};
@@ -1463,12 +1455,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 	struct ip6rd_flowi rdfl = {
 		.fl = {
 			.oif = dev->ifindex,
-			.nl_u = {
-				.ip6_u = {
-					.daddr = *dest,
-					.saddr = *src,
-				},
-			},
+			.fl6_dst = *dest,
+			.fl6_src = *src,
 		},
 	};
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d6bfaec3bbbf..6e48a80d0f25 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -730,10 +730,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = dst,
-						.saddr = tiph->saddr,
-						.tos = RT_TOS(tos) } },
+		struct flowi fl = { .fl4_dst = dst,
+				    .fl4_src = tiph->saddr,
+				    .fl4_tos = RT_TOS(tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -855,10 +854,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 	iph = &tunnel->parms.iph;
 
 	if (iph->daddr) {
-		struct flowi fl = { .nl_u = { .ip4_u =
-					      { .daddr = iph->daddr,
-						.saddr = iph->saddr,
-						.tos = RT_TOS(iph->tos) } },
+		struct flowi fl = { .fl4_dst = iph->daddr,
+				    .fl4_src = iph->saddr,
+				    .fl4_tos = RT_TOS(iph->tos),
 				    .oif = tunnel->parms.link,
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0bf6a59545ab..04635e88e8ed 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 
 		{
 			struct flowi fl = { .oif = sk->sk_bound_dev_if,
-					    .nl_u = { .ip4_u = {
-							.daddr = daddr,
-							.saddr = inet->inet_saddr,
-							.tos = RT_CONN_FLAGS(sk) } },
+					    .fl4_dst = daddr,
+					    .fl4_src = inet->inet_saddr,
+					    .fl4_tos = RT_CONN_FLAGS(sk),
 					    .proto = sk->sk_protocol,
 					    .flags = inet_sk_flowi_flags(sk),
-					    .uli_u = { .ports = {
-							 .sport = inet->inet_sport,
-							 .dport = inet->inet_dport } } };
+					    .fl_ip_sport = inet->inet_sport,
+					    .fl_ip_dport = inet->inet_dport };
 
 			/* If this fails, retransmit mechanism of transport layer will
 			 * keep trying until route appears or the connection times
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5f5daa30b0af..c6f293639220 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -110,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
 	struct rt6_info *rt;
 	struct flowi fl = {
 		.oif = 0,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *addr,
-				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
+		.fl6_dst = *addr,
+		.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
 	};
 
 	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 10bd39c0ae2d..5325a3fbe4ac 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 		if (!(rt = (struct rtable *)
 		      __ip_vs_dst_check(dest, rtos))) {
 			struct flowi fl = {
-				.oif = 0,
-				.nl_u = {
-					.ip4_u = {
-						.daddr = dest->addr.ip,
-						.saddr = 0,
-						.tos = rtos, } },
+				.fl4_dst = dest->addr.ip,
+				.fl4_tos = rtos,
 			};
 
 			if (ip_route_output_key(net, &rt, &fl)) {
@@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
 		spin_unlock(&dest->dst_lock);
 	} else {
 		struct flowi fl = {
-			.oif = 0,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = daddr,
-					.saddr = 0,
-					.tos = rtos, } },
+			.fl4_dst = daddr,
+			.fl4_tos = rtos,
 		};
 
 		if (ip_route_output_key(net, &rt, &fl)) {
@@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
 		refdst_drop(orefdst);
 	} else {
 		struct flowi fl = {
-			.oif = 0,
-			.nl_u = {
-				.ip4_u = {
-					.daddr = iph->daddr,
-					.saddr = iph->saddr,
-					.tos = RT_TOS(iph->tos),
-				}
-			},
+			.fl4_dst = iph->daddr,
+			.fl4_src = iph->saddr,
+			.fl4_tos = RT_TOS(iph->tos),
 			.mark = skb->mark,
 		};
 		struct rtable *rt;
@@ -216,12 +203,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
 {
 	struct dst_entry *dst;
 	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = *daddr,
-			},
-		},
+		.fl6_dst = *daddr,
 	};
 
 	dst = ip6_route_output(net, NULL, &fl);
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 22a2d421e7eb..5128a6c4cb2c 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 			return false;
 		fl.oif = info->priv->oif;
 	}
-	fl.nl_u.ip4_u.daddr = info->gw.ip;
-	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
-	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+	fl.fl4_dst = info->gw.ip;
+	fl.fl4_tos = RT_TOS(iph->tos);
+	fl.fl4_scope = RT_SCOPE_UNIVERSE;
 	if (ip_route_output_key(net, &rt, &fl) != 0)
 		return false;
 
@@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 			return false;
 		fl.oif = info->priv->oif;
 	}
-	fl.nl_u.ip6_u.daddr = info->gw.in6;
-	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
-				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+	fl.fl6_dst = info->gw.in6;
+	fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+			   (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
 	dst = ip6_route_output(net, NULL, &fl);
 	if (dst == NULL)
 		return false;
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 9f1729bd60de..a53fb25a64ed 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 	case AF_INET:
 		fl.oif = 0;
 		fl.proto = IPPROTO_UDP,
-		fl.nl_u.ip4_u.saddr = 0;
-		fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
-		fl.nl_u.ip4_u.tos = 0;
+		fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr;
+		fl.fl4_src = 0;
+		fl.fl4_tos = 0;
 		/* assume AFS.CM talking to AFS.FS */
-		fl.uli_u.ports.sport = htons(7001);
-		fl.uli_u.ports.dport = htons(7000);
+		fl.fl_ip_sport = htons(7001);
+		fl.fl_ip_dport = htons(7000);
 		break;
 	default:
 		BUG();
-- 
cgit v1.2.3


From 9ea19481db31d614f729f346bdcf28e4e60ff14a Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 16 Nov 2010 06:31:39 +0000
Subject: net: zero kobject in rx_queue_release

netif_set_real_num_rx_queues() can decrement and increment
the number of rx queues. For example ixgbe does this as
features and offloads are toggled. Presumably this could
also happen across down/up on most devices if the available
resources changed (cpu offlined).

The kobject needs to be zero'd in this case so that the
state is not preserved across kobject_put()/kobject_init_and_add().

This resolves the following error report.

ixgbe 0000:03:00.0: eth2: NIC Link is Up 10 Gbps, Flow Control: RX/TX
kobject (ffff880324b83210): tried to init an initialized object, something is seriously wrong.
Pid: 1972, comm: lldpad Not tainted 2.6.37-rc18021qaz+ #169
Call Trace:
 [<ffffffff8121c940>] kobject_init+0x3a/0x83
 [<ffffffff8121cf77>] kobject_init_and_add+0x23/0x57
 [<ffffffff8107b800>] ? mark_lock+0x21/0x267
 [<ffffffff813c6d11>] net_rx_queue_update_kobjects+0x63/0xc6
 [<ffffffff813b5e0e>] netif_set_real_num_rx_queues+0x5f/0x78
 [<ffffffffa0261d49>] ixgbe_set_num_queues+0x1c6/0x1ca [ixgbe]
 [<ffffffffa0262509>] ixgbe_init_interrupt_scheme+0x1e/0x79c [ixgbe]
 [<ffffffffa0274596>] ixgbe_dcbnl_set_state+0x167/0x189 [ixgbe]

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3ba526b56fe3..7abeb7ceaa4c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -711,13 +711,18 @@ static void rx_queue_release(struct kobject *kobj)
 
 
 	map = rcu_dereference_raw(queue->rps_map);
-	if (map)
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
 		call_rcu(&map->rcu, rps_map_release);
+	}
 
 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
-	if (flow_table)
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
 
+	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
 
-- 
cgit v1.2.3


From dda0b38692a7298f433b92b1329867b1ecabb4bb Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Sun, 14 Nov 2010 07:06:08 +0000
Subject: net: ipv4: tcp_probe: cleanup snprintf() use

snprintf() returns number of bytes that were copied if there is no overflow.
This code uses return value as number of copied bytes.  Theoretically format
string '%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n' may be expanded
up to 163 bytes.  In reality tv.tv_sec is just few bytes instead of 20, 2 ports
are just 5 bytes each instead of 10, length is 5 bytes instead of 10.  The rest
is an unstrusted input.  Theoretically if tv_sec is big then copy_to_user() would
overflow tbuf.

tbuf was increased to fit in 163 bytes.  snprintf() is used to follow return
value semantic.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_probe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 6211e2114173..85ee7eb7e38e 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static int tcpprobe_sprint(char *tbuf, int n)
 	struct timespec tv
 		= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
 
-	return snprintf(tbuf, n,
+	return scnprintf(tbuf, n,
 			"%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
 			(unsigned long) tv.tv_sec,
 			(unsigned long) tv.tv_nsec,
@@ -174,7 +174,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
 		return -EINVAL;
 
 	while (cnt < len) {
-		char tbuf[128];
+		char tbuf[164];
 		int width;
 
 		/* Wait for data in buffer */
-- 
cgit v1.2.3


From 4bce22b9b84032c77c7e038b07b24fcc706dfc10 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 16 Nov 2010 11:49:58 -0800
Subject: mac80211: defines for AC numbers

In many places we've just hardcoded the
AC numbers -- which is a relic from the
original mac80211 (d80211). Add constants
for them so we know what we're talking
about.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 14 ++++++++++++++
 net/mac80211/wme.c     | 11 ++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1248369a7c30..5b0fff2178bb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -96,6 +96,20 @@ enum ieee80211_max_queues {
 	IEEE80211_MAX_QUEUES =		4,
 };
 
+/**
+ * enum ieee80211_ac_numbers - AC numbers as used in mac80211
+ * @IEEE80211_AC_VO: voice
+ * @IEEE80211_AC_VI: video
+ * @IEEE80211_AC_BE: best effort
+ * @IEEE80211_AC_BK: background
+ */
+enum ieee80211_ac_numbers {
+	IEEE80211_AC_VO		= 0,
+	IEEE80211_AC_VI		= 1,
+	IEEE80211_AC_BE		= 2,
+	IEEE80211_AC_BK		= 3,
+};
+
 /**
  * struct ieee80211_tx_queue_params - transmit queue configuration
  *
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 34e6d02da779..58e75bbc1f91 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -21,7 +21,16 @@
 /* Default mapping in classifier to work with default
  * queue setup.
  */
-const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 };
+const int ieee802_1d_to_ac[8] = {
+	IEEE80211_AC_BE,
+	IEEE80211_AC_BK,
+	IEEE80211_AC_BK,
+	IEEE80211_AC_BE,
+	IEEE80211_AC_VI,
+	IEEE80211_AC_VI,
+	IEEE80211_AC_VO,
+	IEEE80211_AC_VO
+};
 
 static int wme_downgrade_ac(struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 50a9432daeece6fc1309bef1dc0a7b8fde8204cb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 16 Nov 2010 11:50:28 -0800
Subject: mac80211: fix powersaving clients races

The code to handle powersaving stations has a race:
when the powersave flag is lifted from a station,
we could transmit a packet that is being processed
for TX at the same time right away, even if there
are other frames queued for it. This would cause
frame reordering. To fix this, lift the flag only
under the appropriate lock that blocks TX.

Additionally, the code to allow drivers to block a
station while frames for it are on the HW queue is
never re-enabled the station, so traffic would get
stuck indefinitely. Fix this by clearing the flag
for this appropriately.

Finally, as an optimisation, don't do anything if
the driver unblocks an already unblocked station.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/rx.c          |  2 --
 net/mac80211/sta_info.c    | 17 ++++++++++++++---
 net/mac80211/util.c        | 14 ++++++++++++--
 4 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 59a1d38212fd..3598abf21844 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1278,6 +1278,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
 			       struct sk_buff *skb);
 int ieee80211_add_pending_skbs(struct ieee80211_local *local,
 			       struct sk_buff_head *skbs);
+int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
+				  struct sk_buff_head *skbs,
+				  void (*fn)(void *data), void *data);
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 902b03ee8f60..d2fcd22ab06d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1102,8 +1102,6 @@ static void ap_sta_ps_end(struct sta_info *sta)
 
 	atomic_dec(&sdata->bss->num_sta_ps);
 
-	clear_sta_flags(sta, WLAN_STA_PS_STA);
-
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
 	printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n",
 	       sdata->name, sta->sta.addr, sta->sta.aid);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d8f897d8763..eff58571fd7e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -199,8 +199,11 @@ static void sta_unblock(struct work_struct *wk)
 
 	if (!test_sta_flags(sta, WLAN_STA_PS_STA))
 		ieee80211_sta_ps_deliver_wakeup(sta);
-	else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL))
+	else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) {
+		clear_sta_flags(sta, WLAN_STA_PS_DRIVER);
 		ieee80211_sta_ps_deliver_poll_response(sta);
+	} else
+		clear_sta_flags(sta, WLAN_STA_PS_DRIVER);
 }
 
 static int sta_prepare_rate_control(struct ieee80211_local *local,
@@ -880,6 +883,13 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
 }
 EXPORT_SYMBOL(ieee80211_find_sta);
 
+static void clear_sta_ps_flags(void *_sta)
+{
+	struct sta_info *sta = _sta;
+
+	clear_sta_flags(sta, WLAN_STA_PS_DRIVER | WLAN_STA_PS_STA);
+}
+
 /* powersave support code */
 void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 {
@@ -894,7 +904,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 
 	/* Send all buffered frames to the station */
 	sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered);
-	buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf);
+	buffered = ieee80211_add_pending_skbs_fn(local, &sta->ps_tx_buf,
+						 clear_sta_ps_flags, sta);
 	sent += buffered;
 	local->total_ps_buffered -= buffered;
 
@@ -973,7 +984,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 
 	if (block)
 		set_sta_flags(sta, WLAN_STA_PS_DRIVER);
-	else
+	else if (test_sta_flags(sta, WLAN_STA_PS_DRIVER))
 		ieee80211_queue_work(hw, &sta->drv_unblock_wk);
 }
 EXPORT_SYMBOL(ieee80211_sta_block_awake);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 68d0518254dd..e497476174ce 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -368,8 +368,9 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 }
 
-int ieee80211_add_pending_skbs(struct ieee80211_local *local,
-			       struct sk_buff_head *skbs)
+int ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
+				  struct sk_buff_head *skbs,
+				  void (*fn)(void *data), void *data)
 {
 	struct ieee80211_hw *hw = &local->hw;
 	struct sk_buff *skb;
@@ -394,6 +395,9 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local,
 		__skb_queue_tail(&local->pending[queue], skb);
 	}
 
+	if (fn)
+		fn(data);
+
 	for (i = 0; i < hw->queues; i++)
 		__ieee80211_wake_queue(hw, i,
 			IEEE80211_QUEUE_STOP_REASON_SKB_ADD);
@@ -402,6 +406,12 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local,
 	return ret;
 }
 
+int ieee80211_add_pending_skbs(struct ieee80211_local *local,
+			       struct sk_buff_head *skbs)
+{
+	return ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
+}
+
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 				    enum queue_stop_reason reason)
 {
-- 
cgit v1.2.3


From 866f3b25a2eb60d7529c227a0ecd80c3aba443fd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Nov 2010 09:33:19 -0800
Subject: bonding: IGMP handling cleanup

Instead of iterating in_dev->mc_list from bonding driver, its better
to call a helper function provided by igmp.c
Details of implementation (locking) are private to igmp code.

ip_mc_rejoin_group(struct ip_mc_list *im) becomes
ip_mc_rejoin_groups(struct in_device *in_dev);

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  8 ++------
 include/linux/igmp.h            |  2 +-
 net/ipv4/igmp.c                 | 32 +++++++++++++++++++-------------
 3 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 518844852f06..e588b2e1c3b3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -873,15 +873,11 @@ static void bond_mc_del(struct bonding *bond, void *addr)
 static void __bond_resend_igmp_join_requests(struct net_device *dev)
 {
 	struct in_device *in_dev;
-	struct ip_mc_list *im;
 
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
-	if (in_dev) {
-		for (im = in_dev->mc_list; im; im = im->next)
-			ip_mc_rejoin_group(im);
-	}
-
+	if (in_dev)
+			ip_mc_rejoin_groups(in_dev);
 	rcu_read_unlock();
 }
 
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7d164670f264..c4987f265109 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -238,7 +238,7 @@ extern void ip_mc_unmap(struct in_device *);
 extern void ip_mc_remap(struct in_device *);
 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
-extern void ip_mc_rejoin_group(struct ip_mc_list *im);
+extern void ip_mc_rejoin_groups(struct in_device *in_dev);
 
 #endif
 #endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index afb1e82a59f9..50f6bc1a002a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1267,26 +1267,32 @@ EXPORT_SYMBOL(ip_mc_inc_group);
 
 /*
  *	Resend IGMP JOIN report; used for bonding.
+ *	Called with rcu_read_lock()
  */
-void ip_mc_rejoin_group(struct ip_mc_list *im)
+void ip_mc_rejoin_groups(struct in_device *in_dev)
 {
 #ifdef CONFIG_IP_MULTICAST
-	struct in_device *in_dev = im->interface;
+	struct ip_mc_list *im;
+	int type;
 
-	if (im->multiaddr == IGMP_ALL_HOSTS)
-		return;
+	for_each_pmc_rcu(in_dev, im) {
+		if (im->multiaddr == IGMP_ALL_HOSTS)
+			continue;
 
-	/* a failover is happening and switches
-	 * must be notified immediately */
-	if (IGMP_V1_SEEN(in_dev))
-		igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
-	else if (IGMP_V2_SEEN(in_dev))
-		igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
-	else
-		igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
+		/* a failover is happening and switches
+		 * must be notified immediately
+		 */
+		if (IGMP_V1_SEEN(in_dev))
+			type = IGMP_HOST_MEMBERSHIP_REPORT;
+		else if (IGMP_V2_SEEN(in_dev))
+			type = IGMPV2_HOST_MEMBERSHIP_REPORT;
+		else
+			type = IGMPV3_HOST_MEMBERSHIP_REPORT;
+		igmp_send_report(in_dev, im, type);
+	}
 #endif
 }
-EXPORT_SYMBOL(ip_mc_rejoin_group);
+EXPORT_SYMBOL(ip_mc_rejoin_groups);
 
 /*
  *	A socket has left a multicast group on device dev
-- 
cgit v1.2.3


From f72f2f4cdeb67bc262d80a6d474292f00182a4dc Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Thu, 18 Nov 2010 09:37:07 -0800
Subject: dccp ccid-2: whitespace fix-up

This fixes whitespace noise introduced in commit "dccp ccid-2: Algorithm to
update buffer state", 5753fdfe8bd8e9a2ff9e5af19b0ffc78bfcd502a, 14 Nov 2010.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 41819848bdda..25b7a8d1ad58 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -282,7 +282,7 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
  * packet of group (2) in 11.4.2.
  */
 void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
- {
+{
 	struct dccp_ackvec_record *avr, *next;
 	u8 runlen_now, eff_runlen;
 	s64 delta;
-- 
cgit v1.2.3


From 9e50e3ac5a5bbb1fd2949bdd57444ad1b93e5f41 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 16 Nov 2010 19:12:28 +0000
Subject: net: add priority field to pktgen

Add option to set skb priority to pktgen. Useful for testing
QOS features. Also by running pktgen on the vlan device the
qdisc on the real device can be tested.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 33bc3823ac6f..52fc1e08a7c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -378,6 +378,7 @@ struct pktgen_dev {
 
 	u16 queue_map_min;
 	u16 queue_map_max;
+	__u32 skb_priority;	/* skb priority field */
 	int node;               /* Memory node */
 
 #ifdef CONFIG_XFRM
@@ -547,6 +548,10 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 		   pkt_dev->queue_map_min,
 		   pkt_dev->queue_map_max);
 
+	if (pkt_dev->skb_priority)
+		seq_printf(seq, "     skb_priority: %u\n",
+			   pkt_dev->skb_priority);
+
 	if (pkt_dev->flags & F_IPV6) {
 		char b1[128], b2[128], b3[128];
 		fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -1711,6 +1716,18 @@ static ssize_t pktgen_if_write(struct file *file,
 		return count;
 	}
 
+	if (!strcmp(name, "skb_priority")) {
+		len = num_arg(&user_buffer[i], 9, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		pkt_dev->skb_priority = value;
+		sprintf(pg_result, "OK: skb_priority=%i",
+			pkt_dev->skb_priority);
+		return count;
+	}
+
 	sprintf(pkt_dev->result, "No such parameter \"%s\"", name);
 	return -EINVAL;
 }
@@ -2671,6 +2688,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
+
 	iph = ip_hdr(skb);
 	udph = udp_hdr(skb);
 
@@ -3016,6 +3035,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
 	skb_set_queue_mapping(skb, queue_map);
+	skb->priority = pkt_dev->skb_priority;
 	iph = ipv6_hdr(skb);
 	udph = udp_hdr(skb);
 
-- 
cgit v1.2.3


From cba328fc5ede9091616e7296483840869b615a46 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 16 Nov 2010 15:19:51 +0000
Subject: filter: Optimize instruction revalidation code.

Since repeating u16 value to u8 value conversion using switch() clause's
case statement is wasteful, this patch introduces u16 to u8 mapping table
and removes most of case statements. As a result, the size of net/core/filter.o
is reduced by about 29% on x86.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 231 +++++++++++++++++-------------------------------------
 1 file changed, 72 insertions(+), 159 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 23e9b2a6b4c8..03dc0710194f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -383,7 +383,57 @@ EXPORT_SYMBOL(sk_run_filter);
  */
 int sk_chk_filter(struct sock_filter *filter, int flen)
 {
-	struct sock_filter *ftest;
+	/*
+	 * Valid instructions are initialized to non-0.
+	 * Invalid instructions are initialized to 0.
+	 */
+	static const u8 codes[] = {
+		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K + 1,
+		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X + 1,
+		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K + 1,
+		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X + 1,
+		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K + 1,
+		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X + 1,
+		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X + 1,
+		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K + 1,
+		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X + 1,
+		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K + 1,
+		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X + 1,
+		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K + 1,
+		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X + 1,
+		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K + 1,
+		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X + 1,
+		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG + 1,
+		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS + 1,
+		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS + 1,
+		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS + 1,
+		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN + 1,
+		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND + 1,
+		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND + 1,
+		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND + 1,
+		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM + 1,
+		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN + 1,
+		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH + 1,
+		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM + 1,
+		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX + 1,
+		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA + 1,
+		[BPF_RET|BPF_K]          = BPF_S_RET_K + 1,
+		[BPF_RET|BPF_A]          = BPF_S_RET_A + 1,
+		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K + 1,
+		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM + 1,
+		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM + 1,
+		[BPF_ST]                 = BPF_S_ST + 1,
+		[BPF_STX]                = BPF_S_STX + 1,
+		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA + 1,
+		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K + 1,
+		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X + 1,
+		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K + 1,
+		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X + 1,
+		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K + 1,
+		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X + 1,
+		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K + 1,
+		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X + 1,
+	};
 	int pc;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
@@ -391,136 +441,31 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 
 	/* check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
-		ftest = &filter[pc];
-
-		/* Only allow valid instructions */
-		switch (ftest->code) {
-		case BPF_ALU|BPF_ADD|BPF_K:
-			ftest->code = BPF_S_ALU_ADD_K;
-			break;
-		case BPF_ALU|BPF_ADD|BPF_X:
-			ftest->code = BPF_S_ALU_ADD_X;
-			break;
-		case BPF_ALU|BPF_SUB|BPF_K:
-			ftest->code = BPF_S_ALU_SUB_K;
-			break;
-		case BPF_ALU|BPF_SUB|BPF_X:
-			ftest->code = BPF_S_ALU_SUB_X;
-			break;
-		case BPF_ALU|BPF_MUL|BPF_K:
-			ftest->code = BPF_S_ALU_MUL_K;
-			break;
-		case BPF_ALU|BPF_MUL|BPF_X:
-			ftest->code = BPF_S_ALU_MUL_X;
-			break;
-		case BPF_ALU|BPF_DIV|BPF_X:
-			ftest->code = BPF_S_ALU_DIV_X;
-			break;
-		case BPF_ALU|BPF_AND|BPF_K:
-			ftest->code = BPF_S_ALU_AND_K;
-			break;
-		case BPF_ALU|BPF_AND|BPF_X:
-			ftest->code = BPF_S_ALU_AND_X;
-			break;
-		case BPF_ALU|BPF_OR|BPF_K:
-			ftest->code = BPF_S_ALU_OR_K;
-			break;
-		case BPF_ALU|BPF_OR|BPF_X:
-			ftest->code = BPF_S_ALU_OR_X;
-			break;
-		case BPF_ALU|BPF_LSH|BPF_K:
-			ftest->code = BPF_S_ALU_LSH_K;
-			break;
-		case BPF_ALU|BPF_LSH|BPF_X:
-			ftest->code = BPF_S_ALU_LSH_X;
-			break;
-		case BPF_ALU|BPF_RSH|BPF_K:
-			ftest->code = BPF_S_ALU_RSH_K;
-			break;
-		case BPF_ALU|BPF_RSH|BPF_X:
-			ftest->code = BPF_S_ALU_RSH_X;
-			break;
-		case BPF_ALU|BPF_NEG:
-			ftest->code = BPF_S_ALU_NEG;
-			break;
-		case BPF_LD|BPF_W|BPF_ABS:
-			ftest->code = BPF_S_LD_W_ABS;
-			break;
-		case BPF_LD|BPF_H|BPF_ABS:
-			ftest->code = BPF_S_LD_H_ABS;
-			break;
-		case BPF_LD|BPF_B|BPF_ABS:
-			ftest->code = BPF_S_LD_B_ABS;
-			break;
-		case BPF_LD|BPF_W|BPF_LEN:
-			ftest->code = BPF_S_LD_W_LEN;
-			break;
-		case BPF_LD|BPF_W|BPF_IND:
-			ftest->code = BPF_S_LD_W_IND;
-			break;
-		case BPF_LD|BPF_H|BPF_IND:
-			ftest->code = BPF_S_LD_H_IND;
-			break;
-		case BPF_LD|BPF_B|BPF_IND:
-			ftest->code = BPF_S_LD_B_IND;
-			break;
-		case BPF_LD|BPF_IMM:
-			ftest->code = BPF_S_LD_IMM;
-			break;
-		case BPF_LDX|BPF_W|BPF_LEN:
-			ftest->code = BPF_S_LDX_W_LEN;
-			break;
-		case BPF_LDX|BPF_B|BPF_MSH:
-			ftest->code = BPF_S_LDX_B_MSH;
-			break;
-		case BPF_LDX|BPF_IMM:
-			ftest->code = BPF_S_LDX_IMM;
-			break;
-		case BPF_MISC|BPF_TAX:
-			ftest->code = BPF_S_MISC_TAX;
-			break;
-		case BPF_MISC|BPF_TXA:
-			ftest->code = BPF_S_MISC_TXA;
-			break;
-		case BPF_RET|BPF_K:
-			ftest->code = BPF_S_RET_K;
-			break;
-		case BPF_RET|BPF_A:
-			ftest->code = BPF_S_RET_A;
-			break;
+		struct sock_filter *ftest = &filter[pc];
+		u16 code = ftest->code;
 
+		if (code >= ARRAY_SIZE(codes))
+			return -EINVAL;
+		code = codes[code];
+		/* Undo the '+ 1' in codes[] after validation. */
+		if (!code--)
+			return -EINVAL;
 		/* Some instructions need special checks */
-
+		switch (code) {
+		case BPF_S_ALU_DIV_K:
 			/* check for division by zero */
-		case BPF_ALU|BPF_DIV|BPF_K:
 			if (ftest->k == 0)
 				return -EINVAL;
-			ftest->code = BPF_S_ALU_DIV_K;
 			break;
-
-		/* check for invalid memory addresses */
-		case BPF_LD|BPF_MEM:
-			if (ftest->k >= BPF_MEMWORDS)
-				return -EINVAL;
-			ftest->code = BPF_S_LD_MEM;
-			break;
-		case BPF_LDX|BPF_MEM:
-			if (ftest->k >= BPF_MEMWORDS)
-				return -EINVAL;
-			ftest->code = BPF_S_LDX_MEM;
-			break;
-		case BPF_ST:
-			if (ftest->k >= BPF_MEMWORDS)
-				return -EINVAL;
-			ftest->code = BPF_S_ST;
-			break;
-		case BPF_STX:
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+		case BPF_S_ST:
+		case BPF_S_STX:
+			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
-			ftest->code = BPF_S_STX;
 			break;
-
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			/*
 			 * Note, the large ftest->k might cause loops.
 			 * Compare this with conditional jumps below,
@@ -528,40 +473,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			 */
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
-			ftest->code = BPF_S_JMP_JA;
-			break;
-
-		case BPF_JMP|BPF_JEQ|BPF_K:
-			ftest->code = BPF_S_JMP_JEQ_K;
-			break;
-		case BPF_JMP|BPF_JEQ|BPF_X:
-			ftest->code = BPF_S_JMP_JEQ_X;
 			break;
-		case BPF_JMP|BPF_JGE|BPF_K:
-			ftest->code = BPF_S_JMP_JGE_K;
-			break;
-		case BPF_JMP|BPF_JGE|BPF_X:
-			ftest->code = BPF_S_JMP_JGE_X;
-			break;
-		case BPF_JMP|BPF_JGT|BPF_K:
-			ftest->code = BPF_S_JMP_JGT_K;
-			break;
-		case BPF_JMP|BPF_JGT|BPF_X:
-			ftest->code = BPF_S_JMP_JGT_X;
-			break;
-		case BPF_JMP|BPF_JSET|BPF_K:
-			ftest->code = BPF_S_JMP_JSET_K;
-			break;
-		case BPF_JMP|BPF_JSET|BPF_X:
-			ftest->code = BPF_S_JMP_JSET_X;
-			break;
-
-		default:
-			return -EINVAL;
-		}
-
-			/* for conditionals both must be safe */
-		switch (ftest->code) {
 		case BPF_S_JMP_JEQ_K:
 		case BPF_S_JMP_JEQ_X:
 		case BPF_S_JMP_JGE_K:
@@ -570,10 +482,13 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		case BPF_S_JMP_JGT_X:
 		case BPF_S_JMP_JSET_X:
 		case BPF_S_JMP_JSET_K:
+			/* for conditionals both must be safe */
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
+			break;
 		}
+		ftest->code = code;
 	}
 
 	/* last instruction must be a RET code */
@@ -581,10 +496,8 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	case BPF_S_RET_K:
 	case BPF_S_RET_A:
 		return 0;
-		break;
-		default:
-			return -EINVAL;
-		}
+	}
+	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
-- 
cgit v1.2.3


From 4c3710afbc333c33100739dec10662b4ee64e219 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 16 Nov 2010 20:28:24 +0000
Subject: net: move definitions of BPF_S_* to net/core/filter.c

BPF_S_* are used internally, should not be exposed to the others.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 48 ------------------------------------------------
 net/core/filter.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 69b43dbea6c6..151f5d703b7e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,54 +91,6 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
-enum {
-	BPF_S_RET_K = 0,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-};
-
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index 03dc0710194f..15a545d39cd3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -38,6 +38,54 @@
 #include <asm/unaligned.h>
 #include <linux/filter.h>
 
+enum {
+	BPF_S_RET_K = 0,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+};
+
 /* No hurry in this branch */
 static void *__load_pointer(struct sk_buff *skb, int k)
 {
-- 
cgit v1.2.3


From 57e1ab6eaddc9f2c358cd4afb497cda6e3c6821a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 16 Nov 2010 20:36:42 +0000
Subject: igmp: refine skb allocations

IGMP allocates MTU sized skbs. This may fail for large MTU (order-2
allocations), so add a fallback to try lower sizes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 50f6bc1a002a..e0e77e297de3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -300,6 +300,8 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
 	return scount;
 }
 
+#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
+
 static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 {
 	struct sk_buff *skb;
@@ -308,9 +310,16 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	struct igmpv3_report *pig;
 	struct net *net = dev_net(dev);
 
-	skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
-	if (skb == NULL)
-		return NULL;
+	while (1) {
+		skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+				GFP_ATOMIC | __GFP_NOWARN);
+		if (skb)
+			break;
+		size >>= 1;
+		if (size < 256)
+			return NULL;
+	}
+	igmp_skb_size(skb) = size;
 
 	{
 		struct flowi fl = { .oif = dev->ifindex,
@@ -399,7 +408,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
 	return skb;
 }
 
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
 	skb_tailroom(skb)) : 0)
 
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
-- 
cgit v1.2.3


From 86107fd170bc379869250eb7e1bd393a3a70e8ae Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Tue, 16 Nov 2010 10:58:48 +0900
Subject: nl80211/mac80211: Report signal average

Extend nl80211 to report an exponential weighted moving average (EWMA) of the
signal value. Since the signal value usually fluctuates between different
packets, an average can be more useful than the value of the last packet.

This uses the recently added generic EWMA library function.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 4 ++++
 net/mac80211/Kconfig    | 1 +
 net/mac80211/cfg.c      | 3 ++-
 net/mac80211/rx.c       | 1 +
 net/mac80211/sta_info.c | 2 ++
 net/mac80211/sta_info.h | 3 +++
 net/wireless/nl80211.c  | 3 +++
 8 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 037b4e498890..1ce3775e9e26 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1161,6 +1161,7 @@ enum nl80211_rate_info {
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
  * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
@@ -1178,6 +1179,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_PLID,
 	NL80211_STA_INFO_PLINK_STATE,
 	NL80211_STA_INFO_SIGNAL,
+	NL80211_STA_INFO_SIGNAL_AVG,
 	NL80211_STA_INFO_TX_BITRATE,
 	NL80211_STA_INFO_RX_PACKETS,
 	NL80211_STA_INFO_TX_PACKETS,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8fd9eebd0cc9..69e2364889f1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -424,6 +424,7 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
+ * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -439,6 +440,7 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
+	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -485,6 +487,7 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
+ * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -505,6 +508,7 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
+	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec88..798d9b9462e2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,6 +6,7 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
+	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c544074479e..92c9cf6a7d1c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
 		sinfo->signal = (s8)sta->last_signal;
+		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d2fcd22ab06d..9dd60a74181f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1156,6 +1156,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
+	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index eff58571fd7e..f43fca8907f7 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
+	ewma_init(&sta->avg_signal, 1000, 8);
+
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9265acadef32..84062e2c782c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
+#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -224,6 +225,7 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
+ * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -291,6 +293,7 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
+	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 605553842226..d06a40d17002 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1872,6 +1872,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
+	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
+		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
+			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3


From 93aaae2e01e57483256b7da05c9a7ebd65ad4686 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Nov 2010 09:49:59 -0800
Subject: filter: optimize sk_run_filter

Remove pc variable to avoid arithmetic to compute fentry at each filter
instruction. Jumps directly manipulate fentry pointer.

As the last instruction of filter[] is guaranteed to be a RETURN, and
all jumps are before the last instruction, we dont need to check filter
bounds (number of instructions in filter array) at each iteration, so we
remove it from sk_run_filter() params.

On x86_32 remove f_k var introduced in commit 57fe93b374a6b871
(filter: make sure filters dont read uninitialized memory)

Note : We could use a CONFIG_ARCH_HAS_{FEW|MANY}_REGISTERS in order to
avoid too many ifdefs in this code.

This helps compiler to use cpu registers to hold fentry and A
accumulator.

On x86_32, this saves 401 bytes, and more important, sk_run_filter()
runs much faster because less register pressure (One less conditional
branch per BPF instruction)

# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   2948       0       0    2948     b84 net/core/filter.o
   3349       0       0    3349     d15 net/core/filter_pre.o

on x86_64 :
# size net/core/filter.o net/core/filter_pre.o
   text    data     bss     dec     hex filename
   5173       0       0    5173    1435 net/core/filter.o
   5224       0       0    5224    1468 net/core/filter_pre.o

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c | 14 +++----
 drivers/net/ppp_generic.c   | 12 ++----
 include/linux/filter.h      |  2 +-
 net/core/filter.c           | 93 +++++++++++++++++++++++----------------------
 net/core/timestamping.c     |  2 +-
 net/packet/af_packet.c      |  2 +-
 6 files changed, 61 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 97c5cc2997f5..9e8162c80bb0 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1147,15 +1147,14 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 	}
 
 	if (is->pass_filter
-	    && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0) {
+	    && sk_run_filter(skb, is->pass_filter) == 0) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
 		kfree_skb(skb);
 		return;
 	}
 	if (!(is->active_filter
-	      && sk_run_filter(skb, is->active_filter,
-	                       is->active_len) == 0)) {
+	      && sk_run_filter(skb, is->active_filter) == 0)) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
 		lp->huptimer = 0;
@@ -1294,15 +1293,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if (ipt->pass_filter
-	    && sk_run_filter(skb, ipt->pass_filter, ipt->pass_len) == 0) {
+	    && sk_run_filter(skb, ipt->pass_filter) == 0) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
 		kfree_skb(skb);
 		goto unlock;
 	}
 	if (!(ipt->active_filter
-	      && sk_run_filter(skb, ipt->active_filter,
-		               ipt->active_len) == 0)) {
+	      && sk_run_filter(skb, ipt->active_filter) == 0)) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: link-active filter: reseting huptimer.\n");
 		lp->huptimer = 0;
@@ -1492,9 +1490,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
 	}
 	
 	drop |= is->pass_filter
-	        && sk_run_filter(skb, is->pass_filter, is->pass_len) == 0;
+	        && sk_run_filter(skb, is->pass_filter) == 0;
 	drop |= is->active_filter
-	        && sk_run_filter(skb, is->active_filter, is->active_len) == 0;
+	        && sk_run_filter(skb, is->active_filter) == 0;
 	
 	skb_push(skb, IPPP_MAX_HEADER - 4);
 	return drop;
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 09cf56d0416a..0c91598ae280 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1136,8 +1136,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		   a four-byte PPP header on each packet */
 		*skb_push(skb, 2) = 1;
 		if (ppp->pass_filter &&
-		    sk_run_filter(skb, ppp->pass_filter,
-				  ppp->pass_len) == 0) {
+		    sk_run_filter(skb, ppp->pass_filter) == 0) {
 			if (ppp->debug & 1)
 				printk(KERN_DEBUG "PPP: outbound frame not passed\n");
 			kfree_skb(skb);
@@ -1145,8 +1144,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		}
 		/* if this packet passes the active filter, record the time */
 		if (!(ppp->active_filter &&
-		      sk_run_filter(skb, ppp->active_filter,
-				    ppp->active_len) == 0))
+		      sk_run_filter(skb, ppp->active_filter) == 0))
 			ppp->last_xmit = jiffies;
 		skb_pull(skb, 2);
 #else
@@ -1758,8 +1756,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
 			*skb_push(skb, 2) = 0;
 			if (ppp->pass_filter &&
-			    sk_run_filter(skb, ppp->pass_filter,
-					  ppp->pass_len) == 0) {
+			    sk_run_filter(skb, ppp->pass_filter) == 0) {
 				if (ppp->debug & 1)
 					printk(KERN_DEBUG "PPP: inbound frame "
 					       "not passed\n");
@@ -1767,8 +1764,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 				return;
 			}
 			if (!(ppp->active_filter &&
-			      sk_run_filter(skb, ppp->active_filter,
-					    ppp->active_len) == 0))
+			      sk_run_filter(skb, ppp->active_filter) == 0))
 				ppp->last_recv = jiffies;
 			__skb_pull(skb, 2);
 		} else
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d703b7e..447a775878fb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -147,7 +147,7 @@ struct sock;
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
 extern unsigned int sk_run_filter(struct sk_buff *skb,
-				  struct sock_filter *filter, int flen);
+				  const struct sock_filter *filter);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, int flen);
diff --git a/net/core/filter.c b/net/core/filter.c
index 15a545d39cd3..9e77b3c816c5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -137,7 +137,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	rcu_read_lock_bh();
 	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter) {
-		unsigned int pkt_len = sk_run_filter(skb, filter->insns, filter->len);
+		unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
 		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
 	}
@@ -151,14 +151,15 @@ EXPORT_SYMBOL(sk_filter);
  *	sk_run_filter - run a filter on a socket
  *	@skb: buffer to run the filter on
  *	@filter: filter to apply
- *	@flen: length of filter
  *
  * Decode and apply filter instructions to the skb->data.
- * Return length to keep, 0 for none. skb is the data we are
- * filtering, filter is the array of filter instructions, and
- * len is the number of filter blocks in the array.
+ * Return length to keep, 0 for none. @skb is the data we are
+ * filtering, @filter is the array of filter instructions.
+ * Because all jumps are guaranteed to be before last instruction,
+ * and last instruction guaranteed to be a RET, we dont need to check
+ * flen. (We used to pass to this function the length of filter)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
 {
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
@@ -167,34 +168,36 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
-	int pc;
 
 	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
-	for (pc = 0; pc < flen; pc++) {
-		const struct sock_filter *fentry = &filter[pc];
-		u32 f_k = fentry->k;
+	for (;; fentry++) {
+#if defined(CONFIG_X86_32)
+#define	K (fentry->k)
+#else
+		const u32 K = fentry->k;
+#endif
 
 		switch (fentry->code) {
 		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
 		case BPF_S_ALU_ADD_K:
-			A += f_k;
+			A += K;
 			continue;
 		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
 		case BPF_S_ALU_SUB_K:
-			A -= f_k;
+			A -= K;
 			continue;
 		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
 		case BPF_S_ALU_MUL_K:
-			A *= f_k;
+			A *= K;
 			continue;
 		case BPF_S_ALU_DIV_X:
 			if (X == 0)
@@ -202,64 +205,64 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= f_k;
+			A /= K;
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
 		case BPF_S_ALU_AND_K:
-			A &= f_k;
+			A &= K;
 			continue;
 		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
 		case BPF_S_ALU_OR_K:
-			A |= f_k;
+			A |= K;
 			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
 		case BPF_S_ALU_LSH_K:
-			A <<= f_k;
+			A <<= K;
 			continue;
 		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
 		case BPF_S_ALU_RSH_K:
-			A >>= f_k;
+			A >>= K;
 			continue;
 		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
 		case BPF_S_JMP_JA:
-			pc += f_k;
+			fentry += K;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > f_k) ? fentry->jt : fentry->jf;
+			fentry += (A > K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= f_k) ? fentry->jt : fentry->jf;
+			fentry += (A >= K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == f_k) ? fentry->jt : fentry->jf;
+			fentry += (A == K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & f_k) ? fentry->jt : fentry->jf;
+			fentry += (A & K) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
-			pc += (A > X) ? fentry->jt : fentry->jf;
+			fentry += (A > X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_X:
-			pc += (A >= X) ? fentry->jt : fentry->jf;
+			fentry += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_X:
-			pc += (A == X) ? fentry->jt : fentry->jf;
+			fentry += (A == X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_X:
-			pc += (A & X) ? fentry->jt : fentry->jf;
+			fentry += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = f_k;
+			k = K;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -268,7 +271,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = f_k;
+			k = K;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -277,7 +280,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = f_k;
+			k = K;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -292,34 +295,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + f_k;
+			k = X + K;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, f_k, 1, &tmp);
+			ptr = load_pointer(skb, K, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = f_k;
+			A = K;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = f_k;
+			X = K;
 			continue;
 		case BPF_S_LD_MEM:
-			A = (memvalid & (1UL << f_k)) ?
-				mem[f_k] : 0;
+			A = (memvalid & (1UL << K)) ?
+				mem[K] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = (memvalid & (1UL << f_k)) ?
-				mem[f_k] : 0;
+			X = (memvalid & (1UL << K)) ?
+				mem[K] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -328,16 +331,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return f_k;
+			return K;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			memvalid |= 1UL << f_k;
-			mem[f_k] = A;
+			memvalid |= 1UL << K;
+			mem[K] = A;
 			continue;
 		case BPF_S_STX:
-			memvalid |= 1UL << f_k;
-			mem[f_k] = X;
+			memvalid |= 1UL << K;
+			mem[K] = X;
 			continue;
 		default:
 			WARN_ON(1);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85b..dac7ed687f60 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -31,7 +31,7 @@ static unsigned int classify(struct sk_buff *skb)
 	if (likely(skb->dev &&
 		   skb->dev->phydev &&
 		   skb->dev->phydev->drv))
-		return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+		return sk_run_filter(skb, ptp_filter);
 	else
 		return PTP_CLASS_NONE;
 }
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 20964560a0ed..b6372dd128d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -519,7 +519,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 	rcu_read_lock_bh();
 	filter = rcu_dereference_bh(sk->sk_filter);
 	if (filter != NULL)
-		res = sk_run_filter(skb, filter->insns, filter->len);
+		res = sk_run_filter(skb, filter->insns);
 	rcu_read_unlock_bh();
 
 	return res;
-- 
cgit v1.2.3


From 8c1592d68bc89248bfd0ee287648f41c1370d826 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Nov 2010 21:56:38 +0000
Subject: filter: cleanup codes[] init

Starting the translated instruction to 1 instead of 0 allows us to
remove one descrement at check time and makes codes[] array init
cleaner.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 95 +++++++++++++++++++++++++++----------------------------
 1 file changed, 47 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 9e77b3c816c5..c0b68f7c8036 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,7 +39,7 @@
 #include <linux/filter.h>
 
 enum {
-	BPF_S_RET_K = 0,
+	BPF_S_RET_K = 1,
 	BPF_S_RET_A,
 	BPF_S_ALU_ADD_K,
 	BPF_S_ALU_ADD_X,
@@ -439,51 +439,51 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	 * Invalid instructions are initialized to 0.
 	 */
 	static const u8 codes[] = {
-		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K + 1,
-		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X + 1,
-		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K + 1,
-		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X + 1,
-		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K + 1,
-		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X + 1,
-		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X + 1,
-		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K + 1,
-		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X + 1,
-		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K + 1,
-		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X + 1,
-		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K + 1,
-		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X + 1,
-		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K + 1,
-		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X + 1,
-		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG + 1,
-		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS + 1,
-		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS + 1,
-		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS + 1,
-		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN + 1,
-		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND + 1,
-		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND + 1,
-		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND + 1,
-		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM + 1,
-		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN + 1,
-		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH + 1,
-		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM + 1,
-		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX + 1,
-		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA + 1,
-		[BPF_RET|BPF_K]          = BPF_S_RET_K + 1,
-		[BPF_RET|BPF_A]          = BPF_S_RET_A + 1,
-		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K + 1,
-		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM + 1,
-		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM + 1,
-		[BPF_ST]                 = BPF_S_ST + 1,
-		[BPF_STX]                = BPF_S_STX + 1,
-		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA + 1,
-		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K + 1,
-		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X + 1,
-		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K + 1,
-		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X + 1,
-		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K + 1,
-		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X + 1,
-		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K + 1,
-		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X + 1,
+		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
+		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
+		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
+		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
+		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
+		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
+		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
+		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
+		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
+		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
+		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
+		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
+		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
+		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
+		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
+		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
+		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
+		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
+		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
+		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
+		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
+		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
+		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
+		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
+		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
+		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
+		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
+		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
+		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
+		[BPF_RET|BPF_K]          = BPF_S_RET_K,
+		[BPF_RET|BPF_A]          = BPF_S_RET_A,
+		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
+		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
+		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
+		[BPF_ST]                 = BPF_S_ST,
+		[BPF_STX]                = BPF_S_STX,
+		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
+		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
+		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
+		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
+		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
+		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
+		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
+		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
+		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
 	};
 	int pc;
 
@@ -498,8 +498,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		if (code >= ARRAY_SIZE(codes))
 			return -EINVAL;
 		code = codes[code];
-		/* Undo the '+ 1' in codes[] after validation. */
-		if (!code--)
+		if (!code)
 			return -EINVAL;
 		/* Some instructions need special checks */
 		switch (code) {
-- 
cgit v1.2.3


From c26aed40f4fd18f86bcc6aba557cab700b129b73 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Nov 2010 22:04:46 +0000
Subject: filter: use reciprocal divide

At compile time, we can replace the DIV_K instruction (divide by a
constant value) by a reciprocal divide.

At exec time, the expensive divide is replaced by a multiply, a less
expensive operation on most processors.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index c0b68f7c8036..23e0a2a9a4de 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -37,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 #include <linux/filter.h>
+#include <linux/reciprocal_div.h>
 
 enum {
 	BPF_S_RET_K = 1,
@@ -205,7 +206,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= K;
+			A = reciprocal_divide(A, K);
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
@@ -506,6 +507,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			/* check for division by zero */
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->k = reciprocal_value(ftest->k);
 			break;
 		case BPF_S_LD_MEM:
 		case BPF_S_LDX_MEM:
-- 
cgit v1.2.3


From 70be998c2b44f942f11383496622500136816acb Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 18 Nov 2010 13:20:57 +0000
Subject: X25: pushdown bkl in ioctls

Push down the bkl in the ioctls so they can be removed one at a time.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 41 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f7af98dff409..c99029bc411c 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1357,14 +1357,16 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	void __user *argp = (void __user *)arg;
 	int rc;
 
-	lock_kernel();
 	switch (cmd) {
 		case TIOCOUTQ: {
-			int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
+			int amount;
 
+			lock_kernel();
+			amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
 			if (amount < 0)
 				amount = 0;
 			rc = put_user(amount, (unsigned int __user *)argp);
+			unlock_kernel();
 			break;
 		}
 
@@ -1375,23 +1377,29 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			 * These two are safe on a single CPU system as
 			 * only user tasks fiddle here
 			 */
+			lock_kernel();
 			if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
 				amount = skb->len;
 			rc = put_user(amount, (unsigned int __user *)argp);
+			unlock_kernel();
 			break;
 		}
 
 		case SIOCGSTAMP:
 			rc = -EINVAL;
+			lock_kernel();
 			if (sk)
 				rc = sock_get_timestamp(sk,
 						(struct timeval __user *)argp);
+			unlock_kernel();
 			break;
 		case SIOCGSTAMPNS:
 			rc = -EINVAL;
+			lock_kernel();
 			if (sk)
 				rc = sock_get_timestampns(sk,
 						(struct timespec __user *)argp);
+			unlock_kernel();
 			break;
 		case SIOCGIFADDR:
 		case SIOCSIFADDR:
@@ -1410,27 +1418,36 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = -EPERM;
 			if (!capable(CAP_NET_ADMIN))
 				break;
+			lock_kernel();
 			rc = x25_route_ioctl(cmd, argp);
+			unlock_kernel();
 			break;
 		case SIOCX25GSUBSCRIP:
+			lock_kernel();
 			rc = x25_subscr_ioctl(cmd, argp);
+			unlock_kernel();
 			break;
 		case SIOCX25SSUBSCRIP:
 			rc = -EPERM;
 			if (!capable(CAP_NET_ADMIN))
 				break;
+			lock_kernel();
 			rc = x25_subscr_ioctl(cmd, argp);
+			unlock_kernel();
 			break;
 		case SIOCX25GFACILITIES: {
 			struct x25_facilities fac = x25->facilities;
+			lock_kernel();
 			rc = copy_to_user(argp, &fac,
 					  sizeof(fac)) ? -EFAULT : 0;
+			unlock_kernel();
 			break;
 		}
 
 		case SIOCX25SFACILITIES: {
 			struct x25_facilities facilities;
 			rc = -EFAULT;
+			lock_kernel();
 			if (copy_from_user(&facilities, argp,
 					   sizeof(facilities)))
 				break;
@@ -1466,12 +1483,15 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				break;
 			x25->facilities = facilities;
 			rc = 0;
+			unlock_kernel();
 			break;
 		}
 
 		case SIOCX25GDTEFACILITIES: {
+			lock_kernel();
 			rc = copy_to_user(argp, &x25->dte_facilities,
 						sizeof(x25->dte_facilities));
+			unlock_kernel();
 			if (rc)
 				rc = -EFAULT;
 			break;
@@ -1480,6 +1500,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCX25SDTEFACILITIES: {
 			struct x25_dte_facilities dtefacs;
 			rc = -EFAULT;
+			lock_kernel();
 			if (copy_from_user(&dtefacs, argp, sizeof(dtefacs)))
 				break;
 			rc = -EINVAL;
@@ -1496,13 +1517,16 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				break;
 			x25->dte_facilities = dtefacs;
 			rc = 0;
+			unlock_kernel();
 			break;
 		}
 
 		case SIOCX25GCALLUSERDATA: {
 			struct x25_calluserdata cud = x25->calluserdata;
+			lock_kernel();
 			rc = copy_to_user(argp, &cud,
 					  sizeof(cud)) ? -EFAULT : 0;
+			unlock_kernel();
 			break;
 		}
 
@@ -1510,6 +1534,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			struct x25_calluserdata calluserdata;
 
 			rc = -EFAULT;
+			lock_kernel();
 			if (copy_from_user(&calluserdata, argp,
 					   sizeof(calluserdata)))
 				break;
@@ -1517,24 +1542,29 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			if (calluserdata.cudlength > X25_MAX_CUD_LEN)
 				break;
 			x25->calluserdata = calluserdata;
+			unlock_kernel();
 			rc = 0;
 			break;
 		}
 
 		case SIOCX25GCAUSEDIAG: {
 			struct x25_causediag causediag;
+			lock_kernel();
 			causediag = x25->causediag;
 			rc = copy_to_user(argp, &causediag,
 					  sizeof(causediag)) ? -EFAULT : 0;
+			unlock_kernel();
 			break;
 		}
 
 		case SIOCX25SCAUSEDIAG: {
 			struct x25_causediag causediag;
 			rc = -EFAULT;
+			lock_kernel();
 			if (copy_from_user(&causediag, argp, sizeof(causediag)))
 				break;
 			x25->causediag = causediag;
+			unlock_kernel();
 			rc = 0;
 			break;
 
@@ -1543,6 +1573,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCX25SCUDMATCHLEN: {
 			struct x25_subaddr sub_addr;
 			rc = -EINVAL;
+			lock_kernel();
 			if(sk->sk_state != TCP_CLOSE)
 				break;
 			rc = -EFAULT;
@@ -1553,21 +1584,25 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN)
 				break;
 			x25->cudmatchlength = sub_addr.cudmatchlength;
+			unlock_kernel();
 			rc = 0;
 			break;
 		}
 
 		case SIOCX25CALLACCPTAPPRV: {
 			rc = -EINVAL;
+			lock_kernel();
 			if (sk->sk_state != TCP_CLOSE)
 				break;
 			clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
+			unlock_kernel();
 			rc = 0;
 			break;
 		}
 
 		case SIOCX25SENDCALLACCPT:  {
 			rc = -EINVAL;
+			lock_kernel();
 			if (sk->sk_state != TCP_ESTABLISHED)
 				break;
 			/* must call accptapprv above */
@@ -1575,6 +1610,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				break;
 			x25_write_internal(sk, X25_CALL_ACCEPTED);
 			x25->state = X25_STATE_3;
+			unlock_kernel();
 			rc = 0;
 			break;
 		}
@@ -1583,7 +1619,6 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = -ENOIOCTLCMD;
 			break;
 	}
-	unlock_kernel();
 
 	return rc;
 }
-- 
cgit v1.2.3


From 1ecd66bf2ce5e0f2bc72ffdeed814bb0e55a60dc Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 18 Nov 2010 13:21:20 +0000
Subject: X25: remove bkl in timestamp ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c99029bc411c..22597838cc76 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1387,19 +1387,15 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 		case SIOCGSTAMP:
 			rc = -EINVAL;
-			lock_kernel();
 			if (sk)
 				rc = sock_get_timestamp(sk,
 						(struct timeval __user *)argp);
-			unlock_kernel();
 			break;
 		case SIOCGSTAMPNS:
 			rc = -EINVAL;
-			lock_kernel();
 			if (sk)
 				rc = sock_get_timestampns(sk,
 						(struct timespec __user *)argp);
-			unlock_kernel();
 			break;
 		case SIOCGIFADDR:
 		case SIOCSIFADDR:
@@ -1689,19 +1685,15 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 		break;
 	case SIOCGSTAMP:
 		rc = -EINVAL;
-		lock_kernel();
 		if (sk)
 			rc = compat_sock_get_timestamp(sk,
 					(struct timeval __user*)argp);
-		unlock_kernel();
 		break;
 	case SIOCGSTAMPNS:
 		rc = -EINVAL;
-		lock_kernel();
 		if (sk)
 			rc = compat_sock_get_timestampns(sk,
 					(struct timespec __user*)argp);
-		unlock_kernel();
 		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
-- 
cgit v1.2.3


From 54aafbd4989a684ca876e49bf3e6eb931654dc02 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 18 Nov 2010 13:21:28 +0000
Subject: X25: remove bkl in inq and outq ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 22597838cc76..2f235a6cb3b1 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1361,12 +1361,10 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case TIOCOUTQ: {
 			int amount;
 
-			lock_kernel();
 			amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
 			if (amount < 0)
 				amount = 0;
 			rc = put_user(amount, (unsigned int __user *)argp);
-			unlock_kernel();
 			break;
 		}
 
@@ -1377,11 +1375,11 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			 * These two are safe on a single CPU system as
 			 * only user tasks fiddle here
 			 */
-			lock_kernel();
+			lock_sock(sk);
 			if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
 				amount = skb->len;
+			release_sock(sk);
 			rc = put_user(amount, (unsigned int __user *)argp);
-			unlock_kernel();
 			break;
 		}
 
-- 
cgit v1.2.3


From 0670b8ae66daf1d326c7bd10e73daff5f18fcf92 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 18 Nov 2010 13:21:35 +0000
Subject: X25: remove bkl in routing ioctls

Routing doesn't use the socket data and is protected by x25_route_list_lock

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 2f235a6cb3b1..2351aceb296d 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1412,9 +1412,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = -EPERM;
 			if (!capable(CAP_NET_ADMIN))
 				break;
-			lock_kernel();
 			rc = x25_route_ioctl(cmd, argp);
-			unlock_kernel();
 			break;
 		case SIOCX25GSUBSCRIP:
 			lock_kernel();
@@ -1710,9 +1708,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 		rc = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		lock_kernel();
 		rc = x25_route_ioctl(cmd, argp);
-		unlock_kernel();
 		break;
 	case SIOCX25GSUBSCRIP:
 		lock_kernel();
-- 
cgit v1.2.3


From bbce5a59e4e0e6e1dbc85492caaf310ff6611309 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 20 Nov 2010 07:31:54 +0000
Subject: packet: use vzalloc()

alloc_one_pg_vec_page() is supposed to return zeroed memory, so use
vzalloc() instead of vmalloc()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b6372dd128d7..422705d62b5b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2367,7 +2367,7 @@ static inline char *alloc_one_pg_vec_page(unsigned long order,
 	 * __get_free_pages failed, fall back to vmalloc
 	 */
 	*flags |= PGV_FROM_VMALLOC;
-	buffer = vmalloc((1 << order) * PAGE_SIZE);
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
 
 	if (buffer)
 		return buffer;
-- 
cgit v1.2.3


From 551eaff1b384cc107eab6332ba8424b3ca1f304b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 21 Nov 2010 10:26:44 -0800
Subject: pktgen: allow faster module unload

Unloading pktgen module needs ~6 seconds on a 64 cpus machine, to stop
64 kthreads.

Add a pktgen_exiting variable to let kernel threads die faster, so that
kthread_stop() doesnt have to wait too long for them. This variable is
not tested in fast path.

Note : Before exiting from pktgen_thread_worker(), we must make sure
kthread_stop() is waiting for this thread to be stopped, like its done
in kernel/softirq.c

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 52fc1e08a7c4..2e57830cbeb2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -395,6 +395,8 @@ struct pktgen_hdr {
 	__be32 tv_usec;
 };
 
+static bool pktgen_exiting __read_mostly;
+
 struct pktgen_thread {
 	spinlock_t if_lock;		/* for list of devices */
 	struct list_head if_list;	/* All device here */
@@ -3451,11 +3453,6 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
 
 	remove_proc_entry(t->tsk->comm, pg_proc_dir);
 
-	mutex_lock(&pktgen_thread_lock);
-
-	list_del(&t->th_list);
-
-	mutex_unlock(&pktgen_thread_lock);
 }
 
 static void pktgen_resched(struct pktgen_dev *pkt_dev)
@@ -3602,6 +3599,8 @@ static int pktgen_thread_worker(void *arg)
 		pkt_dev = next_to_run(t);
 
 		if (unlikely(!pkt_dev && t->control == 0)) {
+			if (pktgen_exiting)
+				break;
 			wait_event_interruptible_timeout(t->queue,
 							 t->control != 0,
 							 HZ/10);
@@ -3654,6 +3653,13 @@ static int pktgen_thread_worker(void *arg)
 	pr_debug("%s removing thread\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
+	/* Wait for kthread_stop */
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
 	return 0;
 }
 
@@ -3928,6 +3934,7 @@ static void __exit pg_cleanup(void)
 	struct list_head *q, *n;
 
 	/* Stop all interfaces & threads */
+	pktgen_exiting = true;
 
 	list_for_each_safe(q, n, &pktgen_threads) {
 		t = list_entry(q, struct pktgen_thread, th_list);
-- 
cgit v1.2.3


From 87217502d4f45a9925c5eda5b2179f1220e87537 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:12 +0000
Subject: Net: bluetooth: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index d1e433f7d673..7ca1f46a471a 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP)	+= bnep/
 obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 
-bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
-- 
cgit v1.2.3


From a3106d032fb17283c96fa041f8285e6926ae074d Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:13 +0000
Subject: Net: caif: Makefile: Remove deprecated items

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Also, use the ccflags-$ flag instead of EXTRA_CFLAGS because EXTRA_CFLAGS is
deprecated and should now be switched.

Last but not least, took out if-conditionals.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/Makefile | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/caif/Makefile b/net/caif/Makefile
index f87481fb0e65..9d38e406e4a4 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,8 +1,6 @@
-ifeq ($(CONFIG_CAIF_DEBUG),y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_CAIF_DEBUG)     :=      -DDEBUG
 
-caif-objs := caif_dev.o \
+caif-y := caif_dev.o \
 	cfcnfg.o cfmuxl.o cfctrl.o  \
 	cffrml.o cfveil.o cfdbgl.o\
 	cfserl.o cfdgml.o  \
@@ -13,4 +11,4 @@ obj-$(CONFIG_CAIF) += caif.o
 obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
 obj-$(CONFIG_CAIF) += caif_socket.o
 
-export-objs := caif.o
+export-y := caif.o
-- 
cgit v1.2.3


From bac14e017830bd204b3a1bd55f42b0841c02e995 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:14 +0000
Subject: Net: can: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/can/Makefile b/net/can/Makefile
index 9cd3c4b3abda..2d3894b32742 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -3,10 +3,10 @@
 #
 
 obj-$(CONFIG_CAN)	+= can.o
-can-objs		:= af_can.o proc.o
+can-y			:= af_can.o proc.o
 
 obj-$(CONFIG_CAN_RAW)	+= can-raw.o
-can-raw-objs		:= raw.o
+can-raw-y		:= raw.o
 
 obj-$(CONFIG_CAN_BCM)	+= can-bcm.o
-can-bcm-objs		:= bcm.o
+can-bcm-y		:= bcm.o
-- 
cgit v1.2.3


From fa13bc3daa5954ce58f68fd34fb1611df6ea6e6f Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:15 +0000
Subject: Net: ceph: Makefile: remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index aab1cabb8035..153bdec40835 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),)
 
 obj-$(CONFIG_CEPH_LIB) += libceph.o
 
-libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
+libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	mon_client.o \
 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
 	debugfs.o \
-- 
cgit v1.2.3


From 22674a24b44ac53f244ef6edadd02021a270df5a Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:16 +0000
Subject: Net: dns_resolver: Makefile: Remove deprecated kbuild goal
 definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dns_resolver/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
index c0ef4e71dc49..d5c13c2eb36d 100644
--- a/net/dns_resolver/Makefile
+++ b/net/dns_resolver/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
 
-dns_resolver-objs :=  dns_key.o dns_query.o
+dns_resolver-y :=  dns_key.o dns_query.o
-- 
cgit v1.2.3


From cc0bdac399b1881626cd5512f292e396c9c96685 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:17 +0000
Subject: Net: econet: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/econet/Makefile b/net/econet/Makefile
index 39f0a77abdbd..05fae8be2fed 100644
--- a/net/econet/Makefile
+++ b/net/econet/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_ECONET) += econet.o
 
-econet-objs := af_econet.o
+econet-y := af_econet.o
-- 
cgit v1.2.3


From 6b8ff8c517008d93a6da62b106072a12dea8cb7c Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:18 +0000
Subject: Net: ipv4: netfilter: Makefile: Remove deprecated kbuild goal
 definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 48111594ee9b..19eb59d01037 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,15 +3,15 @@
 #
 
 # objects for l3 independent conntrack
-nf_conntrack_ipv4-objs  :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+nf_conntrack_ipv4-y	:=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
 ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
 ifeq ($(CONFIG_PROC_FS),y)
 nf_conntrack_ipv4-objs	+= nf_conntrack_l3proto_ipv4_compat.o
 endif
 endif
 
-nf_nat-objs		:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-iptable_nat-objs	:= nf_nat_rule.o nf_nat_standalone.o
+nf_nat-y		:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
+iptable_nat-y	:= nf_nat_rule.o nf_nat_standalone.o
 
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-- 
cgit v1.2.3


From 4de58dfebe6882dc1e8e8dc5ec062e28e99623cd Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:19 +0000
Subject: Net: ipv6: netfiliter: Makefile: Remove deprecated kbuild goal
 definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 0a432c9b0795..abfee91ce816 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 
 # objects for l3 independent conntrack
-nf_conntrack_ipv6-objs  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
 
 # l3 independent conntrack
 obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
 
 # defrag
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
 # matches
-- 
cgit v1.2.3


From f91c4ae4989322ed5cd10b5247e1a7bd3868a84e Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:20 +0000
Subject: Net: irda: ircomm: Makefile: Remove deprecated kbuild goal defintions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/ircomm/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile
index 48689458c086..ab23b5ba7e33 100644
--- a/net/irda/ircomm/Makefile
+++ b/net/irda/ircomm/Makefile
@@ -4,5 +4,5 @@
 
 obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o
 
-ircomm-objs := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
-ircomm-tty-objs := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
+ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o
+ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o
-- 
cgit v1.2.3


From cd30c62024904951392e21a200fe5427a6286736 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:21 +0000
Subject: Net: irda: irlan: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlan/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile
index 77549bc8641b..94eefbc8e6b9 100644
--- a/net/irda/irlan/Makefile
+++ b/net/irda/irlan/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_IRLAN) += irlan.o
 
-irlan-objs := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
+irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o
-- 
cgit v1.2.3


From 94ee288e94ab31cefe2c5af3b59c25a1374ca3e5 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:22 +0000
Subject: Net: irda: irnet: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile
index b3ee01e0def3..61c365c8a2a0 100644
--- a/net/irda/irnet/Makefile
+++ b/net/irda/irnet/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_IRNET) += irnet.o
 
-irnet-objs := irnet_ppp.o irnet_irda.o
+irnet-y := irnet_ppp.o irnet_irda.o
-- 
cgit v1.2.3


From 64387df8da1e4f178a47d53930288a46a357a479 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:23 +0000
Subject: Net: lapb: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/lapb/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/lapb/Makefile b/net/lapb/Makefile
index 53f7c90db163..fff797dfc88c 100644
--- a/net/lapb/Makefile
+++ b/net/lapb/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_LAPB) += lapb.o
 
-lapb-objs := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
+lapb-y := lapb_in.o lapb_out.o lapb_subr.o lapb_timer.o lapb_iface.o
-- 
cgit v1.2.3


From 927a41f50c83b539fde5c01911f4968d717199bf Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:24 +0000
Subject: Net: phonet: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index d62bbba649b3..e10b1b182ce3 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
 
-phonet-objs := \
+phonet-y := \
 	pn_dev.o \
 	pn_netlink.o \
 	socket.o \
@@ -8,4 +8,4 @@ phonet-objs := \
 	sysctl.o \
 	af_phonet.o
 
-pn_pep-objs := pep.o pep-gprs.o
+pn_pep-y := pep.o pep-gprs.o
-- 
cgit v1.2.3


From 094f2faaa2c4973e50979158f655a1d31a97ba98 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:25 +0000
Subject: Net: rds: Makefile: Remove deprecated items

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Also, use the ccflags-$ flag instead of EXTRA_CFLAGS because EXTRA_CFLAGS is
deprecated and should now be switched.

Last but not least, took out if-conditionals.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/Makefile | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/rds/Makefile b/net/rds/Makefile
index b46eca109688..56d3f6023ced 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -4,7 +4,7 @@ rds-y :=	af_rds.o bind.o cong.o connection.o info.o message.o   \
 			loop.o page.o rdma.o
 
 obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
-rds_rdma-objs :=	rdma_transport.o \
+rds_rdma-y :=	rdma_transport.o \
 			ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
 			ib_sysctl.o ib_rdma.o \
 			iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
@@ -12,10 +12,8 @@ rds_rdma-objs :=	rdma_transport.o \
 
 
 obj-$(CONFIG_RDS_TCP) += rds_tcp.o
-rds_tcp-objs :=		tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
+rds_tcp-y :=		tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
 			tcp_send.o tcp_stats.o
 
-ifeq ($(CONFIG_RDS_DEBUG), y)
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_RDS_DEBUG)	:=	-DDEBUG
 
-- 
cgit v1.2.3


From 9ed05ad3c0629f434b18d20c51162f9bbb4f5d31 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:26 +0000
Subject: Net: rxrpc: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index c46867c61c98..d1c3429b69ed 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Linux kernel RxRPC
 #
 
-af-rxrpc-objs := \
+af-rxrpc-y := \
 	af_rxrpc.o \
 	ar-accept.o \
 	ar-ack.o \
@@ -21,7 +21,7 @@ af-rxrpc-objs := \
 	ar-transport.o
 
 ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-objs += ar-proc.o
+af-rxrpc-y += ar-proc.o
 endif
 
 obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
-- 
cgit v1.2.3


From fdb26195f494988fc155c204aab0f0953ba7ec6f Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:27 +0000
Subject: Net: sunrpc: auth_gss: Makefile: Remove deprecated kbuild goal
 definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/auth_gss/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 7350d86a32ee..9e4cb59ef9f0 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -4,10 +4,10 @@
 
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
-auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
+auth_rpcgss-y := auth_gss.o gss_generic_token.o \
 	gss_mech_switch.o svcauth_gss.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
-rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
+rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
 	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
-- 
cgit v1.2.3


From e5700c740da2cb9f5a3aa978cd1fa3a79916ba04 Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:28 +0000
Subject: Net: wanrouter: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wanrouter/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wanrouter/Makefile b/net/wanrouter/Makefile
index 9f188ab3dcd0..4da14bc48078 100644
--- a/net/wanrouter/Makefile
+++ b/net/wanrouter/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_WAN_ROUTER) += wanrouter.o
 
-wanrouter-objs :=  wanproc.o wanmain.o
+wanrouter-y :=  wanproc.o wanmain.o
-- 
cgit v1.2.3


From 31e99729ae66d8b74316547c40eed15172f14ea8 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 17 Nov 2010 21:46:06 -0800
Subject: cfg80211: put core regulatory request into queue

This will simplify the synchronization for pending requests.
Without this we have a race between the core and when we
restore regulatory settings, although this is unlikely
its best to just avoid that race altogether.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Tested-by: Mark Mentovai <mark@moxienet.com>
Tested-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3be18d9a944f..9830db61019e 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1530,12 +1530,7 @@ static int regulatory_hint_core(const char *alpha2)
 	request->alpha2[1] = alpha2[1];
 	request->initiator = NL80211_REGDOM_SET_BY_CORE;
 
-	/*
-	 * This ensures last_request is populated once modules
-	 * come swinging in and calling regulatory hints and
-	 * wiphy_apply_custom_regulatory().
-	 */
-	reg_process_hint(request);
+	queue_regulatory_request(request);
 
 	return 0;
 }
-- 
cgit v1.2.3


From f333a7a2f49e2a9b46f8d18962bd750b18beeecd Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 17 Nov 2010 21:46:07 -0800
Subject: cfg80211: move reg_work and reg_todo above

These will be used earlier in the next few patches.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Tested-by: Mark Mentovai <mark@moxienet.com>
Tested-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 9830db61019e..3fa247488f87 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -96,6 +96,9 @@ struct reg_beacon {
 	struct ieee80211_channel chan;
 };
 
+static void reg_todo(struct work_struct *work);
+static DECLARE_WORK(reg_work, reg_todo);
+
 /* We keep a static world regulatory domain in case of the absence of CRDA */
 static const struct ieee80211_regdomain world_regdom = {
 	.n_reg_rules = 5,
@@ -1494,8 +1497,6 @@ static void reg_todo(struct work_struct *work)
 	reg_process_pending_beacon_hints();
 }
 
-static DECLARE_WORK(reg_work, reg_todo);
-
 static void queue_regulatory_request(struct regulatory_request *request)
 {
 	if (isalpha(request->alpha2[0]))
-- 
cgit v1.2.3


From b0e2880b0518ad11af20c7c93ec5cac93f9f03b0 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 17 Nov 2010 21:46:08 -0800
Subject: cfg80211: move mutex locking to reg_process_pending_hints()

This will be required in the next patch and it makes the
next patch easier to review.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Tested-by: Mark Mentovai <mark@moxienet.com>
Tested-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 3fa247488f87..b522c46c4748 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1412,16 +1412,13 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 
 	BUG_ON(!reg_request->alpha2);
 
-	mutex_lock(&cfg80211_mutex);
-	mutex_lock(&reg_mutex);
-
 	if (wiphy_idx_valid(reg_request->wiphy_idx))
 		wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
 
 	if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 	    !wiphy) {
 		kfree(reg_request);
-		goto out;
+		return;
 	}
 
 	r = __regulatory_hint(wiphy, reg_request);
@@ -1429,16 +1426,16 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 	if (r == -EALREADY && wiphy &&
 	    wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY)
 		wiphy_update_regulatory(wiphy, initiator);
-out:
-	mutex_unlock(&reg_mutex);
-	mutex_unlock(&cfg80211_mutex);
 }
 
 /* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */
 static void reg_process_pending_hints(void)
-	{
+{
 	struct regulatory_request *reg_request;
 
+	mutex_lock(&cfg80211_mutex);
+	mutex_lock(&reg_mutex);
+
 	spin_lock(&reg_requests_lock);
 	while (!list_empty(&reg_requests_list)) {
 		reg_request = list_first_entry(&reg_requests_list,
@@ -1451,6 +1448,9 @@ static void reg_process_pending_hints(void)
 		spin_lock(&reg_requests_lock);
 	}
 	spin_unlock(&reg_requests_lock);
+
+	mutex_unlock(&reg_mutex);
+	mutex_unlock(&cfg80211_mutex);
 }
 
 /* Processes beacon hints -- this has nothing to do with country IEs */
-- 
cgit v1.2.3


From b2e253cf300c5e33f49b7dd8b593bfc722177401 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 17 Nov 2010 21:46:09 -0800
Subject: cfg80211: Fix regulatory bug with multiple cards and delays

When two cards are connected with the same regulatory domain
if CRDA had a delayed response then cfg80211's own set regulatory
domain would still be the world regulatory domain. There was a bug
on cfg80211's logic such that it assumed that once you pegged a
request as the last request it was already the currently set
regulatory domain. This would mean we would race setting a stale
regulatory domain to secondary cards which had the same regulatory
domain since the alpha2 would match.

We fix this by processing each regulatory request atomically,
and only move on to the next one once we get it fully processed.
In the case CRDA is not present we will simply world roam.

This issue is only present when you have a slow system and the
CRDA processing is delayed. Because of this it is not a known
regression.

Without this fix when a delay is present with CRDA the second card
would end up with an intersected regulatory domain and not allow it
to use the channels it really is designed for. When two cards with
two different regulatory domains were inserted you'd end up
rejecting the second card's regulatory domain request.
This fails with mac80211_hswim's regtest=2 (two requests, same alpha2)
and regtest=3 (two requests, different alpha2) module parameter
options.

This was reproduced and tested against mac80211_hwsim using this
CRDA delayer:

       #!/bin/bash
       echo $COUNTRY >> /tmp/log
       sleep 2
       /sbin/crda.orig

And these regulatory tests:

       modprobe mac80211_hwsim regtest=2
       modprobe mac80211_hwsim regtest=3

Reported-by: Mark Mentovai <mark@moxienet.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Tested-by: Mark Mentovai <mark@moxienet.com>
Tested-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/regulatory.h |  7 +++++++
 net/wireless/reg.c       | 52 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 50 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 9e103a4e91ee..356d6e3dc20a 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -43,6 +43,12 @@ enum environment_cap {
  * @intersect: indicates whether the wireless core should intersect
  * 	the requested regulatory domain with the presently set regulatory
  * 	domain.
+ * @processed: indicates whether or not this requests has already been
+ *	processed. When the last request is processed it means that the
+ *	currently regulatory domain set on cfg80211 is updated from
+ *	CRDA and can be used by other regulatory requests. When a
+ *	the last request is not yet processed we must yield until it
+ *	is processed before processing any new requests.
  * @country_ie_checksum: checksum of the last processed and accepted
  * 	country IE
  * @country_ie_env: lets us know if the AP is telling us we are outdoor,
@@ -54,6 +60,7 @@ struct regulatory_request {
 	enum nl80211_reg_initiator initiator;
 	char alpha2[2];
 	bool intersect;
+	bool processed;
 	enum environment_cap country_ie_env;
 	struct list_head list;
 };
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b522c46c4748..bc14caab19cd 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1320,6 +1320,21 @@ static int ignore_request(struct wiphy *wiphy,
 	return -EINVAL;
 }
 
+static void reg_set_request_processed(void)
+{
+	bool need_more_processing = false;
+
+	last_request->processed = true;
+
+	spin_lock(&reg_requests_lock);
+	if (!list_empty(&reg_requests_list))
+		need_more_processing = true;
+	spin_unlock(&reg_requests_lock);
+
+	if (need_more_processing)
+		schedule_work(&reg_work);
+}
+
 /**
  * __regulatory_hint - hint to the wireless core a regulatory domain
  * @wiphy: if the hint comes from country information from an AP, this
@@ -1395,8 +1410,10 @@ new_request:
 		 * have applied the requested regulatory domain before we just
 		 * inform userspace we have processed the request
 		 */
-		if (r == -EALREADY)
+		if (r == -EALREADY) {
 			nl80211_send_reg_change_event(last_request);
+			reg_set_request_processed();
+		}
 		return r;
 	}
 
@@ -1428,7 +1445,11 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 		wiphy_update_regulatory(wiphy, initiator);
 }
 
-/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */
+/*
+ * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_*
+ * Regulatory hints come on a first come first serve basis and we
+ * must process each one atomically.
+ */
 static void reg_process_pending_hints(void)
 {
 	struct regulatory_request *reg_request;
@@ -1436,19 +1457,30 @@ static void reg_process_pending_hints(void)
 	mutex_lock(&cfg80211_mutex);
 	mutex_lock(&reg_mutex);
 
+	/* When last_request->processed becomes true this will be rescheduled */
+	if (last_request && !last_request->processed) {
+		REG_DBG_PRINT("Pending regulatory request, waiting "
+			      "for it to be processed...");
+		goto out;
+	}
+
 	spin_lock(&reg_requests_lock);
-	while (!list_empty(&reg_requests_list)) {
-		reg_request = list_first_entry(&reg_requests_list,
-					       struct regulatory_request,
-					       list);
-		list_del_init(&reg_request->list);
 
+	if (list_empty(&reg_requests_list)) {
 		spin_unlock(&reg_requests_lock);
-		reg_process_hint(reg_request);
-		spin_lock(&reg_requests_lock);
+		goto out;
 	}
+
+	reg_request = list_first_entry(&reg_requests_list,
+				       struct regulatory_request,
+				       list);
+	list_del_init(&reg_request->list);
+
 	spin_unlock(&reg_requests_lock);
 
+	reg_process_hint(reg_request);
+
+out:
 	mutex_unlock(&reg_mutex);
 	mutex_unlock(&cfg80211_mutex);
 }
@@ -2057,6 +2089,8 @@ int set_regdom(const struct ieee80211_regdomain *rd)
 
 	nl80211_send_reg_change_event(last_request);
 
+	reg_set_request_processed();
+
 	mutex_unlock(&reg_mutex);
 
 	return r;
-- 
cgit v1.2.3


From 18890d4b89d8507ad09289f6f57a71591c7e9e83 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 19 Nov 2010 08:11:01 +0100
Subject: mac80211: Disable hw crypto for GTKs on AP VLAN interfaces

When using AP VLAN interfaces, each VLAN interface should be in its own
broadcast domain. Hostapd achieves this by assigning different GTKs to
different AP VLAN interfaces.

However, mac80211 drivers are not aware of AP VLAN interfaces and as
such mac80211 sends the GTK to the driver in the context of the base AP
mode interface. This causes problems when multiple AP VLAN interfaces
are used since the driver will use the same key slot for the different
GTKs (there's no way for the driver to distinguish the different GTKs
from different AP VLAN interfaces). Thus, only the clients associated
to one AP VLAN interface (the one that was created last) can actually
use broadcast traffic.

Fix this by not programming any GTKs for AP VLAN interfaces into the hw
but fall back to using software crypto. The GTK for the underlying AP
interface is still sent to the driver.

That means, broadcast traffic to stations associated to an AP VLAN
interface is encrypted in software whereas broadcast traffic to
stations associated to the non-VLAN AP interface is encrypted in
hardware.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ccd676b2f599..72df1ca7299b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -84,10 +84,17 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 		goto out_unsupported;
 
 	sdata = key->sdata;
-	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+		/*
+		 * The driver doesn't know anything about VLAN interfaces.
+		 * Hence, don't send GTKs for VLAN interfaces to the driver.
+		 */
+		if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
+			goto out_unsupported;
 		sdata = container_of(sdata->bss,
 				     struct ieee80211_sub_if_data,
 				     u.ap);
+	}
 
 	ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
 
-- 
cgit v1.2.3


From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 23 Nov 2010 13:12:15 +0000
Subject: ipv6: mcast: RCU conversion

ipv6_sk_mc_lock rwlock becomes a spinlock.

readers (inet6_mc_check()) now takes rcu_read_lock() instead of read
lock. Writers dont need to disable BH anymore.

struct ipv6_mc_socklist objects are reclaimed after one RCU grace
period.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h   |  2 +-
 include/net/if_inet6.h |  3 +-
 net/ipv6/mcast.c       | 75 +++++++++++++++++++++++++++++---------------------
 3 files changed, 47 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8e429d0e0405..0c997767429a 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -364,7 +364,7 @@ struct ipv6_pinfo {
 
 	__u32			dst_cookie;
 
-	struct ipv6_mc_socklist	*ipv6_mc_list;
+	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
 	struct ipv6_fl_socklist *ipv6_fl_list;
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index f95ff8d9aa47..04977eefb0ee 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -89,10 +89,11 @@ struct ip6_sf_socklist {
 struct ipv6_mc_socklist {
 	struct in6_addr		addr;
 	int			ifindex;
-	struct ipv6_mc_socklist *next;
+	struct ipv6_mc_socklist __rcu *next;
 	rwlock_t		sflock;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip6_sf_socklist	*sflist;
+	struct rcu_head		rcu;
 };
 
 struct ip6_sf_list {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9c5074528a71..49f986d626a0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 /* Big mc list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
@@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
  *	socket join on multicast group
  */
 
+#define for_each_pmc_rcu(np, pmc)				\
+	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
+	     pmc != NULL;					\
+	     pmc = rcu_dereference(pmc->next))
+
 int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct net_device *dev = NULL;
@@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
-	for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			read_unlock_bh(&ipv6_sk_mc_lock);
+			rcu_read_unlock();
 			return -EADDRINUSE;
 		}
 	}
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
 
@@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return err;
 	}
 
-	write_lock_bh(&ipv6_sk_mc_lock);
+	spin_lock(&ipv6_sk_mc_lock);
 	mc_lst->next = np->ipv6_mc_list;
-	np->ipv6_mc_list = mc_lst;
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	rcu_read_unlock();
 
 	return 0;
 }
 
+static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ipv6_mc_socklist, rcu));
+}
 /*
  *	socket leave on multicast group
  */
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct ipv6_mc_socklist *mc_lst, **lnk;
+	struct ipv6_mc_socklist *mc_lst;
+	struct ipv6_mc_socklist __rcu **lnk;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+	spin_lock(&ipv6_sk_mc_lock);
+	for (lnk = &np->ipv6_mc_list;
+	     (mc_lst = rcu_dereference_protected(*lnk,
+			lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
 			struct net_device *dev;
 
 			*lnk = mc_lst->next;
-			write_unlock_bh(&ipv6_sk_mc_lock);
+			spin_unlock(&ipv6_sk_mc_lock);
 
 			rcu_read_lock();
 			dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 			rcu_read_unlock();
-			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+			call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
 			return 0;
 		}
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 
 	return -EADDRNOTAVAIL;
 }
@@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 	struct ipv6_mc_socklist *mc_lst;
 	struct net *net = sock_net(sk);
 
-	write_lock_bh(&ipv6_sk_mc_lock);
-	while ((mc_lst = np->ipv6_mc_list) != NULL) {
+	spin_lock(&ipv6_sk_mc_lock);
+	while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+				lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
-		write_unlock_bh(&ipv6_sk_mc_lock);
+		spin_unlock(&ipv6_sk_mc_lock);
 
 		rcu_read_lock();
 		dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk)
 		} else
 			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
 		rcu_read_unlock();
-		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
-		write_lock_bh(&ipv6_sk_mc_lock);
+		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+		call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+
+		spin_lock(&ipv6_sk_mc_lock);
 	}
-	write_unlock_bh(&ipv6_sk_mc_lock);
+	spin_unlock(&ipv6_sk_mc_lock);
 }
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 	err = -EADDRNOTAVAIL;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 done:
 	if (pmclocked)
 		write_unlock(&pmc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	dev = idev->dev;
 
 	err = 0;
-	read_lock(&ipv6_sk_mc_lock);
 
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
 		goto done;
 	}
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	write_unlock(&pmc->sflock);
 	err = 0;
 done:
-	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	 * so reading the list is safe.
 	 */
 
-	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+	for_each_pmc_rcu(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
@@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	struct ip6_sf_socklist *psl;
 	int rv = 1;
 
-	read_lock(&ipv6_sk_mc_lock);
-	for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+	rcu_read_lock();
+	for_each_pmc_rcu(np, mc) {
 		if (ipv6_addr_equal(&mc->addr, mc_addr))
 			break;
 	}
 	if (!mc) {
-		read_unlock(&ipv6_sk_mc_lock);
+		rcu_read_unlock();
 		return 1;
 	}
 	read_lock(&mc->sflock);
@@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 			rv = 0;
 	}
 	read_unlock(&mc->sflock);
-	read_unlock(&ipv6_sk_mc_lock);
+	rcu_read_unlock();
 
 	return rv;
 }
-- 
cgit v1.2.3


From bba14de98753cb6599a2dae0e520714b2153522d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 23 Nov 2010 14:09:15 +0000
Subject: scm: lower SCM_MAX_FD

Lower SCM_MAX_FD from 255 to 253 so that allocations for scm_fp_list are
halved. (commit f8d570a4 added two pointers in this structure)

scm_fp_dup() should not copy whole structure (and trigger kmemcheck
warnings), but only the used part. While we are at it, only allocate
needed size.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h |  5 +++--
 net/core/scm.c    | 10 ++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/scm.h b/include/net/scm.h
index 31656506d967..745460fa2f02 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -10,11 +10,12 @@
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
  */
-#define SCM_MAX_FD	255
+#define SCM_MAX_FD	253
 
 struct scm_fp_list {
 	struct list_head	list;
-	int			count;
+	short			count;
+	short			max;
 	struct file		*fp[SCM_MAX_FD];
 };
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 413cab89017d..bbe454450801 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 			return -ENOMEM;
 		*fplp = fpl;
 		fpl->count = 0;
+		fpl->max = SCM_MAX_FD;
 	}
 	fpp = &fpl->fp[fpl->count];
 
-	if (fpl->count + num > SCM_MAX_FD)
+	if (fpl->count + num > fpl->max)
 		return -EINVAL;
 
 	/*
@@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	if (!fpl)
 		return NULL;
 
-	new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
+	new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
+			  GFP_KERNEL);
 	if (new_fpl) {
-		for (i=fpl->count-1; i>=0; i--)
+		for (i = 0; i < fpl->count; i++)
 			get_file(fpl->fp[i]);
-		memcpy(new_fpl, fpl, sizeof(*fpl));
+		new_fpl->max = new_fpl->count;
 	}
 	return new_fpl;
 }
-- 
cgit v1.2.3


From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:29 +0000
Subject: xps: Improvements in TX queue selection

In dev_pick_tx, don't do work in calculating queue
index or setting
the index in the sock unless the device has more than one queue.  This
allows the sock to be set only with a queue index of a multi-queue
device which is desirable if device are stacked like in a tunnel.

We also allow the mapping of a socket to queue to be changed.  To
maintain in order packet transmission a flag (ooo_okay) has been
added to the sk_buff structure.  If a transport layer sets this flag
on a packet, the transmit queue can be changed for the socket.
Presumably, the transport would set this if there was no possbility
of creating OOO packets (for instance, there are no packets in flight
for the socket).  This patch includes the modification in TCP output
for setting this flag.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 ++-
 net/core/dev.c         | 18 +++++++++++-------
 net/ipv4/tcp_output.c  |  5 ++++-
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898de61c..19f37a6ee6c4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -386,9 +386,10 @@ struct sk_buff {
 #else
 	__u8			deliver_no_wcard:1;
 #endif
+	__u8			ooo_okay:1;
 	kmemcheck_bitfield_end(flags2);
 
-	/* 0/14 bit hole */
+	/* 0/13 bit hole */
 
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
diff --git a/net/core/dev.c b/net/core/dev.c
index 381b8e280162..7b17674a29ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_select_queue) {
+	if (dev->real_num_tx_queues == 1)
+		queue_index = 0;
+	else if (ops->ndo_select_queue) {
 		queue_index = ops->ndo_select_queue(dev, skb);
 		queue_index = dev_cap_txqueue(dev, queue_index);
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
-			queue_index = 0;
-			if (dev->real_num_tx_queues > 1)
-				queue_index = skb_tx_hash(dev, skb);
+		if (queue_index < 0 || skb->ooo_okay ||
+		    queue_index >= dev->real_num_tx_queues) {
+			int old_index = queue_index;
 
-			if (sk) {
-				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+			queue_index = skb_tx_hash(dev, skb);
+
+			if (queue_index != old_index && sk) {
+				struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
 				if (dst && skb_dst(skb) == dst)
 					sk_tx_queue_set(sk, queue_index);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bb8f547fc7d2..5f29b2e20e23 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -822,8 +822,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
+	if (tcp_packets_in_flight(tp) == 0) {
 		tcp_ca_event(sk, CA_EVENT_TX_START);
+		skb->ooo_okay = 1;
+	} else
+		skb->ooo_okay = 0;
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
-- 
cgit v1.2.3


From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:27 +0000
Subject: xps: Transmit Packet Steering

This patch implements transmit packet steering (XPS) for multiqueue
devices.  XPS selects a transmit queue during packet transmission based
on configuration.  This is done by mapping the CPU transmitting the
packet to a queue.  This is the transmit side analogue to RPS-- where
RPS is selecting a CPU based on receive queue, XPS selects a queue
based on the CPU (previously there was an XPS patch from Eric
Dumazet, but that might more appropriately be called transmit completion
steering).

Each transmit queue can be associated with a number of CPUs which will
use the queue to send packets.  This is configured as a CPU mask on a
per queue basis in:

/sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

The mappings are stored per device in an inverted data structure that
maps CPUs to queues.  In the netdevice structure this is an array of
num_possible_cpu structures where each structure holds and array of
queue_indexes for queues which that CPU can use.

The benefits of XPS are improved locality in the per queue data
structures.  Also, transmit completions are more likely to be done
nearer to the sending thread, so this should promote locality back
to the socket on free (e.g. UDP).  The benefits of XPS are dependent on
cache hierarchy, application load, and other factors.  XPS would
nominally be configured so that a queue would only be shared by CPUs
which are sharing a cache, the degenerative configuration woud be that
each CPU has it's own queue.

Below are some benchmark results which show the potential benfit of
this patch.  The netperf test has 500 instances of netperf TCP_RR test
with 1 byte req. and resp.

bnx2x on 16 core AMD
   XPS (16 queues, 1 TX queue per CPU)  1234K at 100% CPU
   No XPS (16 queues)                   996K at 100% CPU

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  30 ++++
 net/core/dev.c            |  53 ++++++-
 net/core/net-sysfs.c      | 369 +++++++++++++++++++++++++++++++++++++++++++++-
 net/core/net-sysfs.h      |   3 +
 4 files changed, 447 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b45c1b8b1d19..badf9285fe0d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -503,6 +503,10 @@ struct netdev_queue {
 	struct Qdisc		*qdisc;
 	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
+#ifdef CONFIG_RPS
+	struct kobject		kobj;
+#endif
+
 /*
  * write mostly part
  */
@@ -529,6 +533,30 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
+/*
+ * This structure holds an XPS map which can be of variable length.  The
+ * map is an array of queues.
+ */
+struct xps_map {
+	unsigned int len;
+	unsigned int alloc_len;
+	struct rcu_head rcu;
+	u16 queues[0];
+};
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
+    / sizeof(u16))
+
+/*
+ * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ */
+struct xps_dev_maps {
+	struct rcu_head rcu;
+	struct xps_map *cpu_map[0];
+};
+#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
+    (nr_cpu_ids * sizeof(struct xps_map *)))
+
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
  * tail pointer for that CPU's input queue at the time of last enqueue.
@@ -1016,6 +1044,8 @@ struct net_device {
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 
+	struct xps_dev_maps	*xps_maps;
+
 	/* These may be needed for future network-power-down code. */
 
 	/*
diff --git a/net/core/dev.c b/net/core/dev.c
index 7b17674a29ec..c852f0038a08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
+	int rc;
+
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
 	if (dev->reg_state == NETREG_REGISTERED) {
 		ASSERT_RTNL();
 
+		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+						  txq);
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 	return queue_index;
 }
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
@@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 		    queue_index >= dev->real_num_tx_queues) {
 			int old_index = queue_index;
 
-			queue_index = skb_tx_hash(dev, skb);
+			queue_index = get_xps_queue(dev, skb);
+			if (queue_index < 0)
+				queue_index = skb_tx_hash(dev, skb);
 
 			if (queue_index != old_index && sk) {
 				struct dst_entry *dst =
@@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
+	int i;
 
 	BUG_ON(count < 1);
 
@@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 		return -ENOMEM;
 	}
 	dev->_tx = tx;
+
+	for (i = 0; i < count; i++)
+		tx[i].dev = dev;
+
 	return 0;
 }
 
@@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue,
 				  void *_unused)
 {
-	queue->dev = dev;
-
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7abeb7ceaa4c..68dbbfdee274 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -772,18 +772,377 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 	return error;
 }
 
-static int rx_queue_register_kobjects(struct net_device *net)
+/*
+ * netdev_queue sysfs structures and functions.
+ */
+struct netdev_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_queue *queue,
+	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_netdev_queue_attr(_attr) container_of(_attr,		\
+    struct netdev_queue_attribute, attr)
+
+#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
+
+static ssize_t netdev_queue_attr_show(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t netdev_queue_attr_store(struct kobject *kobj,
+				       struct attribute *attr,
+				       const char *buf, size_t count)
+{
+	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static const struct sysfs_ops netdev_queue_sysfs_ops = {
+	.show = netdev_queue_attr_show,
+	.store = netdev_queue_attr_store,
+};
+
+static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
+	struct net_device *dev = queue->dev;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++)
+		if (queue == &dev->_tx[i])
+			break;
+
+	BUG_ON(i >= dev->num_tx_queues);
+
+	return i;
+}
+
+
+static ssize_t show_xps_map(struct netdev_queue *queue,
+			    struct netdev_queue_attribute *attribute, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	cpumask_var_t mask;
+	unsigned long index;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			struct xps_map *map =
+			    rcu_dereference(dev_maps->cpu_map[i]);
+			if (map) {
+				int j;
+				for (j = 0; j < map->len; j++) {
+					if (map->queues[j] == index) {
+						cpumask_set_cpu(i, mask);
+						break;
+					}
+				}
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void xps_map_release(struct rcu_head *rcu)
+{
+	struct xps_map *map = container_of(rcu, struct xps_map, rcu);
+
+	kfree(map);
+}
+
+static void xps_dev_maps_release(struct rcu_head *rcu)
+{
+	struct xps_dev_maps *dev_maps =
+	    container_of(rcu, struct xps_dev_maps, rcu);
+
+	kfree(dev_maps);
+}
+
+static DEFINE_MUTEX(xps_map_mutex);
+
+static ssize_t store_xps_map(struct netdev_queue *queue,
+		      struct netdev_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct net_device *dev = queue->dev;
+	cpumask_var_t mask;
+	int err, i, cpu, pos, map_len, alloc_len, need_set;
+	unsigned long index;
+	struct xps_map *map, *new_map;
+	struct xps_dev_maps *dev_maps, *new_dev_maps;
+	int nonempty = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	new_dev_maps = kzalloc(max_t(unsigned,
+	    XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES), GFP_KERNEL);
+	if (!new_dev_maps) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&xps_map_mutex);
+
+	dev_maps = dev->xps_maps;
+
+	for_each_possible_cpu(cpu) {
+		new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
+
+		if (map) {
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+			map_len = map->len;
+			alloc_len = map->alloc_len;
+		} else
+			pos = map_len = alloc_len = 0;
+
+		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
+
+		if (need_set && pos >= map_len) {
+			/* Need to add queue to this CPU's map */
+			if (map_len >= alloc_len) {
+				alloc_len = alloc_len ?
+				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
+				new_map = kzalloc(XPS_MAP_SIZE(alloc_len),
+				    GFP_KERNEL);
+				if (!new_map)
+					goto error;
+				new_map->alloc_len = alloc_len;
+				for (i = 0; i < map_len; i++)
+					new_map->queues[i] = map->queues[i];
+				new_map->len = map_len;
+			}
+			new_map->queues[new_map->len++] = index;
+		} else if (!need_set && pos < map_len) {
+			/* Need to remove queue from this CPU's map */
+			if (map_len > 1)
+				new_map->queues[pos] =
+				    new_map->queues[--new_map->len];
+			else
+				new_map = NULL;
+		}
+		new_dev_maps->cpu_map[cpu] = new_map;
+	}
+
+	/* Cleanup old maps */
+	for_each_possible_cpu(cpu) {
+		map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
+		if (map && new_dev_maps->cpu_map[cpu] != map)
+			call_rcu(&map->rcu, xps_map_release);
+		if (new_dev_maps->cpu_map[cpu])
+			nonempty = 1;
+	}
+
+	if (nonempty)
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	else {
+		kfree(new_dev_maps);
+		rcu_assign_pointer(dev->xps_maps, NULL);
+	}
+
+	if (dev_maps)
+		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+
+	mutex_unlock(&xps_map_mutex);
+
+	free_cpumask_var(mask);
+	return len;
+
+error:
+	mutex_unlock(&xps_map_mutex);
+
+	if (new_dev_maps)
+		for_each_possible_cpu(i)
+			kfree(new_dev_maps->cpu_map[i]);
+	kfree(new_dev_maps);
+	free_cpumask_var(mask);
+	return -ENOMEM;
+}
+
+static struct netdev_queue_attribute xps_cpus_attribute =
+    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+
+static struct attribute *netdev_queue_default_attrs[] = {
+	&xps_cpus_attribute.attr,
+	NULL
+};
+
+static void netdev_queue_release(struct kobject *kobj)
+{
+	struct netdev_queue *queue = to_netdev_queue(kobj);
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	unsigned long index;
+	int i, pos, nonempty = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	mutex_lock(&xps_map_mutex);
+	dev_maps = dev->xps_maps;
+
+	if (dev_maps) {
+		for_each_possible_cpu(i) {
+			map  = dev_maps->cpu_map[i];
+			if (!map)
+				continue;
+
+			for (pos = 0; pos < map->len; pos++)
+				if (map->queues[pos] == index)
+					break;
+
+			if (pos < map->len) {
+				if (map->len > 1)
+					map->queues[pos] =
+					    map->queues[--map->len];
+				else {
+					RCU_INIT_POINTER(dev_maps->cpu_map[i],
+					    NULL);
+					call_rcu(&map->rcu, xps_map_release);
+					map = NULL;
+				}
+			}
+			if (map)
+				nonempty = 1;
+		}
+
+		if (!nonempty) {
+			RCU_INIT_POINTER(dev->xps_maps, NULL);
+			call_rcu(&dev_maps->rcu, xps_dev_maps_release);
+		}
+	}
+
+	mutex_unlock(&xps_map_mutex);
+
+	memset(kobj, 0, sizeof(*kobj));
+	dev_put(queue->dev);
+}
+
+static struct kobj_type netdev_queue_ktype = {
+	.sysfs_ops = &netdev_queue_sysfs_ops,
+	.release = netdev_queue_release,
+	.default_attrs = netdev_queue_default_attrs,
+};
+
+static int netdev_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_queue *queue = net->_tx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
+	    "tx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+	dev_hold(queue->dev);
+
+	return error;
+}
+
+int
+netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
+{
+	int i;
+	int error = 0;
+
+	for (i = old_num; i < new_num; i++) {
+		error = netdev_queue_add_kobject(net, i);
+		if (error) {
+			new_num = old_num;
+			break;
+		}
+	}
+
+	while (--i >= new_num)
+		kobject_put(&net->_tx[i].kobj);
+
+	return error;
+}
+
+static int register_queue_kobjects(struct net_device *net)
+{
+	int error = 0, txq = 0, rxq = 0;
+
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
-	return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+
+	error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+	if (error)
+		goto error;
+	rxq = net->real_num_rx_queues;
+
+	error = netdev_queue_update_kobjects(net, 0,
+					     net->real_num_tx_queues);
+	if (error)
+		goto error;
+	txq = net->real_num_tx_queues;
+
+	return 0;
+
+error:
+	netdev_queue_update_kobjects(net, txq, 0);
+	net_rx_queue_update_kobjects(net, rxq, 0);
+	return error;
 }
 
-static void rx_queue_remove_kobjects(struct net_device *net)
+static void remove_queue_kobjects(struct net_device *net)
 {
 	net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
+	netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0);
 	kset_unregister(net->queues_kset);
 }
 #endif /* CONFIG_RPS */
@@ -886,7 +1245,7 @@ void netdev_unregister_kobject(struct net_device * net)
 	kobject_get(&dev->kobj);
 
 #ifdef CONFIG_RPS
-	rx_queue_remove_kobjects(net);
+	remove_queue_kobjects(net);
 #endif
 
 	device_del(dev);
@@ -927,7 +1286,7 @@ int netdev_register_kobject(struct net_device *net)
 		return error;
 
 #ifdef CONFIG_RPS
-	error = rx_queue_register_kobjects(net);
+	error = register_queue_kobjects(net);
 	if (error) {
 		device_del(dev);
 		return error;
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 778e1571548d..25ec2ee57df7 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -6,6 +6,9 @@ int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
 #ifdef CONFIG_RPS
 int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+int netdev_queue_update_kobjects(struct net_device *net,
+				 int old_num, int new_num);
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From ccb14354017272ddac002e859a2711610b6af174 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 24 Nov 2010 16:18:36 -0500
Subject: Revert "nl80211/mac80211: Report signal average"

This reverts commit 86107fd170bc379869250eb7e1bd393a3a70e8ae.

This patch inadvertantly changed the userland ABI.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 --
 include/net/cfg80211.h  | 4 ----
 net/mac80211/Kconfig    | 1 -
 net/mac80211/cfg.c      | 3 +--
 net/mac80211/rx.c       | 1 -
 net/mac80211/sta_info.c | 2 --
 net/mac80211/sta_info.h | 3 ---
 net/wireless/nl80211.c  | 3 ---
 8 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1ce3775e9e26..037b4e498890 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1161,7 +1161,6 @@ enum nl80211_rate_info {
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
- * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_sta_info_txrate.
  * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
@@ -1179,7 +1178,6 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_PLID,
 	NL80211_STA_INFO_PLINK_STATE,
 	NL80211_STA_INFO_SIGNAL,
-	NL80211_STA_INFO_SIGNAL_AVG,
 	NL80211_STA_INFO_TX_BITRATE,
 	NL80211_STA_INFO_RX_PACKETS,
 	NL80211_STA_INFO_TX_PACKETS,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 69e2364889f1..8fd9eebd0cc9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -424,7 +424,6 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
- * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -440,7 +439,6 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
-	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -487,7 +485,6 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
- * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -508,7 +505,6 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
-	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 798d9b9462e2..4d6f8653ec88 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,7 +6,6 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
-	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 92c9cf6a7d1c..0c544074479e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -343,9 +343,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
+		sinfo->filled |= STATION_INFO_SIGNAL;
 		sinfo->signal = (s8)sta->last_signal;
-		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 9dd60a74181f..d2fcd22ab06d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1156,7 +1156,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
-	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f43fca8907f7..eff58571fd7e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,8 +244,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
-	ewma_init(&sta->avg_signal, 1000, 8);
-
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 84062e2c782c..9265acadef32 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,7 +13,6 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
-#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -225,7 +224,6 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
- * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -293,7 +291,6 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
-	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d06a40d17002..605553842226 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1872,9 +1872,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
-	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
-		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
-			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3


From e9c0268f02f8970149158a9b7ea1e5c1c45c819d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 16 Nov 2010 19:56:49 -0800
Subject: net/wireless: Use pr_<level> and netdev_<level>

No change in output for pr_<level> prefixes.
netdev_<level> output is different, arguably improved.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/core.c                |  8 +++----
 net/wireless/lib80211.c            |  8 ++++---
 net/wireless/lib80211_crypt_tkip.c | 16 ++++++-------
 net/wireless/reg.c                 | 47 +++++++++++++++-----------------------
 net/wireless/util.c                | 11 ++++-----
 net/wireless/wext-core.c           | 10 ++++----
 6 files changed, 44 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 9c21ebf9780e..630bcf0a2f04 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -4,6 +4,8 @@
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/if.h>
 #include <linux/module.h>
 #include <linux/err.h>
@@ -216,8 +218,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
 			    rdev->wiphy.debugfsdir,
 			    rdev->wiphy.debugfsdir->d_parent,
 			    newname))
-		printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
-		       newname);
+		pr_err("failed to rename debugfs dir to %s!\n", newname);
 
 	nl80211_notify_dev_rename(rdev);
 
@@ -699,8 +700,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 
 		if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
 				      "phy80211")) {
-			printk(KERN_ERR "wireless: failed to add phy80211 "
-				"symlink to netdev!\n");
+			pr_err("failed to add phy80211 symlink to netdev!\n");
 		}
 		wdev->netdev = dev;
 		wdev->sme_state = CFG80211_SME_IDLE;
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index 97d411f74507..3268fac5ab22 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -13,6 +13,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/ieee80211.h>
@@ -224,8 +226,8 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
 	return -EINVAL;
 
       found:
-	printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm "
-	       "'%s'\n", ops->name);
+	printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm '%s'\n",
+	       ops->name);
 	list_del(&alg->list);
 	spin_unlock_irqrestore(&lib80211_crypto_lock, flags);
 	kfree(alg);
@@ -270,7 +272,7 @@ static struct lib80211_crypto_ops lib80211_crypt_null = {
 
 static int __init lib80211_init(void)
 {
-	printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n");
+	pr_info(DRV_DESCRIPTION "\n");
 	return lib80211_register_crypto_ops(&lib80211_crypt_null);
 }
 
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 0fe40510e2cb..7ea4f2b0770e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -10,6 +10,8 @@
  * more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -99,8 +101,7 @@ static void *lib80211_tkip_init(int key_idx)
 	priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
 						CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->tx_tfm_arc4)) {
-		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
-		       "crypto API arc4\n");
+		printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n"));
 		priv->tx_tfm_arc4 = NULL;
 		goto fail;
 	}
@@ -108,8 +109,7 @@ static void *lib80211_tkip_init(int key_idx)
 	priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
 						 CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->tx_tfm_michael)) {
-		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
-		       "crypto API michael_mic\n");
+		printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n"));
 		priv->tx_tfm_michael = NULL;
 		goto fail;
 	}
@@ -117,8 +117,7 @@ static void *lib80211_tkip_init(int key_idx)
 	priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0,
 						CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->rx_tfm_arc4)) {
-		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
-		       "crypto API arc4\n");
+		printk(KERN_DEBUG pr_fmt("could not allocate crypto API arc4\n"));
 		priv->rx_tfm_arc4 = NULL;
 		goto fail;
 	}
@@ -126,8 +125,7 @@ static void *lib80211_tkip_init(int key_idx)
 	priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0,
 						 CRYPTO_ALG_ASYNC);
 	if (IS_ERR(priv->rx_tfm_michael)) {
-		printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate "
-		       "crypto API michael_mic\n");
+		printk(KERN_DEBUG pr_fmt("could not allocate crypto API michael_mic\n"));
 		priv->rx_tfm_michael = NULL;
 		goto fail;
 	}
@@ -536,7 +534,7 @@ static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr,
 	struct scatterlist sg[2];
 
 	if (tfm_michael == NULL) {
-		printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n");
+		pr_warn("%s(): tfm_michael == NULL\n", __func__);
 		return -1;
 	}
 	sg_init_table(sg, 2);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bc14caab19cd..5ed615f94e0c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -32,6 +32,9 @@
  * rely on some SHA1 checksum of the regdomain for example.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/list.h>
@@ -48,7 +51,7 @@
 #ifdef CONFIG_CFG80211_REG_DEBUG
 #define REG_DBG_PRINT(format, args...) \
 	do { \
-		printk(KERN_DEBUG "cfg80211: " format , ## args); \
+		printk(KERN_DEBUG pr_fmt(format), ##args);	\
 	} while (0)
 #else
 #define REG_DBG_PRINT(args...)
@@ -370,11 +373,10 @@ static int call_crda(const char *alpha2)
 	};
 
 	if (!is_world_regdom((char *) alpha2))
-		printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n",
+		pr_info("Calling CRDA for country: %c%c\n",
 			alpha2[0], alpha2[1]);
 	else
-		printk(KERN_INFO "cfg80211: Calling CRDA to update world "
-			"regulatory domain\n");
+		pr_info("Calling CRDA to update world regulatory domain\n");
 
 	/* query internal regulatory database (if it exists) */
 	reg_regdb_query(alpha2);
@@ -1851,8 +1853,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 	const struct ieee80211_freq_range *freq_range = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
 
-	printk(KERN_INFO "    (start_freq - end_freq @ bandwidth), "
-		"(max_antenna_gain, max_eirp)\n");
+	pr_info("    (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)\n");
 
 	for (i = 0; i < rd->n_reg_rules; i++) {
 		reg_rule = &rd->reg_rules[i];
@@ -1864,16 +1865,14 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 		 * in certain regions
 		 */
 		if (power_rule->max_antenna_gain)
-			printk(KERN_INFO "    (%d KHz - %d KHz @ %d KHz), "
-				"(%d mBi, %d mBm)\n",
+			pr_info("    (%d KHz - %d KHz @ %d KHz), (%d mBi, %d mBm)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				freq_range->max_bandwidth_khz,
 				power_rule->max_antenna_gain,
 				power_rule->max_eirp);
 		else
-			printk(KERN_INFO "    (%d KHz - %d KHz @ %d KHz), "
-				"(N/A, %d mBm)\n",
+			pr_info("    (%d KHz - %d KHz @ %d KHz), (N/A, %d mBm)\n",
 				freq_range->start_freq_khz,
 				freq_range->end_freq_khz,
 				freq_range->max_bandwidth_khz,
@@ -1892,27 +1891,20 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 			rdev = cfg80211_rdev_by_wiphy_idx(
 				last_request->wiphy_idx);
 			if (rdev) {
-				printk(KERN_INFO "cfg80211: Current regulatory "
-					"domain updated by AP to: %c%c\n",
+				pr_info("Current regulatory domain updated by AP to: %c%c\n",
 					rdev->country_ie_alpha2[0],
 					rdev->country_ie_alpha2[1]);
 			} else
-				printk(KERN_INFO "cfg80211: Current regulatory "
-					"domain intersected:\n");
+				pr_info("Current regulatory domain intersected:\n");
 		} else
-			printk(KERN_INFO "cfg80211: Current regulatory "
-				"domain intersected:\n");
+			pr_info("Current regulatory domain intersected:\n");
 	} else if (is_world_regdom(rd->alpha2))
-		printk(KERN_INFO "cfg80211: World regulatory "
-			"domain updated:\n");
+		pr_info("World regulatory domain updated:\n");
 	else {
 		if (is_unknown_alpha2(rd->alpha2))
-			printk(KERN_INFO "cfg80211: Regulatory domain "
-				"changed to driver built-in settings "
-				"(unknown country)\n");
+			pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n");
 		else
-			printk(KERN_INFO "cfg80211: Regulatory domain "
-				"changed to country: %c%c\n",
+			pr_info("Regulatory domain changed to country: %c%c\n",
 				rd->alpha2[0], rd->alpha2[1]);
 	}
 	print_rd_rules(rd);
@@ -1920,8 +1912,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 
 static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 {
-	printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n",
-		rd->alpha2[0], rd->alpha2[1]);
+	pr_info("Regulatory domain: %c%c\n", rd->alpha2[0], rd->alpha2[1]);
 	print_rd_rules(rd);
 }
 
@@ -1972,8 +1963,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 		return -EINVAL;
 
 	if (!is_valid_rd(rd)) {
-		printk(KERN_ERR "cfg80211: Invalid "
-			"regulatory domain detected:\n");
+		pr_err("Invalid regulatory domain detected:\n");
 		print_regdomain_info(rd);
 		return -EINVAL;
 	}
@@ -2147,8 +2137,7 @@ int __init regulatory_init(void)
 		 * early boot for call_usermodehelper(). For now treat these
 		 * errors as non-fatal.
 		 */
-		printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable "
-			"to call CRDA during init");
+		pr_err("kobject_uevent_env() was unable to call CRDA during init\n");
 #ifdef CONFIG_CFG80211_REG_DEBUG
 		/* We want to find out exactly why when debugging */
 		WARN_ON(err);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 76120aeda57d..fee020b15a4e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -502,7 +502,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 			skb_orphan(skb);
 
 		if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) {
-			printk(KERN_ERR "failed to reallocate Tx buffer\n");
+			pr_err("failed to reallocate Tx buffer\n");
 			return -ENOMEM;
 		}
 		skb->truesize += head_need;
@@ -685,20 +685,17 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 			continue;
 		if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL,
 					&wdev->connect_keys->params[i])) {
-			printk(KERN_ERR "%s: failed to set key %d\n",
-				dev->name, i);
+			netdev_err(dev, "failed to set key %d\n", i);
 			continue;
 		}
 		if (wdev->connect_keys->def == i)
 			if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) {
-				printk(KERN_ERR "%s: failed to set defkey %d\n",
-					dev->name, i);
+				netdev_err(dev, "failed to set defkey %d\n", i);
 				continue;
 			}
 		if (wdev->connect_keys->defmgmt == i)
 			if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i))
-				printk(KERN_ERR "%s: failed to set mgtdef %d\n",
-					dev->name, i);
+				netdev_err(dev, "failed to set mgtdef %d\n", i);
 	}
 
 	kfree(wdev->connect_keys);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index dc675a3daa3d..fdbc23c10d8c 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -467,8 +467,8 @@ void wireless_send_event(struct net_device *	dev,
 		 * The best the driver could do is to log an error message.
 		 * We will do it ourselves instead...
 		 */
-		printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
-		       dev->name, cmd);
+		netdev_err(dev, "(WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
+			   cmd);
 		return;
 	}
 
@@ -476,11 +476,13 @@ void wireless_send_event(struct net_device *	dev,
 	if (descr->header_type == IW_HEADER_TYPE_POINT) {
 		/* Check if number of token fits within bounds */
 		if (wrqu->data.length > descr->max_tokens) {
-			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
+			netdev_err(dev, "(WE) : Wireless Event too big (%d)\n",
+				   wrqu->data.length);
 			return;
 		}
 		if (wrqu->data.length < descr->min_tokens) {
-			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
+			netdev_err(dev, "(WE) : Wireless Event too small (%d)\n",
+				   wrqu->data.length);
 			return;
 		}
 		/* Calculate extra_len - extra is NULL for restricted events */
-- 
cgit v1.2.3


From c8a7972c3b3633bf90daf50b135665d8ca4838c4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Fri, 19 Nov 2010 22:55:37 +0100
Subject: mac80211: restart beacon miss timer on system resume from suspend

Signed-off-by: Paul Stewart <pstew@google.com>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index dfc4a316ac1c..84e24df234e2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2012,6 +2012,7 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
 		add_timer(&ifmgd->timer);
 	if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
 		add_timer(&ifmgd->chswitch_timer);
+	ieee80211_sta_reset_beacon_monitor(sdata);
 }
 #endif
 
-- 
cgit v1.2.3


From 7ccc8bd7593634d827e8bc55898a5038e29848b5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Fri, 19 Nov 2010 22:55:38 +0100
Subject: mac80211: calculate beacon loss time accurately

Instead of using a fixed 2 second timeout, calculate beacon loss interval
from the advertised beacon interval and a frame count.  With this beacon
loss happens after N (default 7) consecutive frames are missed which
for a typical setup (100TU beacon interval) is ~700ms (or ~1/3 previous).

Signed-off-by: Sam Leffler <sleffler@chromium.org>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3598abf21844..ff7bc307827b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -349,6 +349,7 @@ struct ieee80211_if_managed {
 	struct work_struct chswitch_work;
 	struct work_struct beacon_connection_loss_work;
 
+	unsigned long beacon_timeout;
 	unsigned long probe_timeout;
 	int probe_send_count;
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 84e24df234e2..729aba49cf98 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -31,10 +31,15 @@
 #define IEEE80211_MAX_PROBE_TRIES 5
 
 /*
- * beacon loss detection timeout
- * XXX: should depend on beacon interval
+ * Beacon loss timeout is calculated as N frames times the
+ * advertised beacon interval.  This may need to be somewhat
+ * higher than what hardware might detect to account for
+ * delays in the host processing frames. But since we also
+ * probe on beacon miss before declaring the connection lost
+ * default to what we want.
  */
-#define IEEE80211_BEACON_LOSS_TIME	(2 * HZ)
+#define IEEE80211_BEACON_LOSS_COUNT	7
+
 /*
  * Time the connection can be idle before we probe
  * it to see if we can still talk to the AP.
@@ -121,7 +126,7 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
 		return;
 
 	mod_timer(&sdata->u.mgd.bcn_mon_timer,
-		  round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME));
+		  round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout));
 }
 
 void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
@@ -871,6 +876,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	bss_info_changed |= ieee80211_handle_bss_capability(sdata,
 		cbss->capability, bss->has_erp_value, bss->erp_value);
 
+	sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
+		IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int));
+
 	sdata->u.mgd.associated = cbss;
 	memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
 
-- 
cgit v1.2.3


From 46090979a55a0dc2cdb3d939f94fa47742108194 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 23 Nov 2010 20:28:03 +0100
Subject: mac80211: probe the AP when resuming

Check the connection by probing the AP (either using nullfunc or a
probe request). If nullfunc probing is supported and the assoc is no
longer valid, the AP will send a disassoc/deauth immediately.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 729aba49cf98..849d9c639add 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2021,6 +2021,7 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
 	if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
 		add_timer(&ifmgd->chswitch_timer);
 	ieee80211_sta_reset_beacon_monitor(sdata);
+	ieee80211_restart_sta_timer(sdata);
 }
 #endif
 
-- 
cgit v1.2.3


From dd5b4cc71cd09c33e1579cc6d5720656e94e52de Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 22 Nov 2010 20:58:24 +0100
Subject: cfg80211/mac80211: improve ad-hoc multicast rate handling

- store the multicast rate as an index instead of the rate value
  (reduces cpu overhead in a hotpath)
- validate the rate values (must match a bitrate in at least one sband)

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h |  4 ++--
 include/net/mac80211.h |  4 ++--
 net/mac80211/ibss.c    |  3 ++-
 net/mac80211/rate.c    | 25 ++++++++++++-------------
 net/wireless/nl80211.c | 36 +++++++++++++++++++++++++++++++++---
 5 files changed, 51 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 91f099556ac1..dd4c43f512e2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -923,7 +923,7 @@ struct cfg80211_disassoc_request {
  * @privacy: this is a protected network, keys will be configured
  *	after joining
  * @basic_rates: bitmap of basic rates to use when creating the IBSS
- * @mcast_rate: multicast tx rate (in 100 kbps)
+ * @mcast_rate: per-band multicast rate index + 1 (0: disabled)
  */
 struct cfg80211_ibss_params {
 	u8 *ssid;
@@ -935,7 +935,7 @@ struct cfg80211_ibss_params {
 	u32 basic_rates;
 	bool channel_fixed;
 	bool privacy;
-	int mcast_rate;
+	int mcast_rate[IEEE80211_NUM_BANDS];
 };
 
 /**
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5b0fff2178bb..08e97e5d03fd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -219,7 +219,7 @@ enum ieee80211_bss_change {
  * @basic_rates: bitmap of basic rates, each bit stands for an
  *	index into the rate table configured by the driver in
  *	the current band.
- * @mcast_rate: multicast rate for AP and Ad-Hoc (in 100 kbps)
+ * @mcast_rate: per-band multicast rate index + 1 (0: disabled)
  * @bssid: The BSSID for this BSS
  * @enable_beacon: whether beaconing should be enabled or not
  * @channel_type: Channel type for this BSS -- the hardware might be
@@ -259,7 +259,7 @@ struct ieee80211_bss_conf {
 	u16 assoc_capability;
 	u64 timestamp;
 	u32 basic_rates;
-	u32 mcast_rate;
+	int mcast_rate[IEEE80211_NUM_BANDS];
 	u16 ht_operation_mode;
 	s32 cqm_rssi_thold;
 	u32 cqm_rssi_hyst;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 6fe6837dc134..410d104b1347 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -915,7 +915,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 
 	sdata->u.ibss.privacy = params->privacy;
 	sdata->u.ibss.basic_rates = params->basic_rates;
-	sdata->vif.bss_conf.mcast_rate = params->mcast_rate;
+	memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate,
+	       sizeof(params->mcast_rate));
 
 	sdata->vif.bss_conf.beacon_int = params->beacon_interval;
 
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 76de4f8d9327..3d5a2cb835c4 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -211,20 +211,11 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
 	return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
 }
 
-static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u32 mcast_rate,
+static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
 				  struct ieee80211_supported_band *sband)
 {
 	u8 i;
 
-	if (mcast_rate) {
-		for (i = 0; i < sband->n_bitrates; i++) {
-			if (sband->bitrates[i].bitrate == mcast_rate) {
-				*idx = i;
-				return;
-			}
-		}
-	}
-
 	if (basic_rates == 0)
 		return; /* assume basic rates unknown and accept rate */
 	if (*idx < 0)
@@ -247,17 +238,25 @@ bool rate_control_send_low(struct ieee80211_sta *sta,
 			   struct ieee80211_tx_rate_control *txrc)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
+	struct ieee80211_supported_band *sband = txrc->sband;
+	int mcast_rate;
 
 	if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) {
 		info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta);
 		info->control.rates[0].count =
 			(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
 			1 : txrc->hw->max_rate_tries;
-		if (!sta && txrc->bss)
+		if (!sta && txrc->bss) {
+			mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
+			if (mcast_rate > 0) {
+				info->control.rates[0].idx = mcast_rate - 1;
+				return true;
+			}
+
 			rc_send_low_broadcast(&info->control.rates[0].idx,
 					      txrc->bss_conf->basic_rates,
-					      txrc->bss_conf->mcast_rate,
-					      txrc->sband);
+					      sband);
+		}
 		return true;
 	}
 	return false;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b15eb77195d8..8734efa663d1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3600,6 +3600,34 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
 				      local_state_change);
 }
 
+static bool
+nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev,
+			 int mcast_rate[IEEE80211_NUM_BANDS],
+			 int rateval)
+{
+	struct wiphy *wiphy = &rdev->wiphy;
+	bool found = false;
+	int band, i;
+
+	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+		struct ieee80211_supported_band *sband;
+
+		sband = wiphy->bands[band];
+		if (!sband)
+			continue;
+
+		for (i = 0; i < sband->n_bitrates; i++) {
+			if (sband->bitrates[i].bitrate == rateval) {
+				mcast_rate[band] = i + 1;
+				found = true;
+				break;
+			}
+		}
+	}
+
+	return found;
+}
+
 static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -3683,9 +3711,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 				return -EINVAL;
 		}
 	}
-	if (info->attrs[NL80211_ATTR_MCAST_RATE])
-		ibss.mcast_rate =
-			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]);
+
+	if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
+	    !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
+			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
+		return -EINVAL;
 
 	if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev,
-- 
cgit v1.2.3


From 4e5ff37692df35c8826f1291204841b174d3c3ce Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 23 Nov 2010 03:10:31 +0100
Subject: mac80211: use nullfunc instead of probe request for connection
 monitoring

nullfunc frames are better for connection monitoring, because probe requests
are answered even if the AP has already dropped the connection, whereas
nullfunc frames from an unassociated station will trigger a disassoc/deauth
frame from the AP (WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA), which allows
the station to reconnect immediately instead of waiting until it attempts to
transmit the next unicast frame.

This only works on hardware with reliable tx ACK reporting, any other hardware
needs to fall back to the probe request method.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  2 +
 net/mac80211/mlme.c        | 92 +++++++++++++++++++++++++++++++++-------------
 net/mac80211/status.c      |  4 ++
 3 files changed, 72 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ff7bc307827b..5bc0745368fe 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1265,6 +1265,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 			     int powersave);
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr);
+void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
+			     struct ieee80211_hdr *hdr);
 void ieee80211_beacon_connection_loss_work(struct work_struct *work);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 849d9c639add..33ffce3ec605 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1034,6 +1034,51 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	ieee80211_sta_reset_conn_monitor(sdata);
 }
 
+static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+	if (!(ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
+			      IEEE80211_STA_CONNECTION_POLL)))
+	    return;
+
+	ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
+			  IEEE80211_STA_BEACON_POLL);
+	mutex_lock(&sdata->local->iflist_mtx);
+	ieee80211_recalc_ps(sdata->local, -1);
+	mutex_unlock(&sdata->local->iflist_mtx);
+
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		return;
+
+	/*
+	 * We've received a probe response, but are not sure whether
+	 * we have or will be receiving any beacons or data, so let's
+	 * schedule the timers again, just in case.
+	 */
+	ieee80211_sta_reset_beacon_monitor(sdata);
+
+	mod_timer(&ifmgd->conn_mon_timer,
+		  round_jiffies_up(jiffies +
+				   IEEE80211_CONNECTION_IDLE_TIME));
+}
+
+void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
+			     struct ieee80211_hdr *hdr)
+{
+	if (!ieee80211_is_data(hdr->frame_control) &&
+	    !ieee80211_is_nullfunc(hdr->frame_control))
+	    return;
+
+	ieee80211_sta_reset_conn_monitor(sdata);
+
+	if (ieee80211_is_nullfunc(hdr->frame_control) &&
+	    sdata->u.mgd.probe_send_count > 0) {
+		sdata->u.mgd.probe_send_count = 0;
+		ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+	}
+}
+
 static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1049,8 +1094,19 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	if (ifmgd->probe_send_count >= unicast_limit)
 		dst = NULL;
 
-	ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
-	ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
+	/*
+	 * When the hardware reports an accurate Tx ACK status, it's
+	 * better to send a nullfunc frame instead of a probe request,
+	 * as it will kick us off the AP quickly if we aren't associated
+	 * anymore. The timeout will be reset if the frame is ACKed by
+	 * the AP.
+	 */
+	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+		ieee80211_send_nullfunc(sdata->local, sdata, 0);
+	else {
+		ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
+		ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
+	}
 
 	ifmgd->probe_send_count++;
 	ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
@@ -1517,29 +1573,8 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false);
 
 	if (ifmgd->associated &&
-	    memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0 &&
-	    ifmgd->flags & (IEEE80211_STA_BEACON_POLL |
-			    IEEE80211_STA_CONNECTION_POLL)) {
-		ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
-				  IEEE80211_STA_BEACON_POLL);
-		mutex_lock(&sdata->local->iflist_mtx);
-		ieee80211_recalc_ps(sdata->local, -1);
-		mutex_unlock(&sdata->local->iflist_mtx);
-
-		if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
-			return;
-
-		/*
-		 * We've received a probe response, but are not sure whether
-		 * we have or will be receiving any beacons or data, so let's
-		 * schedule the timers again, just in case.
-		 */
-		ieee80211_sta_reset_beacon_monitor(sdata);
-
-		mod_timer(&ifmgd->conn_mon_timer,
-			  round_jiffies_up(jiffies +
-					   IEEE80211_CONNECTION_IDLE_TIME));
-	}
+	    memcmp(mgmt->bssid, ifmgd->associated->bssid, ETH_ALEN) == 0)
+		ieee80211_reset_ap_probe(sdata);
 }
 
 /*
@@ -1891,7 +1926,12 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		u8 bssid[ETH_ALEN];
 
 		memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
-		if (time_is_after_jiffies(ifmgd->probe_timeout))
+
+		/* ACK received for nullfunc probing frame */
+		if (!ifmgd->probe_send_count)
+			ieee80211_reset_ap_probe(sdata);
+
+		else if (time_is_after_jiffies(ifmgd->probe_timeout))
 			run_again(ifmgd, ifmgd->probe_timeout);
 
 		else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) {
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 3153c19893b8..8695cd11dfd9 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -155,6 +155,10 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
 
 		ieee80211_queue_work(&local->hw, &local->recalc_smps);
 	}
+
+	if ((sdata->vif.type == NL80211_IFTYPE_STATION) &&
+	    (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+		ieee80211_sta_tx_notify(sdata, (void *) skb->data);
 }
 
 void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
-- 
cgit v1.2.3


From 72a8a3edd630995662bdc85957206685f376f9c4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 23 Nov 2010 03:10:32 +0100
Subject: mac80211: reduce the number of retries for nullfunc probing

Since nullfunc frames are transmitted as unicast frames, they're more
reliable than the broadcast probe requests, so we need fewer retries
to figure out whether the AP is really gone.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 33ffce3ec605..794807914940 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -28,6 +28,7 @@
 #include "rate.h"
 #include "led.h"
 
+#define IEEE80211_MAX_NULLFUNC_TRIES 2
 #define IEEE80211_MAX_PROBE_TRIES 5
 
 /*
@@ -1924,9 +1925,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 			    IEEE80211_STA_CONNECTION_POLL) &&
 	    ifmgd->associated) {
 		u8 bssid[ETH_ALEN];
+		int max_tries;
 
 		memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
 
+		if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+			max_tries = IEEE80211_MAX_NULLFUNC_TRIES;
+		else
+			max_tries = IEEE80211_MAX_PROBE_TRIES;
+
 		/* ACK received for nullfunc probing frame */
 		if (!ifmgd->probe_send_count)
 			ieee80211_reset_ap_probe(sdata);
@@ -1934,7 +1941,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		else if (time_is_after_jiffies(ifmgd->probe_timeout))
 			run_again(ifmgd, ifmgd->probe_timeout);
 
-		else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) {
+		else if (ifmgd->probe_send_count < max_tries) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			wiphy_debug(local->hw.wiphy,
 				    "%s: No probe response from AP %pM"
-- 
cgit v1.2.3


From 48124d1a91fb77defc9734b4556350d59671fb2c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Tue, 23 Nov 2010 15:05:02 -0800
Subject: mac80211: avoid aggregation for VO traffic

This should help with latency issues which can happen when
using aggregation.

Cc: Felix Fietkau <nbd@openwrt.org>
Cc: Matt Smith <matt.smith@atheros.com>
Cc: Senthil Balasubramanian <senthilkumar@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 2d6f0259e0c6..4ad7a362fcc1 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -371,6 +371,9 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru
 	if (likely(sta->ampdu_mlme.tid_tx[tid]))
 		return;
 
+	if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
+		return;
+
 	ieee80211_start_tx_ba_session(pubsta, tid);
 }
 
-- 
cgit v1.2.3


From c063dbf52b998b852122dff07a8b8dd430b38437 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 24 Nov 2010 08:10:05 +0100
Subject: cfg80211: allow using CQM event to notify packet loss

This adds the ability for drivers to use CQM events
to notify about packet loss for specific stations
(which could be the AP for the managed mode case).
Since the threshold might be determined by the
driver (it isn't passed in right now) it will be
passed out of the driver to userspace in the event.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  3 +++
 include/net/cfg80211.h  | 12 ++++++++++++
 net/wireless/mlme.c     | 12 ++++++++++++
 net/wireless/nl80211.c  | 45 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  4 ++++
 5 files changed, 76 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 037b4e498890..d706bf3badc8 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1819,6 +1819,8 @@ enum nl80211_ps_state {
  *	the minimum amount the RSSI level must change after an event before a
  *	new event may be issued (to reduce effects of RSSI oscillation).
  * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many
+ *	consecutive packets were not acknowledged by the peer
  * @__NL80211_ATTR_CQM_AFTER_LAST: internal
  * @NL80211_ATTR_CQM_MAX: highest key attribute
  */
@@ -1827,6 +1829,7 @@ enum nl80211_attr_cqm {
 	NL80211_ATTR_CQM_RSSI_THOLD,
 	NL80211_ATTR_CQM_RSSI_HYST,
 	NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+	NL80211_ATTR_CQM_PKT_LOSS_EVENT,
 
 	/* keep last */
 	__NL80211_ATTR_CQM_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dd4c43f512e2..0663945cfa48 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2601,6 +2601,18 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 			      enum nl80211_cqm_rssi_threshold_event rssi_event,
 			      gfp_t gfp);
 
+/**
+ * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer
+ * @dev: network device
+ * @peer: peer's MAC address
+ * @num_packets: how many packets were lost -- should be a fixed threshold
+ *	but probably no less than maybe 50, or maybe a throughput dependent
+ *	threshold (to account for temporary interference)
+ * @gfp: context flags
+ */
+void cfg80211_cqm_pktloss_notify(struct net_device *dev,
+				 const u8 *peer, u32 num_packets, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 26838d903b9a..6980a0c315b2 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
 	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
 }
 EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
+
+void cfg80211_cqm_pktloss_notify(struct net_device *dev,
+				 const u8 *peer, u32 num_packets, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	/* Indicate roaming trigger event to user space */
+	nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
+}
+EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8734efa663d1..67ff7e92cb99 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5715,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void
+nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *peer,
+				u32 num_packets, gfp_t gfp)
+{
+	struct sk_buff *msg;
+	struct nlattr *pinfoattr;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer);
+
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
+	if (!pinfoattr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets);
+
+	nla_nest_end(msg, pinfoattr);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 30d2f939150d..16c2f7190768 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev,
 			     enum nl80211_cqm_rssi_threshold_event rssi_event,
 			     gfp_t gfp);
+void
+nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *peer,
+				u32 num_packets, gfp_t gfp);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 99ba2a14283be96a682e04455061c08a46bbf4ec Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 24 Nov 2010 08:10:06 +0100
Subject: mac80211: implement packet loss notification

For drivers that have accurate TX status reporting
we can report the number of consecutive lost packets
to userspace using the new cfg80211 CQM event. The
threshold is fixed right now, this may need to be
improved in the future.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.h |  3 +++
 net/mac80211/status.c   | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9265acadef32..b562d9b6a702 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -248,6 +248,7 @@ enum plink_state {
  * @sta: station information we share with the driver
  * @dead: set to true when sta is unlinked
  * @uploaded: set to true when sta is uploaded to the driver
+ * @lost_packets: number of consecutive lost packets
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -335,6 +336,8 @@ struct sta_info {
 	} debugfs;
 #endif
 
+	unsigned int lost_packets;
+
 	/* keep last! */
 	struct ieee80211_sta sta;
 };
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 8695cd11dfd9..bed7e32ed908 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -161,6 +161,15 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
 		ieee80211_sta_tx_notify(sdata, (void *) skb->data);
 }
 
+/*
+ * Use a static threshold for now, best value to be determined
+ * by testing ...
+ * Should it depend on:
+ *  - on # of retransmissions
+ *  - current throughput (higher value for higher tpt)?
+ */
+#define STA_LOST_PKT_THRESHOLD	50
+
 void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct sk_buff *skb2;
@@ -247,6 +256,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		if (!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
 		    (info->flags & IEEE80211_TX_STAT_ACK))
 			ieee80211_frame_acked(sta, skb);
+
+		if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+			if (info->flags & IEEE80211_TX_STAT_ACK) {
+				if (sta->lost_packets)
+					sta->lost_packets = 0;
+			} else if (++sta->lost_packets >= STA_LOST_PKT_THRESHOLD) {
+				cfg80211_cqm_pktloss_notify(sta->sdata->dev,
+							    sta->sta.addr,
+							    sta->lost_packets,
+							    GFP_ATOMIC);
+				sta->lost_packets = 0;
+			}
+		}
 	}
 
 	rcu_read_unlock();
-- 
cgit v1.2.3


From cf7afbfeb8ceb0187348d0a1a0db61305e25f05f Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Mon, 22 Nov 2010 01:31:54 +0000
Subject: rtnl: make link af-specific updates atomic

As David pointed out correctly, updates to af-specific attributes
are currently not atomic. If multiple changes are requested and
one of them fails, previous updates may have been applied already
leaving the link behind in a undefined state.

This patch splits the function parse_link_af() into two functions
validate_link_af() and set_link_at(). validate_link_af() is placed
to validate_linkmsg() check for errors as early as possible before
any changes to the link have been made. set_link_af() is called to
commit the changes later.

This method is not fail proof, while it is currently sufficient
to make set_link_af() inerrable and thus 100% atomic, the
validation function method will not be able to detect all error
scenarios in the future, there will likely always be errors
depending on states which are f.e. not protected by rtnl_mutex
and thus may change between validation and setting.

Also, instead of silently ignoring unknown address families and
config blocks for address families which did not register a set
function the errors EAFNOSUPPORT respectively EOPNOSUPPORT are
returned to avoid comitting 4 out of 5 update requests without
notifying the user.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h | 12 ++++++++----
 net/core/rtnetlink.c    | 29 ++++++++++++++++++++++++-----
 net/ipv4/devinet.c      | 26 +++++++++++++++++++++-----
 net/ipv6/addrconf.c     |  6 ------
 4 files changed, 53 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 35be0bbcd7da..4093ca78cf60 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -92,8 +92,10 @@ extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
  * 		       specific netlink attributes.
  * 	@get_link_af_size: Function to calculate size of address family specific
  * 			   netlink attributes exlusive the container attribute.
- * 	@parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
- *			net_device accordingly.
+ *	@validate_link_af: Validate a IFLA_AF_SPEC attribute, must check attr
+ *			   for invalid configuration settings.
+ * 	@set_link_af: Function to parse a IFLA_AF_SPEC attribute and modify
+ *		      net_device accordingly.
  */
 struct rtnl_af_ops {
 	struct list_head	list;
@@ -103,8 +105,10 @@ struct rtnl_af_ops {
 						const struct net_device *dev);
 	size_t			(*get_link_af_size)(const struct net_device *dev);
 
-	int			(*parse_link_af)(struct net_device *dev,
-						 const struct nlattr *attr);
+	int			(*validate_link_af)(const struct net_device *dev,
+						    const struct nlattr *attr);
+	int			(*set_link_af)(struct net_device *dev,
+					       const struct nlattr *attr);
 };
 
 extern int	__rtnl_af_register(struct rtnl_af_ops *ops);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bf69e5871b1a..750db57f3bb3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1107,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 			return -EINVAL;
 	}
 
+	if (tb[IFLA_AF_SPEC]) {
+		struct nlattr *af;
+		int rem, err;
+
+		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
+			const struct rtnl_af_ops *af_ops;
+
+			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+				return -EAFNOSUPPORT;
+
+			if (!af_ops->set_link_af)
+				return -EOPNOTSUPP;
+
+			if (af_ops->validate_link_af) {
+				err = af_ops->validate_link_af(dev,
+							tb[IFLA_AF_SPEC]);
+				if (err < 0)
+					return err;
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -1356,12 +1378,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			const struct rtnl_af_ops *af_ops;
 
 			if (!(af_ops = rtnl_af_lookup(nla_type(af))))
-				continue;
-
-			if (!af_ops->parse_link_af)
-				continue;
+				BUG();
 
-			err = af_ops->parse_link_af(dev, af);
+			err = af_ops->set_link_af(dev, af);
 			if (err < 0)
 				goto errout;
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 71afc26c2df8..d9f71bae45c4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1289,14 +1289,14 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
 };
 
-static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
+static int inet_validate_link_af(const struct net_device *dev,
+				 const struct nlattr *nla)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int err, rem;
 
-	if (!in_dev)
-		return -EOPNOTSUPP;
+	if (dev && !__in_dev_get_rcu(dev))
+		return -EAFNOSUPPORT;
 
 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
 	if (err < 0)
@@ -1314,6 +1314,21 @@ static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla)
 		}
 	}
 
+	return 0;
+}
+
+static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct nlattr *a, *tb[IFLA_INET_MAX+1];
+	int rem;
+
+	if (!in_dev)
+		return -EAFNOSUPPORT;
+
+	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
+		BUG();
+
 	if (tb[IFLA_INET_CONF]) {
 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
@@ -1689,7 +1704,8 @@ static struct rtnl_af_ops inet_af_ops = {
 	.family		  = AF_INET,
 	.fill_link_af	  = inet_fill_link_af,
 	.get_link_af_size = inet_get_link_af_size,
-	.parse_link_af	  = inet_parse_link_af,
+	.validate_link_af = inet_validate_link_af,
+	.set_link_af	  = inet_set_link_af,
 };
 
 void __init devinet_init(void)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4cf760598c2a..1023ad0d2b15 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3956,11 +3956,6 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
 	return 0;
 }
 
-static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla)
-{
-	return -EOPNOTSUPP;
-}
-
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
@@ -4670,7 +4665,6 @@ static struct rtnl_af_ops inet6_ops = {
 	.family		  = AF_INET6,
 	.fill_link_af	  = inet6_fill_link_af,
 	.get_link_af_size = inet6_get_link_af_size,
-	.parse_link_af	  = inet6_parse_link_af,
 };
 
 /*
-- 
cgit v1.2.3


From a40c9f88b5e3da500ddab9440e5ddac170c12281 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 23 Nov 2010 22:57:47 +0000
Subject: net: add some KERN_CONT markers to continuation lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: netdev@vger.kernel.org
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c |  4 ++--
 net/ipv4/ipconfig.c   | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 7670aac0e93f..a8445c72fc13 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -47,11 +47,11 @@ void phy_print_status(struct phy_device *phydev)
 	pr_info("PHY: %s - Link is %s", dev_name(&phydev->dev),
 			phydev->link ? "Up" : "Down");
 	if (phydev->link)
-		printk(" - %d/%s", phydev->speed,
+		printk(KERN_CONT " - %d/%s", phydev->speed,
 				DUPLEX_FULL == phydev->duplex ?
 				"Full" : "Half");
 
-	printk("\n");
+	printk(KERN_CONT "\n");
 }
 EXPORT_SYMBOL(phy_print_status);
 
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 3a6e1ec5e9ae..2b097752426b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1191,13 +1191,13 @@ static int __init ic_dynamic(void)
 		    (ic_proto_enabled & IC_USE_DHCP) &&
 		    ic_dhcp_msgtype != DHCPACK) {
 			ic_got_reply = 0;
-			printk(",");
+			printk(KERN_CONT ",");
 			continue;
 		}
 #endif /* IPCONFIG_DHCP */
 
 		if (ic_got_reply) {
-			printk(" OK\n");
+			printk(KERN_CONT " OK\n");
 			break;
 		}
 
@@ -1205,7 +1205,7 @@ static int __init ic_dynamic(void)
 			continue;
 
 		if (! --retries) {
-			printk(" timed out!\n");
+			printk(KERN_CONT " timed out!\n");
 			break;
 		}
 
@@ -1215,7 +1215,7 @@ static int __init ic_dynamic(void)
 		if (timeout > CONF_TIMEOUT_MAX)
 			timeout = CONF_TIMEOUT_MAX;
 
-		printk(".");
+		printk(KERN_CONT ".");
 	}
 
 #ifdef IPCONFIG_BOOTP
@@ -1236,7 +1236,7 @@ static int __init ic_dynamic(void)
 		((ic_got_reply & IC_RARP) ? "RARP"
 		 : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"),
 		&ic_servaddr);
-	printk("my address is %pI4\n", &ic_myaddr);
+	printk(KERN_CONT "my address is %pI4\n", &ic_myaddr);
 
 	return 0;
 }
@@ -1468,19 +1468,19 @@ static int __init ip_auto_config(void)
 	/*
 	 * Clue in the operator.
 	 */
-	printk("IP-Config: Complete:");
-	printk("\n     device=%s", ic_dev->name);
-	printk(", addr=%pI4", &ic_myaddr);
-	printk(", mask=%pI4", &ic_netmask);
-	printk(", gw=%pI4", &ic_gateway);
-	printk(",\n     host=%s, domain=%s, nis-domain=%s",
+	printk("IP-Config: Complete:\n");
+	printk("     device=%s", ic_dev->name);
+	printk(KERN_CONT ", addr=%pI4", &ic_myaddr);
+	printk(KERN_CONT ", mask=%pI4", &ic_netmask);
+	printk(KERN_CONT ", gw=%pI4", &ic_gateway);
+	printk(KERN_CONT ",\n     host=%s, domain=%s, nis-domain=%s",
 	       utsname()->nodename, ic_domain, utsname()->domainname);
-	printk(",\n     bootserver=%pI4", &ic_servaddr);
-	printk(", rootserver=%pI4", &root_server_addr);
-	printk(", rootpath=%s", root_server_path);
+	printk(KERN_CONT ",\n     bootserver=%pI4", &ic_servaddr);
+	printk(KERN_CONT ", rootserver=%pI4", &root_server_addr);
+	printk(KERN_CONT ", rootpath=%s", root_server_path);
 	if (ic_dev_mtu)
-		printk(", mtu=%d", ic_dev_mtu);
-	printk("\n");
+		printk(KERN_CONT ", mtu=%d", ic_dev_mtu);
+	printk(KERN_CONT "\n");
 #endif /* !SILENT */
 
 	return 0;
-- 
cgit v1.2.3


From d3c15cab213becc49a6f2ad7f48a59513a5f17dd Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 24 Nov 2010 21:47:56 +0000
Subject: ipv6: kill two unused macro definition

1. IPV6_TLV_TEL_DST_SIZE
This has not been using for several years since created.

2. RT6_INFO_LEN
commit 33120b30 kill all RT6_INFO_LEN's references, but only this definition remained.

commit 33120b30cc3b8665204d4fcde7288638b0dd04d5
Author: Alexey Dobriyan <adobriyan@sw.ru>
Date:   Tue Nov 6 05:27:11 2007 -0800

    [IPV6]: Convert /proc/net/ipv6_route to seq_file interface

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 --
 net/ipv6/route.c      | 2 --
 2 files changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2a59610c2a58..b1155554bb18 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
 
-#define IPV6_TLV_TEL_DST_SIZE 8
-
 #ifdef IP6_TNL_DEBUG
 #define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
 #else
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c346ccf66ae1..a0c4ad109c63 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2453,8 +2453,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 
 #ifdef CONFIG_PROC_FS
 
-#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-
 struct rt6_proc_arg
 {
 	char *buffer;
-- 
cgit v1.2.3


From 5a0d2268d259886f0c87131639d19eb4a67b4532 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 23 Nov 2010 10:42:02 +0000
Subject: net: add netif_tx_queue_frozen_or_stopped

When testing struct netdev_queue state against FROZEN bit, we also test
XOFF bit. We can test both bits at once and save some cycles.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++++--
 net/core/netpoll.c        | 3 +--
 net/core/pktgen.c         | 2 +-
 net/sched/sch_generic.c   | 8 +++-----
 net/sched/sch_teql.c      | 3 +--
 5 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index badf9285fe0d..7c6ae2f4b9ab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -493,6 +493,8 @@ static inline void napi_synchronize(const struct napi_struct *n)
 enum netdev_queue_state_t {
 	__QUEUE_STATE_XOFF,
 	__QUEUE_STATE_FROZEN,
+#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF)		| \
+				    (1 << __QUEUE_STATE_FROZEN))
 };
 
 struct netdev_queue {
@@ -1629,9 +1631,9 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
-static inline int netif_tx_queue_frozen(const struct netdev_queue *dev_queue)
+static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
 {
-	return test_bit(__QUEUE_STATE_FROZEN, &dev_queue->state);
+	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
 }
 
 /**
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af0..ee38acb6d463 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,8 +76,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq) ||
+		if (netif_tx_queue_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2e57830cbeb2..2953b2abc971 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3527,7 +3527,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) {
+	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5dbb3cd96e59..7f0bd8952646 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_stopped(txq) &&
-		    !netif_tx_queue_frozen(txq)) {
+		if (!netif_tx_queue_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+	if (!netif_tx_queue_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -144,8 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && (netif_tx_queue_stopped(txq) ||
-		    netif_tx_queue_frozen(txq)))
+	if (ret && netif_tx_queue_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 401af9596709..106479a7c94a 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -309,8 +309,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_stopped(slave_txq) &&
-				    !netif_tx_queue_frozen(slave_txq) &&
+				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
-- 
cgit v1.2.3


From 19eb5cc559f716dc98ce03a5bad6030fdc71e897 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 24 Nov 2010 13:14:50 +0000
Subject: 8021q: vlan device is lockless do not transfer
 real_num_{tx|rx}_queues

Now that the vlan device is lockless and single queue do not
transfer the real num queues. This is causing a BUG_ON to occur.

kernel BUG at net/8021q/vlan.c:345!
Call Trace:
[<ffffffff813fd6e8>] ? fib_rules_event+0x28/0x1b0
[<ffffffff814ad2b5>] notifier_call_chain+0x55/0x80
[<ffffffff81089156>] raw_notifier_call_chain+0x16/0x20
[<ffffffff813e5af7>] call_netdevice_notifiers+0x37/0x70
[<ffffffff813e6756>] netdev_features_change+0x16/0x20
[<ffffffffa02995be>] ixgbe_fcoe_enable+0xae/0x100 [ixgbe]
[<ffffffffa01da06a>] vlan_dev_fcoe_enable+0x2a/0x30 [8021q]
[<ffffffffa02d08c3>] fcoe_create+0x163/0x630 [fcoe]
[<ffffffff811244d5>] ? mmap_region+0x255/0x5a0
[<ffffffff81080ef0>] param_attr_store+0x50/0x80
[<ffffffff810809b6>] module_attr_store+0x26/0x30
[<ffffffff811b9db2>] sysfs_write_file+0xf2/0x180
[<ffffffff8114fc88>] vfs_write+0xc8/0x190
[<ffffffff81150621>] sys_write+0x51/0x90
[<ffffffff8100c0b2>] system_call_fastpath+0x16/0x1b

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index dc1071327d87..6e64f7c6a2e9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -341,9 +341,6 @@ static void vlan_transfer_features(struct net_device *dev,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
 #endif
-	vlandev->real_num_tx_queues = dev->real_num_tx_queues;
-	BUG_ON(vlandev->real_num_tx_queues > vlandev->num_tx_queues);
-
 	if (old_features != vlandev->features)
 		netdev_features_change(vlandev);
 }
-- 
cgit v1.2.3


From 5595a1a5997953dbd8c5df7c2f7d4b3a2eb2be4b Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 25 Nov 2010 02:18:15 +0000
Subject: X25 remove bkl in subscription ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h  |  2 ++
 net/x25/af_x25.c   | 12 ++++--------
 net/x25/x25_link.c |  8 ++++++--
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/x25.h b/include/net/x25.h
index 1479cb4a41fc..a06119a05129 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -315,6 +315,8 @@ extern struct list_head x25_route_list;
 extern rwlock_t x25_route_list_lock;
 extern struct list_head x25_forward_list;
 extern rwlock_t x25_forward_list_lock;
+extern struct list_head x25_neigh_list;
+extern rwlock_t x25_neigh_list_lock;
 
 extern int x25_proc_init(void);
 extern void x25_proc_exit(void);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 2351aceb296d..45be72c3f940 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1415,17 +1415,13 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = x25_route_ioctl(cmd, argp);
 			break;
 		case SIOCX25GSUBSCRIP:
-			lock_kernel();
 			rc = x25_subscr_ioctl(cmd, argp);
-			unlock_kernel();
 			break;
 		case SIOCX25SSUBSCRIP:
 			rc = -EPERM;
 			if (!capable(CAP_NET_ADMIN))
 				break;
-			lock_kernel();
 			rc = x25_subscr_ioctl(cmd, argp);
-			unlock_kernel();
 			break;
 		case SIOCX25GFACILITIES: {
 			struct x25_facilities fac = x25->facilities;
@@ -1646,16 +1642,20 @@ static int compat_x25_subscr_ioctl(unsigned int cmd,
 	dev_put(dev);
 
 	if (cmd == SIOCX25GSUBSCRIP) {
+		read_lock_bh(&x25_neigh_list_lock);
 		x25_subscr.extended = nb->extended;
 		x25_subscr.global_facil_mask = nb->global_facil_mask;
+		read_unlock_bh(&x25_neigh_list_lock);
 		rc = copy_to_user(x25_subscr32, &x25_subscr,
 				sizeof(*x25_subscr32)) ? -EFAULT : 0;
 	} else {
 		rc = -EINVAL;
 		if (x25_subscr.extended == 0 || x25_subscr.extended == 1) {
 			rc = 0;
+			write_lock_bh(&x25_neigh_list_lock);
 			nb->extended = x25_subscr.extended;
 			nb->global_facil_mask = x25_subscr.global_facil_mask;
+			write_unlock_bh(&x25_neigh_list_lock);
 		}
 	}
 	x25_neigh_put(nb);
@@ -1711,17 +1711,13 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 		rc = x25_route_ioctl(cmd, argp);
 		break;
 	case SIOCX25GSUBSCRIP:
-		lock_kernel();
 		rc = compat_x25_subscr_ioctl(cmd, argp);
-		unlock_kernel();
 		break;
 	case SIOCX25SSUBSCRIP:
 		rc = -EPERM;
 		if (!capable(CAP_NET_ADMIN))
 			break;
-		lock_kernel();
 		rc = compat_x25_subscr_ioctl(cmd, argp);
-		unlock_kernel();
 		break;
 	case SIOCX25GFACILITIES:
 	case SIOCX25SFACILITIES:
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 73e7b954ad28..4c81f6abb65b 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -31,8 +31,8 @@
 #include <linux/init.h>
 #include <net/x25.h>
 
-static LIST_HEAD(x25_neigh_list);
-static DEFINE_RWLOCK(x25_neigh_list_lock);
+LIST_HEAD(x25_neigh_list);
+DEFINE_RWLOCK(x25_neigh_list_lock);
 
 static void x25_t20timer_expiry(unsigned long);
 
@@ -360,16 +360,20 @@ int x25_subscr_ioctl(unsigned int cmd, void __user *arg)
 	dev_put(dev);
 
 	if (cmd == SIOCX25GSUBSCRIP) {
+		read_lock_bh(&x25_neigh_list_lock);
 		x25_subscr.extended	     = nb->extended;
 		x25_subscr.global_facil_mask = nb->global_facil_mask;
+		read_unlock_bh(&x25_neigh_list_lock);
 		rc = copy_to_user(arg, &x25_subscr,
 				  sizeof(x25_subscr)) ? -EFAULT : 0;
 	} else {
 		rc = -EINVAL;
 		if (!(x25_subscr.extended && x25_subscr.extended != 1)) {
 			rc = 0;
+			write_lock_bh(&x25_neigh_list_lock);
 			nb->extended	     = x25_subscr.extended;
 			nb->global_facil_mask = x25_subscr.global_facil_mask;
+			write_unlock_bh(&x25_neigh_list_lock);
 		}
 	}
 	x25_neigh_put(nb);
-- 
cgit v1.2.3


From f90de660678cf553f63c387945830a2e4d26dd3e Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 25 Nov 2010 02:18:35 +0000
Subject: X25 remove bkl in facility ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 48 +++++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 45be72c3f940..2518efae8ec9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1424,34 +1424,34 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = x25_subscr_ioctl(cmd, argp);
 			break;
 		case SIOCX25GFACILITIES: {
-			struct x25_facilities fac = x25->facilities;
-			lock_kernel();
-			rc = copy_to_user(argp, &fac,
-					  sizeof(fac)) ? -EFAULT : 0;
-			unlock_kernel();
+			lock_sock(sk);
+			rc = copy_to_user(argp, &x25->facilities,
+						sizeof(x25->facilities))
+						? -EFAULT : 0;
+			release_sock(sk);
 			break;
 		}
 
 		case SIOCX25SFACILITIES: {
 			struct x25_facilities facilities;
 			rc = -EFAULT;
-			lock_kernel();
 			if (copy_from_user(&facilities, argp,
 					   sizeof(facilities)))
 				break;
 			rc = -EINVAL;
+			lock_sock(sk);
 			if (sk->sk_state != TCP_LISTEN &&
 			    sk->sk_state != TCP_CLOSE)
-				break;
+				goto out_fac_release;
 			if (facilities.pacsize_in < X25_PS16 ||
 			    facilities.pacsize_in > X25_PS4096)
-				break;
+				goto out_fac_release;
 			if (facilities.pacsize_out < X25_PS16 ||
 			    facilities.pacsize_out > X25_PS4096)
-				break;
+				goto out_fac_release;
 			if (facilities.winsize_in < 1 ||
 			    facilities.winsize_in > 127)
-				break;
+				goto out_fac_release;
 			if (facilities.throughput) {
 				int out = facilities.throughput & 0xf0;
 				int in  = facilities.throughput & 0x0f;
@@ -1459,27 +1459,28 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 					facilities.throughput |=
 						X25_DEFAULT_THROUGHPUT << 4;
 				else if (out < 0x30 || out > 0xD0)
-					break;
+					goto out_fac_release;
 				if (!in)
 					facilities.throughput |=
 						X25_DEFAULT_THROUGHPUT;
 				else if (in < 0x03 || in > 0x0D)
-					break;
+					goto out_fac_release;
 			}
 			if (facilities.reverse &&
 				(facilities.reverse & 0x81) != 0x81)
-				break;
+				goto out_fac_release;
 			x25->facilities = facilities;
 			rc = 0;
-			unlock_kernel();
+out_fac_release:
+			release_sock(sk);
 			break;
 		}
 
 		case SIOCX25GDTEFACILITIES: {
-			lock_kernel();
+			lock_sock(sk);
 			rc = copy_to_user(argp, &x25->dte_facilities,
 						sizeof(x25->dte_facilities));
-			unlock_kernel();
+			release_sock(sk);
 			if (rc)
 				rc = -EFAULT;
 			break;
@@ -1488,24 +1489,25 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCX25SDTEFACILITIES: {
 			struct x25_dte_facilities dtefacs;
 			rc = -EFAULT;
-			lock_kernel();
 			if (copy_from_user(&dtefacs, argp, sizeof(dtefacs)))
 				break;
 			rc = -EINVAL;
+			lock_sock(sk);
 			if (sk->sk_state != TCP_LISTEN &&
 					sk->sk_state != TCP_CLOSE)
-				break;
+				goto out_dtefac_release;
 			if (dtefacs.calling_len > X25_MAX_AE_LEN)
-				break;
+				goto out_dtefac_release;
 			if (dtefacs.calling_ae == NULL)
-				break;
+				goto out_dtefac_release;
 			if (dtefacs.called_len > X25_MAX_AE_LEN)
-				break;
+				goto out_dtefac_release;
 			if (dtefacs.called_ae == NULL)
-				break;
+				goto out_dtefac_release;
 			x25->dte_facilities = dtefacs;
 			rc = 0;
-			unlock_kernel();
+out_dtefac_release:
+			release_sock(sk);
 			break;
 		}
 
-- 
cgit v1.2.3


From 5b7958dfa5db758e36e92e1790075b470b4947f8 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 25 Nov 2010 02:18:40 +0000
Subject: X25 remove bkl from calluserdata ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 2518efae8ec9..e2eea0aec466 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1512,11 +1512,11 @@ out_dtefac_release:
 		}
 
 		case SIOCX25GCALLUSERDATA: {
-			struct x25_calluserdata cud = x25->calluserdata;
-			lock_kernel();
-			rc = copy_to_user(argp, &cud,
-					  sizeof(cud)) ? -EFAULT : 0;
-			unlock_kernel();
+			lock_sock(sk);
+			rc = copy_to_user(argp, &x25->calluserdata,
+					sizeof(x25->calluserdata))
+					? -EFAULT : 0;
+			release_sock(sk);
 			break;
 		}
 
@@ -1524,15 +1524,15 @@ out_dtefac_release:
 			struct x25_calluserdata calluserdata;
 
 			rc = -EFAULT;
-			lock_kernel();
 			if (copy_from_user(&calluserdata, argp,
 					   sizeof(calluserdata)))
 				break;
 			rc = -EINVAL;
 			if (calluserdata.cudlength > X25_MAX_CUD_LEN)
 				break;
+			lock_sock(sk);
 			x25->calluserdata = calluserdata;
-			unlock_kernel();
+			release_sock(sk);
 			rc = 0;
 			break;
 		}
-- 
cgit v1.2.3


From 74a7e440807d34e586e9feb8e14851b5c80fbfe5 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 25 Nov 2010 02:18:43 +0000
Subject: X25 remove bkl from causediag ioctls

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e2eea0aec466..8cfc419cef4b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1538,23 +1538,22 @@ out_dtefac_release:
 		}
 
 		case SIOCX25GCAUSEDIAG: {
-			struct x25_causediag causediag;
-			lock_kernel();
-			causediag = x25->causediag;
-			rc = copy_to_user(argp, &causediag,
-					  sizeof(causediag)) ? -EFAULT : 0;
-			unlock_kernel();
+			lock_sock(sk);
+			rc = copy_to_user(argp, &x25->causediag,
+					sizeof(x25->causediag))
+					? -EFAULT : 0;
+			release_sock(sk);
 			break;
 		}
 
 		case SIOCX25SCAUSEDIAG: {
 			struct x25_causediag causediag;
 			rc = -EFAULT;
-			lock_kernel();
 			if (copy_from_user(&causediag, argp, sizeof(causediag)))
 				break;
+			lock_sock(sk);
 			x25->causediag = causediag;
-			unlock_kernel();
+			release_sock(sk);
 			rc = 0;
 			break;
 
-- 
cgit v1.2.3


From 3f0a069a1d5c0ccace735e3a62c1bcef53e4c354 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Thu, 25 Nov 2010 02:18:45 +0000
Subject: X25 remove bkl in call user data length ioctl

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 8cfc419cef4b..ad96ee90fe27 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1562,19 +1562,20 @@ out_dtefac_release:
 		case SIOCX25SCUDMATCHLEN: {
 			struct x25_subaddr sub_addr;
 			rc = -EINVAL;
-			lock_kernel();
+			lock_sock(sk);
 			if(sk->sk_state != TCP_CLOSE)
-				break;
+				goto out_cud_release;
 			rc = -EFAULT;
 			if (copy_from_user(&sub_addr, argp,
 					sizeof(sub_addr)))
-				break;
+				goto out_cud_release;
 			rc = -EINVAL;
 			if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN)
-				break;
+				goto out_cud_release;
 			x25->cudmatchlength = sub_addr.cudmatchlength;
-			unlock_kernel();
 			rc = 0;
+out_cud_release:
+			release_sock(sk);
 			break;
 		}
 
-- 
cgit v1.2.3


From 82a39eb6b3829a02e235656feddb4542517fcabc Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 25 Nov 2010 03:15:07 +0000
Subject: ipv6: Prepare the tree for un-inlined jhash.

jhash is widely used in the kernel and because the functions
are inlined, the cost in size is significant. Also, the new jhash
functions are slightly larger than the previous ones so better un-inline.
As a preparation step, the calls to the internal macros are replaced
with the plain jhash function calls.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_connection_sock.c | 22 ++++++++++------------
 net/ipv6/reassembly.c            | 36 ++++++++++++++++--------------------
 2 files changed, 26 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8a1628023bd1..861d252bd1ba 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -60,18 +60,16 @@ EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
 static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
 			   const u32 rnd, const u16 synq_hsize)
 {
-	u32 a = (__force u32)raddr->s6_addr32[0];
-	u32 b = (__force u32)raddr->s6_addr32[1];
-	u32 c = (__force u32)raddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)raddr->s6_addr32[3];
-	b += (__force u32)rport;
-	__jhash_mix(a, b, c);
+	u32 c;
+
+	c = jhash_3words((__force u32)raddr->s6_addr32[0],
+			 (__force u32)raddr->s6_addr32[1],
+			 (__force u32)raddr->s6_addr32[2],
+			 rnd);
+
+	c = jhash_2words((__force u32)raddr->s6_addr32[3],
+			 (__force u32)rport,
+			 c);
 
 	return c & (synq_hsize - 1);
 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0f2766453759..07beeb06f752 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
 			     const struct in6_addr *daddr, u32 rnd)
 {
-	u32 a, b, c;
-
-	a = (__force u32)saddr->s6_addr32[0];
-	b = (__force u32)saddr->s6_addr32[1];
-	c = (__force u32)saddr->s6_addr32[2];
-
-	a += JHASH_GOLDEN_RATIO;
-	b += JHASH_GOLDEN_RATIO;
-	c += rnd;
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)saddr->s6_addr32[3];
-	b += (__force u32)daddr->s6_addr32[0];
-	c += (__force u32)daddr->s6_addr32[1];
-	__jhash_mix(a, b, c);
-
-	a += (__force u32)daddr->s6_addr32[2];
-	b += (__force u32)daddr->s6_addr32[3];
-	c += (__force u32)id;
-	__jhash_mix(a, b, c);
+	u32 c;
+
+	c = jhash_3words((__force u32)saddr->s6_addr32[0],
+			 (__force u32)saddr->s6_addr32[1],
+			 (__force u32)saddr->s6_addr32[2],
+			 rnd);
+
+	c = jhash_3words((__force u32)saddr->s6_addr32[3],
+			 (__force u32)daddr->s6_addr32[0],
+			 (__force u32)daddr->s6_addr32[1],
+			 c);
+
+	c =  jhash_3words((__force u32)daddr->s6_addr32[2],
+			  (__force u32)daddr->s6_addr32[3],
+			  (__force u32)id,
+			  c);
 
 	return c & (INETFRAGS_HASHSZ - 1);
 }
-- 
cgit v1.2.3


From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 26 Nov 2010 08:36:09 +0000
Subject: xps: Add CONFIG_XPS

This patch adds XPS_CONFIG option to enable and disable XPS.  This is
done in the same manner as RPS_CONFIG.  This is also fixes build
failure in XPS code when SMP is not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 52 +++++++++++++++++++++++++----------------------
 net/Kconfig               |  5 +++++
 net/core/dev.c            |  9 +++++---
 net/core/net-sysfs.c      | 47 ++++++++++++++++++++++++++++++------------
 net/core/net-sysfs.h      |  3 ---
 5 files changed, 73 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c6ae2f4b9ab..9ae4544f0cf0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,30 +535,6 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
-/*
- * This structure holds an XPS map which can be of variable length.  The
- * map is an array of queues.
- */
-struct xps_map {
-	unsigned int len;
-	unsigned int alloc_len;
-	struct rcu_head rcu;
-	u16 queues[0];
-};
-#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
-#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
-    / sizeof(u16))
-
-/*
- * This structure holds all XPS maps for device.  Maps are indexed by CPU.
- */
-struct xps_dev_maps {
-	struct rcu_head rcu;
-	struct xps_map *cpu_map[0];
-};
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
-    (nr_cpu_ids * sizeof(struct xps_map *)))
-
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
  * tail pointer for that CPU's input queue at the time of last enqueue.
@@ -626,6 +602,32 @@ struct netdev_rx_queue {
 } ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RPS */
 
+#ifdef CONFIG_XPS
+/*
+ * This structure holds an XPS map which can be of variable length.  The
+ * map is an array of queues.
+ */
+struct xps_map {
+	unsigned int len;
+	unsigned int alloc_len;
+	struct rcu_head rcu;
+	u16 queues[0];
+};
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
+    / sizeof(u16))
+
+/*
+ * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ */
+struct xps_dev_maps {
+	struct rcu_head rcu;
+	struct xps_map *cpu_map[0];
+};
+#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
+    (nr_cpu_ids * sizeof(struct xps_map *)))
+#endif /* CONFIG_XPS */
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1046,7 +1048,9 @@ struct net_device {
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 	spinlock_t		tx_global_lock;
 
+#ifdef CONFIG_XPS
 	struct xps_dev_maps	*xps_maps;
+#endif
 
 	/* These may be needed for future network-power-down code. */
 
diff --git a/net/Kconfig b/net/Kconfig
index 55fd82e9ffd9..126c2af0fc1f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -220,6 +220,11 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config XPS
+	boolean
+	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
+	default y
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/core/dev.c b/net/core/dev.c
index c852f0038a08..3259d2c323a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
 						  txq);
+		if (rc)
+			return rc;
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
 	struct xps_map *map;
 	int queue_index = -1;
@@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+#ifdef CONFIG_RPS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
-#ifdef CONFIG_RPS
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
 
@@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	for (i = 0; i < count; i++)
 		rx[i].dev = dev;
-#endif
 	return 0;
 }
+#endif
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 68dbbfdee274..99c11294623f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -751,10 +751,12 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
+#endif /* CONFIG_RPS */
 
 int
 net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
+#ifdef CONFIG_RPS
 	int i;
 	int error = 0;
 
@@ -770,8 +772,12 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		kobject_put(&net->_rx[i].kobj);
 
 	return error;
+#else
+	return 0;
+#endif
 }
 
+#ifdef CONFIG_XPS
 /*
  * netdev_queue sysfs structures and functions.
  */
@@ -1090,10 +1096,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
+#endif /* CONFIG_XPS */
 
 int
 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
+#ifdef CONFIG_XPS
 	int i;
 	int error = 0;
 
@@ -1109,27 +1117,36 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		kobject_put(&net->_tx[i].kobj);
 
 	return error;
+#else
+	return 0;
+#endif
 }
 
 static int register_queue_kobjects(struct net_device *net)
 {
-	int error = 0, txq = 0, rxq = 0;
+	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
 
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
 		return -ENOMEM;
+#endif
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
 
-	error = net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+	error = net_rx_queue_update_kobjects(net, 0, real_rx);
 	if (error)
 		goto error;
-	rxq = net->real_num_rx_queues;
+	rxq = real_rx;
 
-	error = netdev_queue_update_kobjects(net, 0,
-					     net->real_num_tx_queues);
+	error = netdev_queue_update_kobjects(net, 0, real_tx);
 	if (error)
 		goto error;
-	txq = net->real_num_tx_queues;
+	txq = real_tx;
 
 	return 0;
 
@@ -1141,11 +1158,19 @@ error:
 
 static void remove_queue_kobjects(struct net_device *net)
 {
-	net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
-	netdev_queue_update_kobjects(net, net->real_num_tx_queues, 0);
+	int real_rx = 0, real_tx = 0;
+
+#ifdef CONFIG_RPS
+	real_rx = net->real_num_rx_queues;
+#endif
+	real_tx = net->real_num_tx_queues;
+
+	net_rx_queue_update_kobjects(net, real_rx, 0);
+	netdev_queue_update_kobjects(net, real_tx, 0);
+#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
 	kset_unregister(net->queues_kset);
+#endif
 }
-#endif /* CONFIG_RPS */
 
 static const void *net_current_ns(void)
 {
@@ -1244,9 +1269,7 @@ void netdev_unregister_kobject(struct net_device * net)
 
 	kobject_get(&dev->kobj);
 
-#ifdef CONFIG_RPS
 	remove_queue_kobjects(net);
-#endif
 
 	device_del(dev);
 }
@@ -1285,13 +1308,11 @@ int netdev_register_kobject(struct net_device *net)
 	if (error)
 		return error;
 
-#ifdef CONFIG_RPS
 	error = register_queue_kobjects(net);
 	if (error) {
 		device_del(dev);
 		return error;
 	}
-#endif
 
 	return error;
 }
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 25ec2ee57df7..bd7751ec1c4d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,11 +4,8 @@
 int netdev_kobject_init(void);
 int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
-#ifdef CONFIG_RPS
 int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
 int netdev_queue_update_kobjects(struct net_device *net,
 				 int old_num, int new_num);
 
 #endif
-
-#endif
-- 
cgit v1.2.3


From b02038a17b271e0f70616c54e4eccb5cc33d1b74 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 28 Nov 2010 05:43:24 +0000
Subject: xps: NUMA allocations for per cpu data

store_xps_map() allocates maps that are used by single cpu, it makes
sense to use NUMA allocations.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 99c11294623f..35ef42fa0cf3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -956,8 +956,9 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 			if (map_len >= alloc_len) {
 				alloc_len = alloc_len ?
 				    2 * alloc_len : XPS_MIN_MAP_ALLOC;
-				new_map = kzalloc(XPS_MAP_SIZE(alloc_len),
-				    GFP_KERNEL);
+				new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len),
+						       GFP_KERNEL,
+						       cpu_to_node(cpu));
 				if (!new_map)
 					goto error;
 				new_map->alloc_len = alloc_len;
-- 
cgit v1.2.3


From a41778694806ac1ccd4b1dafed1abef8d5ba98ac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 28 Nov 2010 21:43:02 +0000
Subject: xps: add __rcu annotations

Avoid sparse warnings : add __rcu annotations and use
rcu_dereference_protected() where necessary.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/net-sysfs.c      | 24 +++++++++++++++---------
 2 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ae4544f0cf0..4b0c7f3aa32b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -622,7 +622,7 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map *cpu_map[0];
+	struct xps_map __rcu *cpu_map[0];
 };
 #define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +		\
     (nr_cpu_ids * sizeof(struct xps_map *)))
@@ -1049,7 +1049,7 @@ struct net_device {
 	spinlock_t		tx_global_lock;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps	*xps_maps;
+	struct xps_dev_maps __rcu *xps_maps;
 #endif
 
 	/* These may be needed for future network-power-down code. */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 35ef42fa0cf3..f85cee3d869e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -899,6 +899,8 @@ static void xps_dev_maps_release(struct rcu_head *rcu)
 }
 
 static DEFINE_MUTEX(xps_map_mutex);
+#define xmap_dereference(P)		\
+	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
 static ssize_t store_xps_map(struct netdev_queue *queue,
 		      struct netdev_queue_attribute *attribute,
@@ -935,11 +937,12 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 
 	mutex_lock(&xps_map_mutex);
 
-	dev_maps = dev->xps_maps;
+	dev_maps = xmap_dereference(dev->xps_maps);
 
 	for_each_possible_cpu(cpu) {
-		new_map = map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
-
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		new_map = map;
 		if (map) {
 			for (pos = 0; pos < map->len; pos++)
 				if (map->queues[pos] == index)
@@ -975,13 +978,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 			else
 				new_map = NULL;
 		}
-		new_dev_maps->cpu_map[cpu] = new_map;
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], new_map);
 	}
 
 	/* Cleanup old maps */
 	for_each_possible_cpu(cpu) {
-		map = dev_maps ? dev_maps->cpu_map[cpu] : NULL;
-		if (map && new_dev_maps->cpu_map[cpu] != map)
+		map = dev_maps ?
+			xmap_dereference(dev_maps->cpu_map[cpu]) : NULL;
+		if (map && xmap_dereference(new_dev_maps->cpu_map[cpu]) != map)
 			call_rcu(&map->rcu, xps_map_release);
 		if (new_dev_maps->cpu_map[cpu])
 			nonempty = 1;
@@ -1007,7 +1011,9 @@ error:
 
 	if (new_dev_maps)
 		for_each_possible_cpu(i)
-			kfree(new_dev_maps->cpu_map[i]);
+			kfree(rcu_dereference_protected(
+				new_dev_maps->cpu_map[i],
+				1));
 	kfree(new_dev_maps);
 	free_cpumask_var(mask);
 	return -ENOMEM;
@@ -1033,11 +1039,11 @@ static void netdev_queue_release(struct kobject *kobj)
 	index = get_netdev_queue_index(queue);
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = dev->xps_maps;
+	dev_maps = xmap_dereference(dev->xps_maps);
 
 	if (dev_maps) {
 		for_each_possible_cpu(i) {
-			map  = dev_maps->cpu_map[i];
+			map = xmap_dereference(dev_maps->cpu_map[i]);
 			if (!map)
 				continue;
 
-- 
cgit v1.2.3


From f7ca38dfe58c20cb1aa2ed9643187e8b194b5bae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 25 Nov 2010 10:02:29 +0100
Subject: nl80211/cfg80211: extend mgmt-tx API for off-channel

With p2p, it is sometimes necessary to transmit
a frame (typically an action frame) on another
channel than the current channel. Enable this
through the CMD_FRAME API, and allow it to wait
for a response. A new command allows that wait
to be aborted.

However, allow userspace to specify whether or
not it wants to allow off-channel TX, it may
actually want to use the same channel only.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 25 +++++++++++++++++-----
 include/net/cfg80211.h  | 11 +++++++---
 net/mac80211/cfg.c      |  7 ++++--
 net/wireless/core.h     |  4 ++--
 net/wireless/mlme.c     |  9 ++++----
 net/wireless/nl80211.c  | 57 +++++++++++++++++++++++++++++++++++++++++++------
 6 files changed, 91 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index d706bf3badc8..5cfa579df476 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -358,11 +358,16 @@
  *	user space application). %NL80211_ATTR_FRAME is used to specify the
  *	frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and
  *	optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on
- *	which channel the frame is to be transmitted or was received. This
- *	channel has to be the current channel (remain-on-channel or the
- *	operational channel). When called, this operation returns a cookie
- *	(%NL80211_ATTR_COOKIE) that will be included with the TX status event
- *	pertaining to the TX request.
+ *	which channel the frame is to be transmitted or was received. If this
+ *	channel is not the current channel (remain-on-channel or the
+ *	operational channel) the device will switch to the given channel and
+ *	transmit the frame, optionally waiting for a response for the time
+ *	specified using %NL80211_ATTR_DURATION. When called, this operation
+ *	returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the
+ *	TX status event pertaining to the TX request.
+ * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
+ *	command may be used with the corresponding cookie to cancel the wait
+ *	time if it is known that it is no longer necessary.
  * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility.
  * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame
  *	transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies
@@ -493,6 +498,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_CHANNEL,
 	NL80211_CMD_SET_WDS_PEER,
 
+	NL80211_CMD_FRAME_WAIT_CANCEL,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -828,6 +835,12 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
  *
+ * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
+ *	transmitted on another channel when the channel given doesn't match
+ *	the current channel. If the current channel doesn't match and this
+ *	flag isn't set, the frame will be rejected. This is also used as an
+ *	nl80211 capability flag.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1002,6 +1015,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MCAST_RATE,
 
+	NL80211_ATTR_OFFCHANNEL_TX_OK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0663945cfa48..49a7c53a48ca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1134,7 +1134,9 @@ struct cfg80211_pmksa {
  * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation.
  *	This allows the operation to be terminated prior to timeout based on
  *	the duration value.
- * @mgmt_tx: Transmit a management frame
+ * @mgmt_tx: Transmit a management frame.
+ * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management
+ *	frame on another channel
  *
  * @testmode_cmd: run a test mode command
  *
@@ -1291,10 +1293,13 @@ struct cfg80211_ops {
 					    u64 cookie);
 
 	int	(*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie);
+	int	(*mgmt_tx_cancel_wait)(struct wiphy *wiphy,
+				       struct net_device *dev,
+				       u64 cookie);
 
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0c544074479e..aac2d7de828e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1552,9 +1552,9 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 }
 
 static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
-			     struct ieee80211_channel *chan,
+			     struct ieee80211_channel *chan, bool offchan,
 			     enum nl80211_channel_type channel_type,
-			     bool channel_type_valid,
+			     bool channel_type_valid, unsigned int wait,
 			     const u8 *buf, size_t len, u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1565,6 +1565,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
 		    IEEE80211_TX_CTL_REQ_TX_STATUS;
 
+	if (offchan)
+		return -EOPNOTSUPP;
+
 	/* Check that we are on the requested channel for transmission */
 	if (chan != local->tmp_channel &&
 	    chan != local->oper_channel)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6583cca0e2ee..ee80ad8dc655 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -341,9 +341,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie);
 
 /* SME */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 6980a0c315b2..d7680f2a4c5b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -864,9 +864,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
 
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
-			  struct ieee80211_channel *chan,
+			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
-			  bool channel_type_valid,
+			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, u64 *cookie)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -946,8 +946,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 		return -EINVAL;
 
 	/* Transmit the Action frame as requested by user space */
-	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
-				  channel_type_valid, buf, len, cookie);
+	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan,
+				  channel_type, channel_type_valid,
+				  wait, buf, len, cookie);
 }
 
 bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 67ff7e92cb99..960be4e650f0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -163,16 +163,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
-
 	[NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
-
 	[NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
-
 	[NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
-
 	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
+	[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -677,6 +674,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(remain_on_channel, REMAIN_ON_CHANNEL);
 	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
 	CMD(mgmt_tx, FRAME);
+	CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
 	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
@@ -698,6 +696,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	nla_nest_end(msg, nl_cmds);
 
+	/* for now at least assume all drivers have it */
+	if (dev->ops->mgmt_tx)
+		NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK);
+
 	if (mgmt_stypes) {
 		u16 stypes;
 		struct nlattr *nl_ftypes, *nl_ifs;
@@ -4244,6 +4246,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	void *hdr;
 	u64 cookie;
 	struct sk_buff *msg;
+	unsigned int wait = 0;
+	bool offchan;
 
 	if (!info->attrs[NL80211_ATTR_FRAME] ||
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -4260,6 +4264,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EOPNOTSUPP;
 
+	if (info->attrs[NL80211_ATTR_DURATION]) {
+		if (!rdev->ops->mgmt_tx_cancel_wait)
+			return -EINVAL;
+		wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]);
+	}
+
 	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
 		channel_type = nla_get_u32(
 			info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
@@ -4271,6 +4281,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		channel_type_valid = true;
 	}
 
+	offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK];
+
 	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
 	chan = rdev_freq_to_chan(rdev, freq, channel_type);
 	if (chan == NULL)
@@ -4287,8 +4299,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		err = PTR_ERR(hdr);
 		goto free_msg;
 	}
-	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
-				    channel_type_valid,
+	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type,
+				    channel_type_valid, wait,
 				    nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				    nla_len(info->attrs[NL80211_ATTR_FRAME]),
 				    &cookie);
@@ -4307,6 +4319,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	u64 cookie;
+
+	if (!info->attrs[NL80211_ATTR_COOKIE])
+		return -EINVAL;
+
+	if (!rdev->ops->mgmt_tx_cancel_wait)
+		return -EOPNOTSUPP;
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
+
+	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+	return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie);
+}
+
 static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -4879,6 +4916,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
+		.doit = nl80211_tx_mgmt_cancel_wait,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 	{
 		.cmd = NL80211_CMD_SET_POWER_SAVE,
 		.doit = nl80211_set_power_save,
-- 
cgit v1.2.3


From f30221e4ec62d905b56d5e8f7ccab6b406a97cf5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 25 Nov 2010 10:02:30 +0100
Subject: mac80211: implement off-channel mgmt TX

This implements the new off-channel TX API
in mac80211 with a new work item type. The
operation doesn't add a new work item when
we're on the right channel and there's no
wait time so that for example p2p probe
responses will be transmitted without delay.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         | 93 +++++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/ieee80211_i.h |  5 +++
 net/mac80211/status.c      | 15 +++++++-
 net/mac80211/work.c        | 22 +++++++++++
 4 files changed, 129 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index aac2d7de828e..db134b500caa 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1551,6 +1551,28 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 	return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
 }
 
+static enum work_done_result
+ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb)
+{
+	/*
+	 * Use the data embedded in the work struct for reporting
+	 * here so if the driver mangled the SKB before dropping
+	 * it (which is the only way we really should get here)
+	 * then we don't report mangled data.
+	 *
+	 * If there was no wait time, then by the time we get here
+	 * the driver will likely not have reported the status yet,
+	 * so in that case userspace will have to deal with it.
+	 */
+
+	if (wk->offchan_tx.wait && wk->offchan_tx.frame)
+		cfg80211_mgmt_tx_status(wk->sdata->dev,
+					(unsigned long) wk->offchan_tx.frame,
+					wk->ie, wk->ie_len, false, GFP_KERNEL);
+
+	return WORK_DONE_DESTROY;
+}
+
 static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 			     struct ieee80211_channel *chan, bool offchan,
 			     enum nl80211_channel_type channel_type,
@@ -1561,20 +1583,22 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct sta_info *sta;
+	struct ieee80211_work *wk;
 	const struct ieee80211_mgmt *mgmt = (void *)buf;
 	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
 		    IEEE80211_TX_CTL_REQ_TX_STATUS;
-
-	if (offchan)
-		return -EOPNOTSUPP;
+	bool is_offchan = false;
 
 	/* Check that we are on the requested channel for transmission */
 	if (chan != local->tmp_channel &&
 	    chan != local->oper_channel)
-		return -EBUSY;
+		is_offchan = true;
 	if (channel_type_valid &&
 	    (channel_type != local->tmp_channel_type &&
 	     channel_type != local->_oper_channel_type))
+		is_offchan = true;
+
+	if (is_offchan && !offchan)
 		return -EBUSY;
 
 	switch (sdata->vif.type) {
@@ -1608,12 +1632,70 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	IEEE80211_SKB_CB(skb)->flags = flags;
 
 	skb->dev = sdata->dev;
-	ieee80211_tx_skb(sdata, skb);
 
 	*cookie = (unsigned long) skb;
+
+	/*
+	 * Can transmit right away if the channel was the
+	 * right one and there's no wait involved... If a
+	 * wait is involved, we might otherwise not be on
+	 * the right channel for long enough!
+	 */
+	if (!is_offchan && !wait && !sdata->vif.bss_conf.idle) {
+		ieee80211_tx_skb(sdata, skb);
+		return 0;
+	}
+
+	wk = kzalloc(sizeof(*wk) + len, GFP_KERNEL);
+	if (!wk) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	wk->type = IEEE80211_WORK_OFFCHANNEL_TX;
+	wk->chan = chan;
+	wk->sdata = sdata;
+	wk->done = ieee80211_offchan_tx_done;
+	wk->offchan_tx.frame = skb;
+	wk->offchan_tx.wait = wait;
+	wk->ie_len = len;
+	memcpy(wk->ie, buf, len);
+
+	ieee80211_add_work(wk);
 	return 0;
 }
 
+static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
+					 struct net_device *dev,
+					 u64 cookie)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_work *wk;
+	int ret = -ENOENT;
+
+	mutex_lock(&local->mtx);
+	list_for_each_entry(wk, &local->work_list, list) {
+		if (wk->sdata != sdata)
+			continue;
+
+		if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX)
+			continue;
+
+		if (cookie != (unsigned long) wk->offchan_tx.frame)
+			continue;
+
+		wk->timeout = jiffies;
+
+		ieee80211_queue_work(&local->hw, &local->work_work);
+		ret = 0;
+		break;
+	}
+	mutex_unlock(&local->mtx);
+
+	return ret;
+}
+
 static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
 					  struct net_device *dev,
 					  u16 frame_type, bool reg)
@@ -1698,6 +1780,7 @@ struct cfg80211_ops mac80211_config_ops = {
 	.remain_on_channel = ieee80211_remain_on_channel,
 	.cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
 	.mgmt_tx = ieee80211_mgmt_tx,
+	.mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait,
 	.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
 	.mgmt_frame_register = ieee80211_mgmt_frame_register,
 	.set_antenna = ieee80211_set_antenna,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5bc0745368fe..66b0b52b828d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -260,6 +260,7 @@ enum ieee80211_work_type {
 	IEEE80211_WORK_ASSOC_BEACON_WAIT,
 	IEEE80211_WORK_ASSOC,
 	IEEE80211_WORK_REMAIN_ON_CHANNEL,
+	IEEE80211_WORK_OFFCHANNEL_TX,
 };
 
 /**
@@ -320,6 +321,10 @@ struct ieee80211_work {
 		struct {
 			u32 duration;
 		} remain;
+		struct {
+			struct sk_buff *frame;
+			u32 wait;
+		} offchan_tx;
 	};
 
 	int ie_len;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index bed7e32ed908..4958710a7d92 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -321,10 +321,23 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 					msecs_to_jiffies(10));
 	}
 
-	if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
+	if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
+		struct ieee80211_work *wk;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(wk, &local->work_list, list) {
+			if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX)
+				continue;
+			if (wk->offchan_tx.frame != skb)
+				continue;
+			wk->offchan_tx.frame = NULL;
+			break;
+		}
+		rcu_read_unlock();
 		cfg80211_mgmt_tx_status(
 			skb->dev, (unsigned long) skb, skb->data, skb->len,
 			!!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
+	}
 
 	/* this was a transmitted frame, but now we want to reuse it */
 	skb_orphan(skb);
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index ae344d1ba056..2b5c3f267198 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -560,6 +560,25 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk)
 	return WORK_ACT_TIMEOUT;
 }
 
+static enum work_action __must_check
+ieee80211_offchannel_tx(struct ieee80211_work *wk)
+{
+	if (!wk->started) {
+		wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait);
+
+		/*
+		 * After this, offchan_tx.frame remains but now is no
+		 * longer a valid pointer -- we still need it as the
+		 * cookie for canceling this work.
+		 */
+		ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame);
+
+		return WORK_ACT_NONE;
+	}
+
+	return WORK_ACT_TIMEOUT;
+}
+
 static enum work_action __must_check
 ieee80211_assoc_beacon_wait(struct ieee80211_work *wk)
 {
@@ -955,6 +974,9 @@ static void ieee80211_work_work(struct work_struct *work)
 		case IEEE80211_WORK_REMAIN_ON_CHANNEL:
 			rma = ieee80211_remain_on_channel_timeout(wk);
 			break;
+		case IEEE80211_WORK_OFFCHANNEL_TX:
+			rma = ieee80211_offchannel_tx(wk);
+			break;
 		case IEEE80211_WORK_ASSOC_BEACON_WAIT:
 			rma = ieee80211_assoc_beacon_wait(wk);
 			break;
-- 
cgit v1.2.3


From dd318575ff0aae91ac4cbcc5b60c184e59267212 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 29 Nov 2010 11:09:16 +0100
Subject: mac80211: fix RX aggregation locking

The RX aggregation locking documentation was
wrong, which led Christian to also code the
timer timeout handling for it somewhat wrongly.

Fix the documentation, the two places that
need to hold the reorder lock across accesses
to the structure, and the debugfs code that
should just use RCU.

Also, remove acquiring the sta->lock across
reorder timeouts since it isn't necessary, and
change a few places to GFP_KERNEL because the
code path here doesn't need atomic allocations
as I noticed when reviewing all this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c      |  8 +++-----
 net/mac80211/debugfs_sta.c | 29 +++++++++++++++--------------
 net/mac80211/rx.c          | 17 +++++++++++++----
 net/mac80211/sta_info.h    | 29 ++++++++++++++---------------
 4 files changed, 45 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 720b7a84af59..f138b195d657 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -129,9 +129,7 @@ static void sta_rx_agg_reorder_timer_expired(unsigned long data)
 			timer_to_tid[0]);
 
 	rcu_read_lock();
-	spin_lock(&sta->lock);
 	ieee80211_release_reorder_timeout(sta, *ptid);
-	spin_unlock(&sta->lock);
 	rcu_read_unlock();
 }
 
@@ -256,7 +254,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	}
 
 	/* prepare A-MPDU MLME for Rx aggregation */
-	tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
+	tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
 	if (!tid_agg_rx) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
@@ -280,9 +278,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 	/* prepare reordering buffer */
 	tid_agg_rx->reorder_buf =
-		kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
+		kcalloc(buf_size, sizeof(struct sk_buff *), GFP_KERNEL);
 	tid_agg_rx->reorder_time =
-		kcalloc(buf_size, sizeof(unsigned long), GFP_ATOMIC);
+		kcalloc(buf_size, sizeof(unsigned long), GFP_KERNEL);
 	if (!tid_agg_rx->reorder_buf || !tid_agg_rx->reorder_time) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index f0fce37f4069..8bb5af85f469 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -112,34 +112,35 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	char buf[71 + STA_TID_NUM * 40], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
+	struct tid_ampdu_rx *tid_rx;
+	struct tid_ampdu_tx *tid_tx;
+
+	rcu_read_lock();
 
-	spin_lock_bh(&sta->lock);
 	p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
 	p += scnprintf(p, sizeof(buf) + buf - p,
 		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
+
 	for (i = 0; i < STA_TID_NUM; i++) {
+		tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
+		tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+
 		p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
-		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				!!sta->ampdu_mlme.tid_rx[i]);
+		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_rx[i] ?
-				sta->ampdu_mlme.tid_rx[i]->dialog_token : 0);
+				tid_rx ? tid_rx->dialog_token : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
-				sta->ampdu_mlme.tid_rx[i] ?
-				sta->ampdu_mlme.tid_rx[i]->ssn : 0);
+				tid_rx ? tid_rx->ssn : 0);
 
-		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				!!sta->ampdu_mlme.tid_tx[i]);
+		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_tx);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_tx[i] ?
-				sta->ampdu_mlme.tid_tx[i]->dialog_token : 0);
+				tid_tx ? tid_tx->dialog_token : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d",
-				sta->ampdu_mlme.tid_tx[i] ?
-				skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0);
+				tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\n");
 	}
-	spin_unlock_bh(&sta->lock);
+	rcu_read_unlock();
 
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d2fcd22ab06d..fdeabb19943c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -538,6 +538,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
 {
 	struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
 
+	lockdep_assert_held(&tid_agg_rx->reorder_lock);
+
 	if (!skb)
 		goto no_frame;
 
@@ -557,6 +559,8 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
 {
 	int index;
 
+	lockdep_assert_held(&tid_agg_rx->reorder_lock);
+
 	while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
 		index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
@@ -581,6 +585,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 {
 	int index, j;
 
+	lockdep_assert_held(&tid_agg_rx->reorder_lock);
+
 	/* release the buffer until next missing frame */
 	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
 						tid_agg_rx->buf_size;
@@ -683,10 +689,11 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 	int index;
 	bool ret = true;
 
+	spin_lock(&tid_agg_rx->reorder_lock);
+
 	buf_size = tid_agg_rx->buf_size;
 	head_seq_num = tid_agg_rx->head_seq_num;
 
-	spin_lock(&tid_agg_rx->reorder_lock);
 	/* frame with out of date sequence number */
 	if (seq_less(mpdu_seq_num, head_seq_num)) {
 		dev_kfree_skb(skb);
@@ -1921,9 +1928,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 			mod_timer(&tid_agg_rx->session_timer,
 				  TU_TO_EXP_TIME(tid_agg_rx->timeout));
 
+		spin_lock(&tid_agg_rx->reorder_lock);
 		/* release stored frames up to start of BAR */
 		ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num,
 						 frames);
+		spin_unlock(&tid_agg_rx->reorder_lock);
+
 		kfree_skb(skb);
 		return RX_QUEUED;
 	}
@@ -2515,9 +2525,8 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
 }
 
 /*
- * This function makes calls into the RX path. Therefore the
- * caller must hold the sta_info->lock and everything has to
- * be under rcu_read_lock protection as well.
+ * This function makes calls into the RX path, therefore
+ * it has to be invoked under RCU read lock.
  */
 void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index b562d9b6a702..05f11302443b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -81,13 +81,14 @@ enum ieee80211_sta_info_flags {
  * @stop_initiator: initiator of a session stop
  * @tx_stop: TX DelBA frame when stopping
  *
- * This structure is protected by RCU and the per-station
- * spinlock. Assignments to the array holding it must hold
- * the spinlock, only the TX path can access it under RCU
- * lock-free if, and only if, the state has  the flag
- * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path
- * must also acquire the spinlock and re-check the state,
- * see comments in the tx code touching it.
+ * This structure's lifetime is managed by RCU, assignments to
+ * the array holding it must hold the aggregation mutex.
+ *
+ * The TX path can access it under RCU lock-free if, and
+ * only if, the state has the flag %HT_AGG_STATE_OPERATIONAL
+ * set. Otherwise, the TX path must also acquire the spinlock
+ * and re-check the state, see comments in the tx code
+ * touching it.
  */
 struct tid_ampdu_tx {
 	struct rcu_head rcu_head;
@@ -115,15 +116,13 @@ struct tid_ampdu_tx {
  * @rcu_head: RCU head used for freeing this struct
  * @reorder_lock: serializes access to reorder buffer, see below.
  *
- * This structure is protected by RCU and the per-station
- * spinlock. Assignments to the array holding it must hold
- * the spinlock.
+ * This structure's lifetime is managed by RCU, assignments to
+ * the array holding it must hold the aggregation mutex.
  *
- * The @reorder_lock is used to protect the variables and
- * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
- * @stored_mpdu_num and @reorder_time from being corrupted by
- * concurrent access of the RX path and the expired frame
- * release timer.
+ * The @reorder_lock is used to protect the members of this
+ * struct, except for @timeout, @buf_size and @dialog_token,
+ * which are constant across the lifetime of the struct (the
+ * dialog token being used only for debugging).
  */
 struct tid_ampdu_rx {
 	struct rcu_head rcu_head;
-- 
cgit v1.2.3


From 08ca944eb240b2299e743c76b43fbc7c2dd251de Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Tue, 30 Nov 2010 12:19:34 +0100
Subject: mac80211: Minor optimization in ieee80211_rx_h_data

Remove a superfluous ieee80211_is_data check as that was checked a few
lines before already and we wont't get here for non-data frames at all.

Second, the frame was already converted to 802.3 header format and
reading the fc and addr1 fields was only possible because the 802.3
header is short enough and didn't overwrite the relevant parts of the
802.11 header. Make the code more obvious by checking the ethernet
header's h_dest field.

Furthermore reorder the conditions to reduce the number of checks
when dynamic powersave is not needed (AP mode for example).

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fdeabb19943c..d83334bbb245 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1877,9 +1877,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += rx->skb->len;
 
-	if (ieee80211_is_data(hdr->frame_control) &&
-	    !is_multicast_ether_addr(hdr->addr1) &&
-	    local->hw.conf.dynamic_ps_timeout > 0 && local->ps_sdata) {
+	if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 &&
+	    !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) {
 			mod_timer(&local->dynamic_ps_timer, jiffies +
 			 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
 	}
-- 
cgit v1.2.3


From 98158f5a853cafd33b254ae0eacc0dd69f90b93b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 11:41:59 -0800
Subject: inetpeer: Abstract out the tree root accesses.

Instead of directly accessing "peer", change to code to
operate using a "struct inet_peer_base *" pointer.

This will facilitate the addition of a seperate tree for
ipv6 peer entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 119 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 69 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9e94d7cf4f8a..f94400848921 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -79,13 +79,13 @@ static const struct inet_peer peer_fake_node = {
 	.avl_height	= 0
 };
 
-static struct {
+static struct inet_peer_base {
 	struct inet_peer __rcu *root;
 	spinlock_t	lock;
 	int		total;
-} peers = {
+} v4_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(v4_peers.lock),
 	.total		= 0,
 };
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -155,15 +155,15 @@ static void unlink_from_unused(struct inet_peer *p)
 /*
  * Called with local BH disabled and the pool lock held.
  */
-#define lookup(_daddr, _stack) 					\
+#define lookup(_daddr, _stack, _base)				\
 ({								\
 	struct inet_peer *u;					\
 	struct inet_peer __rcu **v;				\
 								\
 	stackptr = _stack;					\
-	*stackptr++ = &peers.root;				\
-	for (u = rcu_dereference_protected(peers.root,		\
-			lockdep_is_held(&peers.lock));		\
+	*stackptr++ = &_base->root;				\
+	for (u = rcu_dereference_protected(_base->root,		\
+			lockdep_is_held(&_base->lock));		\
 	     u != peer_avl_empty; ) {				\
 		if (_daddr == u->v4daddr)			\
 			break;					\
@@ -173,7 +173,7 @@ static void unlink_from_unused(struct inet_peer *p)
 			v = &u->avl_right;			\
 		*stackptr++ = v;				\
 		u = rcu_dereference_protected(*v,		\
-			lockdep_is_held(&peers.lock));		\
+			lockdep_is_held(&_base->lock));		\
 	}							\
 	u;							\
 })
@@ -185,9 +185,9 @@ static void unlink_from_unused(struct inet_peer *p)
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base)
 {
-	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	struct inet_peer *u = rcu_dereference_bh(base->root);
 	int count = 0;
 
 	while (u != peer_avl_empty) {
@@ -212,19 +212,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
 }
 
 /* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start)				\
+#define lookup_rightempty(start, base)				\
 ({								\
 	struct inet_peer *u;					\
 	struct inet_peer __rcu **v;				\
 	*stackptr++ = &start->avl_left;				\
 	v = &start->avl_left;					\
 	for (u = rcu_dereference_protected(*v,			\
-			lockdep_is_held(&peers.lock));		\
+			lockdep_is_held(&base->lock));		\
 	     u->avl_right != peer_avl_empty_rcu; ) {		\
 		v = &u->avl_right;				\
 		*stackptr++ = v;				\
 		u = rcu_dereference_protected(*v,		\
-			lockdep_is_held(&peers.lock));		\
+			lockdep_is_held(&base->lock));		\
 	}							\
 	u;							\
 })
@@ -234,7 +234,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
  * Look into mm/map_avl.c for more detail description of the ideas.
  */
 static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-		struct inet_peer __rcu ***stackend)
+			       struct inet_peer __rcu ***stackend,
+			       struct inet_peer_base *base)
 {
 	struct inet_peer __rcu **nodep;
 	struct inet_peer *node, *l, *r;
@@ -243,20 +244,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 	while (stackend > stack) {
 		nodep = *--stackend;
 		node = rcu_dereference_protected(*nodep,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		l = rcu_dereference_protected(node->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		r = rcu_dereference_protected(node->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		lh = node_height(l);
 		rh = node_height(r);
 		if (lh > rh + 1) { /* l: RH+2 */
 			struct inet_peer *ll, *lr, *lrl, *lrr;
 			int lrh;
 			ll = rcu_dereference_protected(l->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			lr = rcu_dereference_protected(l->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			lrh = node_height(lr);
 			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
 				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
@@ -268,9 +269,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				RCU_INIT_POINTER(*nodep, l);
 			} else { /* ll: RH, lr: RH+1 */
 				lrl = rcu_dereference_protected(lr->avl_left,
-					lockdep_is_held(&peers.lock));	/* lrl: RH or RH-1 */
+					lockdep_is_held(&base->lock));	/* lrl: RH or RH-1 */
 				lrr = rcu_dereference_protected(lr->avl_right,
-					lockdep_is_held(&peers.lock));	/* lrr: RH or RH-1 */
+					lockdep_is_held(&base->lock));	/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
 				node->avl_height = rh + 1; /* node: RH+1 */
@@ -286,9 +287,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 			struct inet_peer *rr, *rl, *rlr, *rll;
 			int rlh;
 			rr = rcu_dereference_protected(r->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			rl = rcu_dereference_protected(r->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			rlh = node_height(rl);
 			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
 				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
@@ -300,9 +301,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				RCU_INIT_POINTER(*nodep, r);
 			} else { /* rr: RH, rl: RH+1 */
 				rlr = rcu_dereference_protected(rl->avl_right,
-					lockdep_is_held(&peers.lock));	/* rlr: LH or LH-1 */
+					lockdep_is_held(&base->lock));	/* rlr: LH or LH-1 */
 				rll = rcu_dereference_protected(rl->avl_left,
-					lockdep_is_held(&peers.lock));	/* rll: LH or LH-1 */
+					lockdep_is_held(&base->lock));	/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
 				node->avl_height = lh + 1; /* node: LH+1 */
@@ -321,14 +322,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 }
 
 /* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n)						\
+#define link_to_pool(n, base)					\
 do {								\
 	n->avl_height = 1;					\
 	n->avl_left = peer_avl_empty_rcu;			\
 	n->avl_right = peer_avl_empty_rcu;			\
 	/* lockless readers can catch us now */			\
 	rcu_assign_pointer(**--stackptr, n);			\
-	peer_avl_rebalance(stack, stackptr);			\
+	peer_avl_rebalance(stack, stackptr, base);		\
 } while (0)
 
 static void inetpeer_free_rcu(struct rcu_head *head)
@@ -337,13 +338,13 @@ static void inetpeer_free_rcu(struct rcu_head *head)
 }
 
 /* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 {
 	int do_free;
 
 	do_free = 0;
 
-	spin_lock_bh(&peers.lock);
+	spin_lock_bh(&base->lock);
 	/* Check the reference counter.  It was artificially incremented by 1
 	 * in cleanup() function to prevent sudden disappearing.  If we can
 	 * atomically (because of lockless readers) take this last reference,
@@ -353,7 +354,7 @@ static void unlink_from_pool(struct inet_peer *p)
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(p->v4daddr, stack) != p)
+		if (lookup(p->v4daddr, stack, base) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty_rcu) {
@@ -362,9 +363,9 @@ static void unlink_from_pool(struct inet_peer *p)
 		} else {
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
-			t = lookup_rightempty(p);
+			t = lookup_rightempty(p, base);
 			BUG_ON(rcu_dereference_protected(*stackptr[-1],
-					lockdep_is_held(&peers.lock)) != t);
+					lockdep_is_held(&base->lock)) != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->v4daddr > x->v4daddr for any
 			 * x in p->avl_left subtree.
@@ -376,11 +377,11 @@ static void unlink_from_pool(struct inet_peer *p)
 			BUG_ON(delp[1] != &p->avl_left);
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
-		peer_avl_rebalance(stack, stackptr);
-		peers.total--;
+		peer_avl_rebalance(stack, stackptr, base);
+		base->total--;
 		do_free = 1;
 	}
-	spin_unlock_bh(&peers.lock);
+	spin_unlock_bh(&base->lock);
 
 	if (do_free)
 		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
@@ -395,6 +396,11 @@ static void unlink_from_pool(struct inet_peer *p)
 		inet_putpeer(p);
 }
 
+static struct inet_peer_base *peer_to_base(struct inet_peer *p)
+{
+	return &v4_peers;
+}
+
 /* May be called with local BH enabled. */
 static int cleanup_once(unsigned long ttl)
 {
@@ -428,21 +434,27 @@ static int cleanup_once(unsigned long ttl)
 		 * happen because of entry limits in route cache. */
 		return -1;
 
-	unlink_from_pool(p);
+	unlink_from_pool(p, peer_to_base(p));
 	return 0;
 }
 
+static struct inet_peer_base *family_to_base(int family)
+{
+	return &v4_peers;
+}
+
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p;
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
+	struct inet_peer_base *base = family_to_base(AF_INET);
+	struct inet_peer *p;
 
 	/* Look up for the address quickly, lockless.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock_bh();
-	p = lookup_rcu_bh(daddr);
+	p = lookup_rcu_bh(daddr, base);
 	rcu_read_unlock_bh();
 
 	if (p) {
@@ -456,11 +468,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
-	spin_lock_bh(&peers.lock);
-	p = lookup(daddr, stack);
+	spin_lock_bh(&base->lock);
+	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
 		atomic_inc(&p->refcnt);
-		spin_unlock_bh(&peers.lock);
+		spin_unlock_bh(&base->lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
@@ -476,30 +488,36 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 
 		/* Link the node. */
-		link_to_pool(p);
-		peers.total++;
+		link_to_pool(p, base);
+		base->total++;
 	}
-	spin_unlock_bh(&peers.lock);
+	spin_unlock_bh(&base->lock);
 
-	if (peers.total >= inet_peer_threshold)
+	if (base->total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
 	return p;
 }
 
+static int compute_total(void)
+{
+	return v4_peers.total;
+}
+
 /* Called with local BH disabled. */
 static void peer_check_expire(unsigned long dummy)
 {
 	unsigned long now = jiffies;
-	int ttl;
+	int ttl, total;
 
-	if (peers.total >= inet_peer_threshold)
+	total = compute_total();
+	if (total >= inet_peer_threshold)
 		ttl = inet_peer_minttl;
 	else
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-					peers.total / inet_peer_threshold * HZ;
+					total / inet_peer_threshold * HZ;
 	while (!cleanup_once(ttl)) {
 		if (jiffies != now)
 			break;
@@ -508,13 +526,14 @@ static void peer_check_expire(unsigned long dummy)
 	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
 	 * interval depending on the total number of entries (more entries,
 	 * less interval). */
-	if (peers.total >= inet_peer_threshold)
+	total = compute_total();
+	if (total >= inet_peer_threshold)
 		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
 	else
 		peer_periodic_timer.expires = jiffies
 			+ inet_peer_gc_maxtime
 			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-				peers.total / inet_peer_threshold * HZ;
+				total / inet_peer_threshold * HZ;
 	add_timer(&peer_periodic_timer);
 }
 
-- 
cgit v1.2.3


From 582a72da9a41be9227dc931d728ae2906880a589 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 11:53:55 -0800
Subject: inetpeer: Introduce inet_peer_address_t.

Currently only the v4 aspect is used, but this will change.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 10 +++++++++-
 net/ipv4/inetpeer.c    | 16 ++++++++--------
 net/ipv4/tcp_ipv4.c    |  2 +-
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index fe239bfe5f7f..d7e60792d76e 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -13,10 +13,18 @@
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
 
+typedef struct {
+	union {
+		__be32		a4;
+		__be32		a6[4];
+	};
+	__u16	family;
+} inet_peer_address_t;
+
 struct inet_peer {
 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
 	struct inet_peer __rcu	*avl_left, *avl_right;
-	__be32			v4daddr;	/* peer's address */
+	inet_peer_address_t	daddr;
 	__u32			avl_height;
 	struct list_head	unused;
 	__u32			dtime;		/* the time of last use of not
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index f94400848921..893f998efdbb 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -63,7 +63,7 @@
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		dtime: unused node list lock
- *		v4daddr: unchangeable
+ *		daddr: unchangeable
  *		ip_id_count: atomic value (no lock needed)
  */
 
@@ -165,9 +165,9 @@ static void unlink_from_unused(struct inet_peer *p)
 	for (u = rcu_dereference_protected(_base->root,		\
 			lockdep_is_held(&_base->lock));		\
 	     u != peer_avl_empty; ) {				\
-		if (_daddr == u->v4daddr)			\
+		if (_daddr == u->daddr.a4)			\
 			break;					\
-		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
+		if ((__force __u32)_daddr < (__force __u32)u->daddr.a4)	\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
@@ -191,7 +191,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base
 	int count = 0;
 
 	while (u != peer_avl_empty) {
-		if (daddr == u->v4daddr) {
+		if (daddr == u->daddr.a4) {
 			/* Before taking a reference, check if this entry was
 			 * deleted, unlink_from_pool() sets refcnt=-1 to make
 			 * distinction between an unused entry (refcnt=0) and
@@ -201,7 +201,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base
 				u = NULL;
 			return u;
 		}
-		if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+		if ((__force __u32)daddr < (__force __u32)u->daddr.a4)
 			u = rcu_dereference_bh(u->avl_left);
 		else
 			u = rcu_dereference_bh(u->avl_right);
@@ -354,7 +354,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(p->v4daddr, stack, base) != p)
+		if (lookup(p->daddr.a4, stack, base) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty_rcu) {
@@ -367,7 +367,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 			BUG_ON(rcu_dereference_protected(*stackptr[-1],
 					lockdep_is_held(&base->lock)) != t);
 			**--stackptr = t->avl_left;
-			/* t is removed, t->v4daddr > x->v4daddr for any
+			/* t is removed, t->daddr > x->daddr for any
 			 * x in p->avl_left subtree.
 			 * Put t in the old place of p. */
 			RCU_INIT_POINTER(*delp[0], t);
@@ -479,7 +479,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	}
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
-		p->v4daddr = daddr;
+		p->daddr.a4 = daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
 		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 69ccbc1dde9c..b8bbf89409b0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1347,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    tcp_death_row.sysctl_tw_recycle &&
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-		    peer->v4daddr == saddr) {
+		    peer->daddr.a4 == saddr) {
 			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
-- 
cgit v1.2.3


From b534ecf1cd26f094497da6ae28a6ab64cdbe1617 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 11:54:19 -0800
Subject: inetpeer: Make inet_getpeer() take an inet_peer_adress_t pointer.

And make an inet_getpeer_v4() helper, update callers.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 11 ++++++++++-
 net/ipv4/inetpeer.c    | 10 +++++-----
 net/ipv4/ip_fragment.c |  2 +-
 net/ipv4/route.c       |  2 +-
 net/ipv4/tcp_ipv4.c    |  4 ++--
 5 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index d7e60792d76e..834f0456c87e 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -50,7 +50,16 @@ struct inet_peer {
 void			inet_initpeers(void) __init;
 
 /* can be called with or without local BH being disabled */
-struct inet_peer	*inet_getpeer(__be32 daddr, int create);
+struct inet_peer	*inet_getpeer(inet_peer_address_t *daddr, int create);
+
+static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
+{
+	inet_peer_address_t daddr;
+
+	daddr.a4 = v4daddr;
+	daddr.family = AF_INET;
+	return inet_getpeer(&daddr, create);
+}
 
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 893f998efdbb..9aa76b8dd490 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -444,7 +444,7 @@ static struct inet_peer_base *family_to_base(int family)
 }
 
 /* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(__be32 daddr, int create)
+struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 	struct inet_peer_base *base = family_to_base(AF_INET);
@@ -454,7 +454,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock_bh();
-	p = lookup_rcu_bh(daddr, base);
+	p = lookup_rcu_bh(daddr->a4, base);
 	rcu_read_unlock_bh();
 
 	if (p) {
@@ -469,7 +469,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	 * At least, nodes should be hot in our cache.
 	 */
 	spin_lock_bh(&base->lock);
-	p = lookup(daddr, stack, base);
+	p = lookup(daddr->a4, stack, base);
 	if (p != peer_avl_empty) {
 		atomic_inc(&p->refcnt);
 		spin_unlock_bh(&base->lock);
@@ -479,10 +479,10 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	}
 	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
 	if (p) {
-		p->daddr.a4 = daddr;
+		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4));
 		p->tcp_ts_stamp = 0;
 		INIT_LIST_HEAD(&p->unused);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 168440834ade..e6215bdd96c0 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
 	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer(arg->iph->saddr, 1) : NULL;
+		inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ec2333fb637e..3843c2dfde82 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1289,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create)
 {
 	struct inet_peer *peer;
 
-	peer = inet_getpeer(rt->rt_dst, create);
+	peer = inet_getpeer_v4(rt->rt_dst, create);
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b8bbf89409b0..00285fcf6788 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1778,7 +1778,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
 	int release_it = 0;
 
 	if (!rt || rt->rt_dst != inet->inet_daddr) {
-		peer = inet_getpeer(inet->inet_daddr, 1);
+		peer = inet_getpeer_v4(inet->inet_daddr, 1);
 		release_it = 1;
 	} else {
 		if (!rt->peer)
@@ -1804,7 +1804,7 @@ EXPORT_SYMBOL(tcp_v4_remember_stamp);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
-	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
+	struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1);
 
 	if (peer) {
 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-- 
cgit v1.2.3


From 026630450244b8f8d1baf54548be0800aa1823ed Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 12:08:53 -0800
Subject: inetpeer: Abstract address comparisons.

Now v4 and v6 addresses will both work properly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9aa76b8dd490..c96dc51c2e49 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -152,6 +152,22 @@ static void unlink_from_unused(struct inet_peer *p)
 	}
 }
 
+static int addr_compare(const inet_peer_address_t *a,
+			const inet_peer_address_t *b)
+{
+	int i, n = (a->family == AF_INET ? 1 : 4);
+
+	for (i = 0; i < n; i++) {
+		if (a->a6[i] == b->a6[i])
+			continue;
+		if (a->a6[i] < b->a6[i])
+			return -1;
+		return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Called with local BH disabled and the pool lock held.
  */
@@ -165,9 +181,10 @@ static void unlink_from_unused(struct inet_peer *p)
 	for (u = rcu_dereference_protected(_base->root,		\
 			lockdep_is_held(&_base->lock));		\
 	     u != peer_avl_empty; ) {				\
-		if (_daddr == u->daddr.a4)			\
+		int cmp = addr_compare(_daddr, &u->daddr);	\
+		if (cmp == 0)					\
 			break;					\
-		if ((__force __u32)_daddr < (__force __u32)u->daddr.a4)	\
+		if (cmp == -1)					\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
@@ -185,13 +202,15 @@ static void unlink_from_unused(struct inet_peer *p)
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base)
+static struct inet_peer *lookup_rcu_bh(const inet_peer_address_t *daddr,
+				       struct inet_peer_base *base)
 {
 	struct inet_peer *u = rcu_dereference_bh(base->root);
 	int count = 0;
 
 	while (u != peer_avl_empty) {
-		if (daddr == u->daddr.a4) {
+		int cmp = addr_compare(daddr, &u->daddr);
+		if (cmp == 0) {
 			/* Before taking a reference, check if this entry was
 			 * deleted, unlink_from_pool() sets refcnt=-1 to make
 			 * distinction between an unused entry (refcnt=0) and
@@ -201,7 +220,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base
 				u = NULL;
 			return u;
 		}
-		if ((__force __u32)daddr < (__force __u32)u->daddr.a4)
+		if (cmp == -1)
 			u = rcu_dereference_bh(u->avl_left);
 		else
 			u = rcu_dereference_bh(u->avl_right);
@@ -354,7 +373,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(p->daddr.a4, stack, base) != p)
+		if (lookup(&p->daddr, stack, base) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty_rcu) {
@@ -454,7 +473,7 @@ struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock_bh();
-	p = lookup_rcu_bh(daddr->a4, base);
+	p = lookup_rcu_bh(daddr, base);
 	rcu_read_unlock_bh();
 
 	if (p) {
@@ -469,7 +488,7 @@ struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 	 * At least, nodes should be hot in our cache.
 	 */
 	spin_lock_bh(&base->lock);
-	p = lookup(daddr->a4, stack, base);
+	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
 		atomic_inc(&p->refcnt);
 		spin_unlock_bh(&base->lock);
-- 
cgit v1.2.3


From 021e9299113363cc1b713f86b2cba30b8e6cb5dd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 12:12:23 -0800
Subject: inetpeer: Add v6 peers tree, abstract root properly.

Add the ipv6 peer tree instance, and adapt remaining
direct references to 'v4_peers' as needed.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c96dc51c2e49..1c1335b0d401 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -79,15 +79,24 @@ static const struct inet_peer peer_fake_node = {
 	.avl_height	= 0
 };
 
-static struct inet_peer_base {
+struct inet_peer_base {
 	struct inet_peer __rcu *root;
 	spinlock_t	lock;
 	int		total;
-} v4_peers = {
+};
+
+static struct inet_peer_base v4_peers = {
 	.root		= peer_avl_empty_rcu,
 	.lock		= __SPIN_LOCK_UNLOCKED(v4_peers.lock),
 	.total		= 0,
 };
+
+static struct inet_peer_base v6_peers = {
+	.root		= peer_avl_empty_rcu,
+	.lock		= __SPIN_LOCK_UNLOCKED(v6_peers.lock),
+	.total		= 0,
+};
+
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
 /* Exported for sysctl_net_ipv4.  */
@@ -415,9 +424,14 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 		inet_putpeer(p);
 }
 
+static struct inet_peer_base *family_to_base(int family)
+{
+	return (family == AF_INET ? &v4_peers : &v6_peers);
+}
+
 static struct inet_peer_base *peer_to_base(struct inet_peer *p)
 {
-	return &v4_peers;
+	return family_to_base(p->daddr.family);
 }
 
 /* May be called with local BH enabled. */
@@ -457,11 +471,6 @@ static int cleanup_once(unsigned long ttl)
 	return 0;
 }
 
-static struct inet_peer_base *family_to_base(int family)
-{
-	return &v4_peers;
-}
-
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 {
@@ -521,7 +530,7 @@ struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
 
 static int compute_total(void)
 {
-	return v4_peers.total;
+	return v4_peers.total + v6_peers.total;
 }
 
 /* Called with local BH disabled. */
-- 
cgit v1.2.3


From b3419363808f2481b24a817f491878e1795db4c7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Nov 2010 12:27:11 -0800
Subject: ipv6: Add infrastructure to bind inet_peer objects to routes.

They are only allowed on cached ipv6 routes.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  2 ++
 include/net/ip6_route.h |  3 +++
 net/ipv4/inetpeer.c     |  2 ++
 net/ipv6/route.c        | 18 ++++++++++++++++++
 4 files changed, 25 insertions(+)

(limited to 'net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 062a823d311c..708ff7cb8806 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -21,6 +21,7 @@
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/netlink.h>
+#include <net/inetpeer.h>
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 #define FIB6_TABLE_HASHSZ 256
@@ -109,6 +110,7 @@ struct rt6_info {
 	u32				rt6i_metric;
 
 	struct inet6_dev		*rt6i_idev;
+	struct inet_peer		*rt6i_peer;
 
 #ifdef CONFIG_XFRM
 	u32				rt6i_flow_cache_genid;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 278312c95f96..23fed28db4bb 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -56,6 +56,9 @@ static inline unsigned int rt6_flags2srcprefs(int flags)
 	return (flags >> 3) & 7;
 }
 
+extern void			rt6_bind_peer(struct rt6_info *rt,
+					      int create);
+
 extern void			ip6_route_input(struct sk_buff *skb);
 
 extern struct dst_entry *	ip6_route_output(struct net *net,
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 1c1335b0d401..f95b89f3916d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -532,6 +532,7 @@ static int compute_total(void)
 {
 	return v4_peers.total + v6_peers.total;
 }
+EXPORT_SYMBOL_GPL(inet_getpeer);
 
 /* Called with local BH disabled. */
 static void peer_check_expire(unsigned long dummy)
@@ -577,3 +578,4 @@ void inet_putpeer(struct inet_peer *p)
 
 	local_bh_enable();
 }
+EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a0c4ad109c63..026caef0326c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -188,11 +188,29 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
+	struct inet_peer *peer = rt->rt6i_peer;
 
 	if (idev != NULL) {
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
+	if (peer) {
+		BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
+		rt->rt6i_peer = NULL;
+		inet_putpeer(peer);
+	}
+}
+
+void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+	struct inet_peer *peer;
+
+	if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
+		return;
+
+	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+		inet_putpeer(peer);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-- 
cgit v1.2.3


From 3f419d2d487821093ee46e898b5f8747f9edc9cd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 29 Nov 2010 13:37:14 -0800
Subject: inet: Turn ->remember_stamp into ->get_peer in connection AF ops.

Then we can make a completely generic tcp_remember_stamp()
that uses ->get_peer() as a helper, minimizing the AF specific
code and minimizing the eventual code duplication when we implement
the ipv6 side of TW recycling.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |  2 +-
 include/net/tcp.h                  |  2 +-
 net/ipv4/tcp_ipv4.c                | 35 ++++++++---------------------------
 net/ipv4/tcp_minisocks.c           | 31 ++++++++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c                |  8 ++++----
 5 files changed, 44 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index e4f494b42e06..6c93a56cc958 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -43,7 +43,7 @@ struct inet_connection_sock_af_ops {
 	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req,
 				      struct dst_entry *dst);
-	int	    (*remember_stamp)(struct sock *sk);
+	struct inet_peer *(*get_peer)(struct sock *sk, bool *release_it);
 	u16	    net_header_len;
 	u16	    sockaddr_len;
 	int	    (*setsockopt)(struct sock *sk, int level, int optname, 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e36c874c7fb1..3e239641d4ee 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -312,7 +312,7 @@ extern void tcp_shutdown (struct sock *sk, int how);
 
 extern int tcp_v4_rcv(struct sk_buff *skb);
 
-extern int tcp_v4_remember_stamp(struct sock *sk);
+extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it);
 extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t size);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 00285fcf6788..0ddf819cfb5d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1763,44 +1763,25 @@ do_time_wait:
 	goto discard_it;
 }
 
-/* VJ's idea. Save last timestamp seen from this destination
- * and hold it at least for normal timewait interval to use for duplicate
- * segment detection in subsequent connections, before they enter synchronized
- * state.
- */
-
-int tcp_v4_remember_stamp(struct sock *sk)
+struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 {
+	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
-	struct inet_peer *peer = NULL;
-	int release_it = 0;
+	struct inet_peer *peer;
 
 	if (!rt || rt->rt_dst != inet->inet_daddr) {
 		peer = inet_getpeer_v4(inet->inet_daddr, 1);
-		release_it = 1;
+		*release_it = true;
 	} else {
 		if (!rt->peer)
 			rt_bind_peer(rt, 1);
 		peer = rt->peer;
+		*release_it = false;
 	}
 
-	if (peer) {
-		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
-			peer->tcp_ts = tp->rx_opt.ts_recent;
-		}
-		if (release_it)
-			inet_putpeer(peer);
-		return 1;
-	}
-
-	return 0;
+	return peer;
 }
-EXPORT_SYMBOL(tcp_v4_remember_stamp);
+EXPORT_SYMBOL(tcp_v4_get_peer);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
@@ -1828,7 +1809,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.rebuild_header	   = inet_sk_rebuild_header,
 	.conn_request	   = tcp_v4_conn_request,
 	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
-	.remember_stamp	   = tcp_v4_remember_stamp,
+	.get_peer	   = tcp_v4_get_peer,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43cf901d7659..059082c873cf 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -49,6 +49,35 @@ struct inet_timewait_death_row tcp_death_row = {
 };
 EXPORT_SYMBOL_GPL(tcp_death_row);
 
+/* VJ's idea. Save last timestamp seen from this destination
+ * and hold it at least for normal timewait interval to use for duplicate
+ * segment detection in subsequent connections, before they enter synchronized
+ * state.
+ */
+
+static int tcp_remember_stamp(struct sock *sk)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_peer *peer;
+	bool release_it;
+
+	peer = icsk->icsk_af_ops->get_peer(sk, &release_it);
+	if (peer) {
+		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
+		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
+		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
+			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
+			peer->tcp_ts = tp->rx_opt.ts_recent;
+		}
+		if (release_it)
+			inet_putpeer(peer);
+		return 1;
+	}
+
+	return 0;
+}
+
 static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
@@ -274,7 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	int recycle_ok = 0;
 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
+		recycle_ok = tcp_remember_stamp(sk);
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7e41e2cbb85e..e394d0029d8d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1818,10 +1818,10 @@ do_time_wait:
 	goto discard_it;
 }
 
-static int tcp_v6_remember_stamp(struct sock *sk)
+struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 {
 	/* Alas, not yet... */
-	return 0;
+	return NULL;
 }
 
 static const struct inet_connection_sock_af_ops ipv6_specific = {
@@ -1830,7 +1830,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.rebuild_header	   = inet6_sk_rebuild_header,
 	.conn_request	   = tcp_v6_conn_request,
 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
-	.remember_stamp	   = tcp_v6_remember_stamp,
+	.get_peer	   = tcp_v6_get_peer,
 	.net_header_len	   = sizeof(struct ipv6hdr),
 	.setsockopt	   = ipv6_setsockopt,
 	.getsockopt	   = ipv6_getsockopt,
@@ -1862,7 +1862,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
 	.rebuild_header	   = inet_sk_rebuild_header,
 	.conn_request	   = tcp_v6_conn_request,
 	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
-	.remember_stamp	   = tcp_v4_remember_stamp,
+	.get_peer	   = tcp_v4_get_peer,
 	.net_header_len	   = sizeof(struct iphdr),
 	.setsockopt	   = ipv6_setsockopt,
 	.getsockopt	   = ipv6_getsockopt,
-- 
cgit v1.2.3


From f2cd2d3e9b3ef960612e362f0ad129d735452df2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 29 Nov 2010 08:14:37 +0000
Subject: net sched: use xps information for qdisc NUMA affinity

Allocate qdisc memory according to NUMA properties of cpus included in
xps map.

To be effective, qdisc should be (re)setup after changes
of /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

I added a numa_node field in struct netdev_queue, containing NUMA node
if all cpus included in xps_cpus share same node, else -1.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 +++++++++++++++++++-
 net/core/dev.c            |  5 +++--
 net/core/net-sysfs.c      | 12 +++++++++++-
 net/sched/sch_generic.c   |  4 +++-
 4 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4b0c7f3aa32b..a9ac5dc26e3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -508,7 +508,9 @@ struct netdev_queue {
 #ifdef CONFIG_RPS
 	struct kobject		kobj;
 #endif
-
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	int			numa_node;
+#endif
 /*
  * write mostly part
  */
@@ -523,6 +525,22 @@ struct netdev_queue {
 	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
+{
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	return q->numa_node;
+#else
+	return -1;
+#endif
+}
+
+static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node)
+{
+#if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+	q->numa_node = node;
+#endif
+}
+
 #ifdef CONFIG_RPS
 /*
  * This structure holds an RPS map which can be of variable length.  The
diff --git a/net/core/dev.c b/net/core/dev.c
index 3259d2c323a6..cd2437495428 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5125,9 +5125,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
+		netdev_queue_numa_node_write(&tx[i], -1);
 		tx[i].dev = dev;
-
+	}
 	return 0;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f85cee3d869e..85e8b5326dd6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -913,6 +913,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	struct xps_map *map, *new_map;
 	struct xps_dev_maps *dev_maps, *new_dev_maps;
 	int nonempty = 0;
+	int numa_node = -2;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -953,7 +954,14 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 			pos = map_len = alloc_len = 0;
 
 		need_set = cpu_isset(cpu, *mask) && cpu_online(cpu);
-
+#ifdef CONFIG_NUMA
+		if (need_set) {
+			if (numa_node == -2)
+				numa_node = cpu_to_node(cpu);
+			else if (numa_node != cpu_to_node(cpu))
+				numa_node = -1;
+		}
+#endif
 		if (need_set && pos >= map_len) {
 			/* Need to add queue to this CPU's map */
 			if (map_len >= alloc_len) {
@@ -1001,6 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	if (dev_maps)
 		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
 
+	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
+
 	mutex_unlock(&xps_map_mutex);
 
 	free_cpumask_var(mask);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7f0bd8952646..0918834ee4a1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -553,7 +553,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	size = QDISC_ALIGN(sizeof(*sch));
 	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
-	p = kzalloc(size, GFP_KERNEL);
+	p = kzalloc_node(size, GFP_KERNEL,
+			 netdev_queue_numa_node_read(dev_queue));
+
 	if (!p)
 		goto errout;
 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
-- 
cgit v1.2.3


From 407d6fcbfdd011bcc2dd9e6923c5cca00abbfc6f Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 29 Nov 2010 09:47:47 +0000
Subject: gre: minor cleanups

Use strcpy() rather the sprintf() for the case where name is getting
generated.  Fix indentation.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 897210adaa77..98769c399d9e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -405,11 +405,11 @@ static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
 	if (parms->name[0])
 		strlcpy(name, parms->name, IFNAMSIZ);
 	else
-		sprintf(name, "gre%%d");
+		strcpy(name, "gre%d");
 
 	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
 	if (!dev)
-	  return NULL;
+		return NULL;
 
 	dev_net_set(dev, net);
 
-- 
cgit v1.2.3


From 4da6a738ffdb99b88efbe5b4c4fe521ca453640d Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 29 Nov 2010 09:47:48 +0000
Subject: gre: add module alias for gre0 tunnel device

If gre is built as a module the 'ip tunnel add' command would fail because
the ip_gre module was not being autoloaded.  Adding an alias for
the gre0 device name cause dev_load() to autoload it when needed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 98769c399d9e..258c98d5fa79 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1764,3 +1764,4 @@ module_exit(ipgre_fini);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("gre");
 MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS("gre0");
-- 
cgit v1.2.3


From 8afe7c8acd33bc52c56546e73e46e9d546269e2c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 29 Nov 2010 09:47:49 +0000
Subject: ipip: add module alias for tunl0 tunnel device

If ipip is built as a module the 'ip tunnel add' command would fail because
the ipip module was not being autoloaded.  Adding an alias for
the tunl0 device name cause dev_load() to autoload it when needed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e70ad581398e..988f52fba54a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,3 +913,4 @@ static void __exit ipip_fini(void)
 module_init(ipip_init);
 module_exit(ipip_fini);
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("tunl0");
-- 
cgit v1.2.3


From ccd556fe334914bf2e465eb5bc480d49cd4406d7 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 10 Nov 2010 17:11:51 +0200
Subject: Bluetooth: Simplify remote features callback function logic

The current remote and remote extended features event callbacks logic
can be made simpler by using a label and goto statements instead of the
current multiple levels of nested if statements.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 91 ++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 84093b0000b9..84302768939a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1162,33 +1162,33 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
-	if (conn) {
-		if (!ev->status)
-			memcpy(conn->features, ev->features, 8);
+	if (!conn)
+		goto unlock;
 
-		if (conn->state == BT_CONFIG) {
-			if (!ev->status && lmp_ssp_capable(hdev) &&
-						lmp_ssp_capable(conn)) {
-				struct hci_cp_read_remote_ext_features cp;
-				cp.handle = ev->handle;
-				cp.page = 0x01;
-				hci_send_cmd(hdev,
-					HCI_OP_READ_REMOTE_EXT_FEATURES,
-							sizeof(cp), &cp);
-			} else if (!ev->status && conn->out &&
-					conn->sec_level == BT_SECURITY_HIGH) {
-				struct hci_cp_auth_requested cp;
-				cp.handle = ev->handle;
-				hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
+	if (!ev->status)
+		memcpy(conn->features, ev->features, 8);
+
+	if (conn->state != BT_CONFIG)
+		goto unlock;
+
+	if (!ev->status && lmp_ssp_capable(hdev) && lmp_ssp_capable(conn)) {
+		struct hci_cp_read_remote_ext_features cp;
+		cp.handle = ev->handle;
+		cp.page = 0x01;
+		hci_send_cmd(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES,
 							sizeof(cp), &cp);
-			} else {
-				conn->state = BT_CONNECTED;
-				hci_proto_connect_cfm(conn, ev->status);
-				hci_conn_put(conn);
-			}
-		}
+	} else if (!ev->status && conn->out &&
+			conn->sec_level == BT_SECURITY_HIGH) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = ev->handle;
+		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
+	} else {
+		conn->state = BT_CONNECTED;
+		hci_proto_connect_cfm(conn, ev->status);
+		hci_conn_put(conn);
 	}
 
+unlock:
 	hci_dev_unlock(hdev);
 }
 
@@ -1646,32 +1646,35 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
-	if (conn) {
-		if (!ev->status && ev->page == 0x01) {
-			struct inquiry_entry *ie;
+	if (!conn)
+		goto unlock;
 
-			if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
-				ie->data.ssp_mode = (ev->features[0] & 0x01);
+	if (!ev->status && ev->page == 0x01) {
+		struct inquiry_entry *ie;
 
-			conn->ssp_mode = (ev->features[0] & 0x01);
-		}
+		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
+			ie->data.ssp_mode = (ev->features[0] & 0x01);
 
-		if (conn->state == BT_CONFIG) {
-			if (!ev->status && hdev->ssp_mode > 0 &&
-					conn->ssp_mode > 0 && conn->out &&
-					conn->sec_level != BT_SECURITY_SDP) {
-				struct hci_cp_auth_requested cp;
-				cp.handle = ev->handle;
-				hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
-							sizeof(cp), &cp);
-			} else {
-				conn->state = BT_CONNECTED;
-				hci_proto_connect_cfm(conn, ev->status);
-				hci_conn_put(conn);
-			}
-		}
+		conn->ssp_mode = (ev->features[0] & 0x01);
+	}
+
+	if (conn->state != BT_CONFIG)
+		goto unlock;
+
+	if (!ev->status && hdev->ssp_mode > 0 &&
+			conn->ssp_mode > 0 && conn->out &&
+			conn->sec_level != BT_SECURITY_SDP) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = ev->handle;
+		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
+				sizeof(cp), &cp);
+	} else {
+		conn->state = BT_CONNECTED;
+		hci_proto_connect_cfm(conn, ev->status);
+		hci_conn_put(conn);
 	}
 
+unlock:
 	hci_dev_unlock(hdev);
 }
 
-- 
cgit v1.2.3


From 392599b95d76f4f3102d8614bdc1957795cd1a3a Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Thu, 18 Nov 2010 22:22:28 +0200
Subject: Bluetooth: Create a unified authentication request function

This patch adds a single function that's responsible for requesting
authentication for outgoing connections. This is preparation for the
next patch which will add automated name requests and thereby move the
authentication requests to a different location.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 54 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 84302768939a..9c6d9bc1d8af 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -677,6 +677,29 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
 	hci_dev_unlock(hdev);
 }
 
+static int hci_request_outgoing_auth(struct hci_dev *hdev,
+						struct hci_conn *conn)
+{
+	struct hci_cp_auth_requested cp;
+
+	if (conn->state != BT_CONFIG || !conn->out)
+		return 0;
+
+	if (conn->sec_level == BT_SECURITY_SDP)
+		return 0;
+
+	/* Only request authentication for SSP connections or non-SSP
+	 * devices with sec_level HIGH */
+	if (!(hdev->ssp_mode > 0 && conn->ssp_mode > 0) &&
+					conn->sec_level != BT_SECURITY_HIGH)
+		return 0;
+
+	cp.handle = __cpu_to_le16(conn->handle);
+	hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
+
+	return 1;
+}
+
 static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%x", hdev->name, status);
@@ -1156,6 +1179,7 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 {
 	struct hci_ev_remote_features *ev = (void *) skb->data;
 	struct hci_conn *conn;
+	int auth_requested;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
@@ -1177,12 +1201,15 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 		cp.page = 0x01;
 		hci_send_cmd(hdev, HCI_OP_READ_REMOTE_EXT_FEATURES,
 							sizeof(cp), &cp);
-	} else if (!ev->status && conn->out &&
-			conn->sec_level == BT_SECURITY_HIGH) {
-		struct hci_cp_auth_requested cp;
-		cp.handle = ev->handle;
-		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
-	} else {
+		goto unlock;
+	}
+
+	if (!ev->status)
+		auth_requested = hci_request_outgoing_auth(hdev, conn);
+	else
+		auth_requested = 0;
+
+	if (!auth_requested) {
 		conn->state = BT_CONNECTED;
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_put(conn);
@@ -1640,6 +1667,7 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 {
 	struct hci_ev_remote_ext_features *ev = (void *) skb->data;
 	struct hci_conn *conn;
+	int auth_requested;
 
 	BT_DBG("%s", hdev->name);
 
@@ -1661,14 +1689,12 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 	if (conn->state != BT_CONFIG)
 		goto unlock;
 
-	if (!ev->status && hdev->ssp_mode > 0 &&
-			conn->ssp_mode > 0 && conn->out &&
-			conn->sec_level != BT_SECURITY_SDP) {
-		struct hci_cp_auth_requested cp;
-		cp.handle = ev->handle;
-		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
-				sizeof(cp), &cp);
-	} else {
+	if (!ev->status)
+		auth_requested = hci_request_outgoing_auth(hdev, conn);
+	else
+		auth_requested = 0;
+
+	if (!auth_requested) {
 		conn->state = BT_CONNECTED;
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_put(conn);
-- 
cgit v1.2.3


From 127178d24c7eb2df53b1ba2b6f6f743e88178a1b Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Thu, 18 Nov 2010 22:22:29 +0200
Subject: Bluetooth: Automate remote name requests

In Bluetooth there are no automatic updates of remote device names when
they get changed on the remote side. Instead, it is a good idea to do a
manual name request when a new connection gets created (for whatever
reason) since at this point it is very cheap (no costly baseband
connection creation needed just for the sake of the name request).

So far userspace has been responsible for this extra name request but
tighter control is needed in order not to flood Bluetooth controllers
with two many commands during connection creation. It has been shown
that some controllers simply fail to function correctly if they get too
many (almost) simultaneous commands during connection creation. The
simplest way to acheive better control of these commands is to move
their sending completely to the kernel side.

This patch inserts name requests into the sequence of events that the
kernel performs during connection creation. It does this after the
remote features have been successfully requested and before any pending
authentication requests are performed. The code will work sub-optimally
with userspace versions that still do the name requesting themselves (it
shouldn't break anything though) so it is recommended to combine this
with a userspace software version that doesn't have automated name
requests.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 72 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 9c6d9bc1d8af..4165895049b3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -677,11 +677,9 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
 	hci_dev_unlock(hdev);
 }
 
-static int hci_request_outgoing_auth(struct hci_dev *hdev,
+static int hci_outgoing_auth_needed(struct hci_dev *hdev,
 						struct hci_conn *conn)
 {
-	struct hci_cp_auth_requested cp;
-
 	if (conn->state != BT_CONFIG || !conn->out)
 		return 0;
 
@@ -694,15 +692,35 @@ static int hci_request_outgoing_auth(struct hci_dev *hdev,
 					conn->sec_level != BT_SECURITY_HIGH)
 		return 0;
 
-	cp.handle = __cpu_to_le16(conn->handle);
-	hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
-
 	return 1;
 }
 
 static void hci_cs_remote_name_req(struct hci_dev *hdev, __u8 status)
 {
+	struct hci_cp_remote_name_req *cp;
+	struct hci_conn *conn;
+
 	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	/* If successful wait for the name req complete event before
+	 * checking for the need to do authentication */
+	if (!status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_REMOTE_NAME_REQ);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr);
+	if (conn && hci_outgoing_auth_needed(hdev, conn)) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = __cpu_to_le16(conn->handle);
+		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
@@ -1113,9 +1131,23 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 
 static inline void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_remote_name *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
 
 	hci_conn_check_pending(hdev);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn && hci_outgoing_auth_needed(hdev, conn)) {
+		struct hci_cp_auth_requested cp;
+		cp.handle = __cpu_to_le16(conn->handle);
+		hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1179,7 +1211,6 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 {
 	struct hci_ev_remote_features *ev = (void *) skb->data;
 	struct hci_conn *conn;
-	int auth_requested;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
@@ -1204,12 +1235,15 @@ static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff
 		goto unlock;
 	}
 
-	if (!ev->status)
-		auth_requested = hci_request_outgoing_auth(hdev, conn);
-	else
-		auth_requested = 0;
+	if (!ev->status) {
+		struct hci_cp_remote_name_req cp;
+		memset(&cp, 0, sizeof(cp));
+		bacpy(&cp.bdaddr, &conn->dst);
+		cp.pscan_rep_mode = 0x02;
+		hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
+	}
 
-	if (!auth_requested) {
+	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_put(conn);
@@ -1667,7 +1701,6 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 {
 	struct hci_ev_remote_ext_features *ev = (void *) skb->data;
 	struct hci_conn *conn;
-	int auth_requested;
 
 	BT_DBG("%s", hdev->name);
 
@@ -1689,12 +1722,15 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 	if (conn->state != BT_CONFIG)
 		goto unlock;
 
-	if (!ev->status)
-		auth_requested = hci_request_outgoing_auth(hdev, conn);
-	else
-		auth_requested = 0;
+	if (!ev->status) {
+		struct hci_cp_remote_name_req cp;
+		memset(&cp, 0, sizeof(cp));
+		bacpy(&cp.bdaddr, &conn->dst);
+		cp.pscan_rep_mode = 0x02;
+		hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
+	}
 
-	if (!auth_requested) {
+	if (!hci_outgoing_auth_needed(hdev, conn)) {
 		conn->state = BT_CONNECTED;
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_put(conn);
-- 
cgit v1.2.3


From 5520d20f68310fc158dcbbecfd5eac5cdfc5a241 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Sat, 30 Oct 2010 18:26:21 +0400
Subject: Bluetooth: bnep: fix information leak to userland

Structure bnep_conninfo is copied to userland with the field "device"
that has the last elements unitialized.  It leads to leaking of
contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/bnep/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index f10b41fb05a0..5868597534e5 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -648,6 +648,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
 
 static void __bnep_copy_ci(struct bnep_conninfo *ci, struct bnep_session *s)
 {
+	memset(ci, 0, sizeof(*ci));
 	memcpy(ci->dst, s->eh.h_source, ETH_ALEN);
 	strcpy(ci->device, s->dev->name);
 	ci->flags = s->flags;
-- 
cgit v1.2.3


From 3185fbd9d7bb166992f072440b3329f58bf2c60a Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Sat, 30 Oct 2010 18:26:26 +0400
Subject: Bluetooth: cmtp: fix information leak to userland

Structure cmtp_conninfo is copied to userland with some padding fields
unitialized.  It leads to leaking of contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/cmtp/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index ec0a1347f933..8e5f292529ac 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -78,6 +78,7 @@ static void __cmtp_unlink_session(struct cmtp_session *session)
 
 static void __cmtp_copy_session(struct cmtp_session *session, struct cmtp_conninfo *ci)
 {
+	memset(ci, 0, sizeof(*ci));
 	bacpy(&ci->bdaddr, &session->bdaddr);
 
 	ci->flags = session->flags;
-- 
cgit v1.2.3


From d31dbf6e5989b2fd9a30ec5b25436e94f009d6df Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Sat, 30 Oct 2010 18:26:31 +0400
Subject: Bluetooth: hidp: fix information leak to userland

Structure hidp_conninfo is copied to userland with version, product,
vendor and name fields unitialized if both session->input and session->hid
are NULL.  It leads to leaking of contents of kernel stack memory.

Signed-off-by: Vasiliy Kulikov <segooon@gmail.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hidp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index c0ee8b3928ed..29544c21f4b5 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -107,6 +107,7 @@ static void __hidp_unlink_session(struct hidp_session *session)
 
 static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
 {
+	memset(ci, 0, sizeof(*ci));
 	bacpy(&ci->bdaddr, &session->bdaddr);
 
 	ci->flags = session->flags;
@@ -115,7 +116,6 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin
 	ci->vendor  = 0x0000;
 	ci->product = 0x0000;
 	ci->version = 0x0000;
-	memset(ci->name, 0, 128);
 
 	if (session->input) {
 		ci->vendor  = session->input->id.vendor;
-- 
cgit v1.2.3


From a49184c229535ebedbb659214db2d4d1d77b7c07 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 3 Nov 2010 12:32:44 +0200
Subject: Bluetooth: Check sk is not owned before freeing l2cap_conn

Check that socket sk is not locked in user process before removing
l2cap connection handler.

lock_sock and release_sock do not hold a normal spinlock directly but
instead hold the owner field. This means bh_lock_sock can still execute
even if the socket is "locked". More info can be found here:
http://www.linuxfoundation.org/collaborate/workgroups/networking/socketlocks

krfcommd kernel thread may be preempted with l2cap tasklet which remove
l2cap_conn structure. If krfcommd is in process of sending of RFCOMM reply
(like "RFCOMM UA" reply to "RFCOMM DISC") then kernel crash happens.

...
[  694.175933] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[  694.184936] pgd = c0004000
[  694.187683] [00000000] *pgd=00000000
[  694.191711] Internal error: Oops: 5 [#1] PREEMPT
[  694.196350] last sysfs file: /sys/devices/platform/hci_h4p/firmware/hci_h4p/loading
[  694.260375] CPU: 0    Not tainted  (2.6.32.10 #1)
[  694.265106] PC is at l2cap_sock_sendmsg+0x43c/0x73c [l2cap]
[  694.270721] LR is at 0xd7017303
...
[  694.525085] Backtrace:
[  694.527587] [<bf266be0>] (l2cap_sock_sendmsg+0x0/0x73c [l2cap]) from [<c02f2cc8>] (sock_sendmsg+0xb8/0xd8)
[  694.537292] [<c02f2c10>] (sock_sendmsg+0x0/0xd8) from [<c02f3044>] (kernel_sendmsg+0x48/0x80)

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cd8f6ea03841..4ed38272df78 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3078,6 +3078,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		break;
 
 	default:
+		/* don't delete l2cap channel if sk is owned by user */
+		if (sock_owned_by_user(sk)) {
+			sk->sk_state = BT_DISCONN;
+			l2cap_sock_clear_timer(sk);
+			l2cap_sock_set_timer(sk, HZ / 5);
+			break;
+		}
+
 		l2cap_chan_del(sk, ECONNREFUSED);
 		break;
 	}
@@ -3283,6 +3291,15 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
+	/* don't delete l2cap channel if sk is owned by user */
+	if (sock_owned_by_user(sk)) {
+		sk->sk_state = BT_DISCONN;
+		l2cap_sock_clear_timer(sk);
+		l2cap_sock_set_timer(sk, HZ / 5);
+		bh_unlock_sock(sk);
+		return 0;
+	}
+
 	l2cap_chan_del(sk, ECONNRESET);
 	bh_unlock_sock(sk);
 
@@ -3305,6 +3322,15 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
 	if (!sk)
 		return 0;
 
+	/* don't delete l2cap channel if sk is owned by user */
+	if (sock_owned_by_user(sk)) {
+		sk->sk_state = BT_DISCONN;
+		l2cap_sock_clear_timer(sk);
+		l2cap_sock_set_timer(sk, HZ / 5);
+		bh_unlock_sock(sk);
+		return 0;
+	}
+
 	l2cap_chan_del(sk, 0);
 	bh_unlock_sock(sk);
 
-- 
cgit v1.2.3


From 940a9eea80946b64b96bd8af1fc71b30c602d057 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 3 Nov 2010 12:32:45 +0200
Subject: Bluetooth: timer check sk is not owned before freeing

In timer context we might delete l2cap channel used by krfcommd.
The check makes sure that sk is not owned. If sk is owned we
restart timer for HZ/5.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4ed38272df78..18a802cc0469 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -83,6 +83,18 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
 
 /* ---- L2CAP timers ---- */
+static void l2cap_sock_set_timer(struct sock *sk, long timeout)
+{
+	BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+}
+
+static void l2cap_sock_clear_timer(struct sock *sk)
+{
+	BT_DBG("sock %p state %d", sk, sk->sk_state);
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
 static void l2cap_sock_timeout(unsigned long arg)
 {
 	struct sock *sk = (struct sock *) arg;
@@ -92,6 +104,14 @@ static void l2cap_sock_timeout(unsigned long arg)
 
 	bh_lock_sock(sk);
 
+	if (sock_owned_by_user(sk)) {
+		/* sk is owned by user. Try again later */
+		l2cap_sock_set_timer(sk, HZ / 5);
+		bh_unlock_sock(sk);
+		sock_put(sk);
+		return;
+	}
+
 	if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
 		reason = ECONNREFUSED;
 	else if (sk->sk_state == BT_CONNECT &&
@@ -108,18 +128,6 @@ static void l2cap_sock_timeout(unsigned long arg)
 	sock_put(sk);
 }
 
-static void l2cap_sock_set_timer(struct sock *sk, long timeout)
-{
-	BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout);
-	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
-}
-
-static void l2cap_sock_clear_timer(struct sock *sk)
-{
-	BT_DBG("sock %p state %d", sk, sk->sk_state);
-	sk_stop_timer(sk, &sk->sk_timer);
-}
-
 /* ---- L2CAP channels ---- */
 static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
 {
-- 
cgit v1.2.3


From cc11b9c14da4ca1c545b424dae2ae8fb1ab04063 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Mon, 22 Nov 2010 13:21:37 +0200
Subject: Bluetooth: do not use assignment in if condition

Fix checkpatch errors like:
"ERROR: do not use assignment in if condition"
Simplify code and fix one long line.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Acked-by: Ville Tervo <ville.tervo@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_event.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4165895049b3..3c1957c82b61 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -996,12 +996,14 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 		hci_dev_lock(hdev);
 
-		if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr)))
+		ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
+		if (ie)
 			memcpy(ie->data.dev_class, ev->dev_class, 3);
 
 		conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
 		if (!conn) {
-			if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
+			conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr);
+			if (!conn) {
 				BT_ERR("No memory for new connection");
 				hci_dev_unlock(hdev);
 				return;
@@ -1608,7 +1610,8 @@ static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *sk
 	if (conn && !ev->status) {
 		struct inquiry_entry *ie;
 
-		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) {
+		ie = hci_inquiry_cache_lookup(hdev, &conn->dst);
+		if (ie) {
 			ie->data.clock_offset = ev->clock_offset;
 			ie->timestamp = jiffies;
 		}
@@ -1642,7 +1645,8 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *
 
 	hci_dev_lock(hdev);
 
-	if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr))) {
+	ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
+	if (ie) {
 		ie->data.pscan_rep_mode = ev->pscan_rep_mode;
 		ie->timestamp = jiffies;
 	}
@@ -1713,7 +1717,8 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 	if (!ev->status && ev->page == 0x01) {
 		struct inquiry_entry *ie;
 
-		if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst)))
+		ie = hci_inquiry_cache_lookup(hdev, &conn->dst);
+		if (ie)
 			ie->data.ssp_mode = (ev->features[0] & 0x01);
 
 		conn->ssp_mode = (ev->features[0] & 0x01);
@@ -1886,7 +1891,8 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_
 
 	hci_dev_lock(hdev);
 
-	if ((ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr)))
+	ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
+	if (ie)
 		ie->data.ssp_mode = (ev->features[0] & 0x01);
 
 	hci_dev_unlock(hdev);
-- 
cgit v1.2.3


From e0f0cb56364958223f0cb1f1b0b0eecf1b8dcb95 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 1 Nov 2010 18:43:53 +0000
Subject: Bluetooth: Get rid of __l2cap_get_sock_by_psm()

l2cap_get_sock_by_psm() was the only user of this function, so I merged
both into l2cap_get_sock_by_psm(). The socket lock now should be hold
outside of l2cap_get_sock_by_psm() once we hold and release it inside the
same function now.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 18a802cc0469..12b4aa2f8fc9 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -751,11 +751,13 @@ found:
 /* Find socket with psm and source bdaddr.
  * Returns closest match.
  */
-static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
+static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
 {
 	struct sock *sk = NULL, *sk1 = NULL;
 	struct hlist_node *node;
 
+	read_lock(&l2cap_sk_list.lock);
+
 	sk_for_each(sk, node, &l2cap_sk_list.head) {
 		if (state && sk->sk_state != state)
 			continue;
@@ -770,20 +772,10 @@ static struct sock *__l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src
 				sk1 = sk;
 		}
 	}
-	return node ? sk : sk1;
-}
 
-/* Find socket with given address (psm, src).
- * Returns locked socket */
-static inline struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src)
-{
-	struct sock *s;
-	read_lock(&l2cap_sk_list.lock);
-	s = __l2cap_get_sock_by_psm(state, psm, src);
-	if (s)
-		bh_lock_sock(s);
 	read_unlock(&l2cap_sk_list.lock);
-	return s;
+
+	return node ? sk : sk1;
 }
 
 static void l2cap_sock_destruct(struct sock *sk)
@@ -2934,6 +2926,8 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		goto sendresp;
 	}
 
+	bh_lock_sock(parent);
+
 	/* Check if the ACL is secure enough (if not SDP) */
 	if (psm != cpu_to_le16(0x0001) &&
 				!hci_conn_check_link_mode(conn->hcon)) {
@@ -4464,6 +4458,8 @@ static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, str
 	if (!sk)
 		goto drop;
 
+	bh_lock_sock(sk);
+
 	BT_DBG("sk %p, len %d", sk, skb->len);
 
 	if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED)
-- 
cgit v1.2.3


From eeb366564be7c311b31c70821d18a43a8a57f9bc Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 1 Nov 2010 18:43:53 +0000
Subject: Bluetooth: Get rid of __rfcomm_get_sock_by_channel()

rfcomm_get_sock_by_channel() was the only user of this function, so I merged
both into rfcomm_get_sock_by_channel(). The socket lock now should be hold
outside of rfcomm_get_sock_by_channel() once we hold and release it inside the
same function now.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/rfcomm/sock.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index aec505f934df..0207bd6dbfc5 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -140,11 +140,13 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src)
 /* Find socket with channel and source bdaddr.
  * Returns closest match.
  */
-static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
+static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
 {
 	struct sock *sk = NULL, *sk1 = NULL;
 	struct hlist_node *node;
 
+	read_lock(&rfcomm_sk_list.lock);
+
 	sk_for_each(sk, node, &rfcomm_sk_list.head) {
 		if (state && sk->sk_state != state)
 			continue;
@@ -159,19 +161,10 @@ static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t
 				sk1 = sk;
 		}
 	}
-	return node ? sk : sk1;
-}
 
-/* Find socket with given address (channel, src).
- * Returns locked socket */
-static inline struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src)
-{
-	struct sock *s;
-	read_lock(&rfcomm_sk_list.lock);
-	s = __rfcomm_get_sock_by_channel(state, channel, src);
-	if (s) bh_lock_sock(s);
 	read_unlock(&rfcomm_sk_list.lock);
-	return s;
+
+	return node ? sk : sk1;
 }
 
 static void rfcomm_sock_destruct(struct sock *sk)
@@ -945,6 +938,8 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 	if (!parent)
 		return 0;
 
+	bh_lock_sock(parent);
+
 	/* Check for backlog size */
 	if (sk_acceptq_is_full(parent)) {
 		BT_DBG("backlog full %d", parent->sk_ack_backlog);
-- 
cgit v1.2.3


From b78d7b4f204a6ba1901af36c95e10fded9816054 Mon Sep 17 00:00:00 2001
From: Anderson Lizardo <anderson.lizardo@openbossa.org>
Date: Mon, 29 Nov 2010 12:15:50 -0400
Subject: Bluetooth: Fix error handling for l2cap_init()

create_singlethread_workqueue() may fail with errors such as -ENOMEM. If
this happens, the return value is not set to a negative value and the
module load will succeed. It will then crash on module unload because of
a destroy_workqueue() call on a NULL pointer.

Additionally, the _busy_wq workqueue is not being destroyed if any
errors happen on l2cap_init().

Signed-off-by: Anderson Lizardo <anderson.lizardo@openbossa.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/l2cap.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 12b4aa2f8fc9..a1c7ae88dd1f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -4871,8 +4871,10 @@ static int __init l2cap_init(void)
 		return err;
 
 	_busy_wq = create_singlethread_workqueue("l2cap");
-	if (!_busy_wq)
-		goto error;
+	if (!_busy_wq) {
+		proto_unregister(&l2cap_proto);
+		return -ENOMEM;
+	}
 
 	err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
 	if (err < 0) {
@@ -4900,6 +4902,7 @@ static int __init l2cap_init(void)
 	return 0;
 
 error:
+	destroy_workqueue(_busy_wq);
 	proto_unregister(&l2cap_proto);
 	return err;
 }
-- 
cgit v1.2.3


From 735cbc4784a084b7a76c43c69f9dba683bb3b48b Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 1 Dec 2010 16:58:22 +0200
Subject: Bluetooth: clean up sco code

Do not use assignments in IF condition, remove extra spaces

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/sco.h |  8 ++++----
 net/bluetooth/sco.c         | 22 +++++++++++++---------
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h
index e28a2a771471..ea5c6641243f 100644
--- a/include/net/bluetooth/sco.h
+++ b/include/net/bluetooth/sco.h
@@ -55,11 +55,11 @@ struct sco_conninfo {
 struct sco_conn {
 	struct hci_conn	*hcon;
 
-	bdaddr_t 	*dst;
-	bdaddr_t 	*src;
-	
+	bdaddr_t	*dst;
+	bdaddr_t	*src;
+
 	spinlock_t	lock;
-	struct sock 	*sk;
+	struct sock	*sk;
 
 	unsigned int    mtu;
 };
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 66b9e5c0523a..960c6d1637da 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -44,7 +44,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -52,7 +52,7 @@
 
 #define VERSION "0.6"
 
-static int disable_esco = 0;
+static int disable_esco;
 
 static const struct proto_ops sco_sock_ops;
 
@@ -138,16 +138,17 @@ static inline struct sock *sco_chan_get(struct sco_conn *conn)
 
 static int sco_conn_del(struct hci_conn *hcon, int err)
 {
-	struct sco_conn *conn;
+	struct sco_conn *conn = hcon->sco_data;
 	struct sock *sk;
 
-	if (!(conn = hcon->sco_data))
+	if (!conn)
 		return 0;
 
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
 	/* Kill socket */
-	if ((sk = sco_chan_get(conn))) {
+	sk = sco_chan_get(conn);
+	if (sk) {
 		bh_lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
@@ -185,7 +186,8 @@ static int sco_connect(struct sock *sk)
 
 	BT_DBG("%s -> %s", batostr(src), batostr(dst));
 
-	if (!(hdev = hci_get_route(dst, src)))
+	hdev = hci_get_route(dst, src);
+	if (!hdev)
 		return -EHOSTUNREACH;
 
 	hci_dev_lock_bh(hdev);
@@ -510,7 +512,8 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
 	/* Set destination address and psm */
 	bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr);
 
-	if ((err = sco_connect(sk)))
+	err = sco_connect(sk);
+	if (err)
 		goto done;
 
 	err = bt_sock_wait_state(sk, BT_CONNECTED,
@@ -828,13 +831,14 @@ static void sco_chan_del(struct sock *sk, int err)
 
 static void sco_conn_ready(struct sco_conn *conn)
 {
-	struct sock *parent, *sk;
+	struct sock *parent;
+	struct sock *sk = conn->sk;
 
 	BT_DBG("conn %p", conn);
 
 	sco_conn_lock(conn);
 
-	if ((sk = conn->sk)) {
+	if (sk) {
 		sco_sock_clear_timer(sk);
 		bh_lock_sock(sk);
 		sk->sk_state = BT_CONNECTED;
-- 
cgit v1.2.3


From 285b4e90318dcf421a00b2ac3fe8ab713f3281e3 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 1 Dec 2010 16:58:23 +0200
Subject: Bluetooth: clean up rfcomm code

Remove extra spaces, assignments in if statement, zeroing static
variables, extra braces. Fix includes.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/rfcomm.h |  4 ++--
 net/bluetooth/rfcomm/core.c    |  8 ++++----
 net/bluetooth/rfcomm/sock.c    |  5 +++--
 net/bluetooth/rfcomm/tty.c     | 28 ++++++++++++++++------------
 4 files changed, 25 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 71047bc0af84..2e7875691f0a 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -105,7 +105,7 @@
 struct rfcomm_hdr {
 	u8 addr;
 	u8 ctrl;
-	u8 len;    // Actual size can be 2 bytes
+	u8 len;    /* Actual size can be 2 bytes */
 } __packed;
 
 struct rfcomm_cmd {
@@ -228,7 +228,7 @@ struct rfcomm_dlc {
 /* ---- RFCOMM SEND RPN ---- */
 int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
 			u8 bit_rate, u8 data_bits, u8 stop_bits,
-			u8 parity, u8 flow_ctrl_settings, 
+			u8 parity, u8 flow_ctrl_settings,
 			u8 xon_char, u8 xoff_char, u16 param_mask);
 
 /* ---- RFCOMM DLCs (channels) ---- */
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index fa642aa652bd..c1e2bbafb549 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -41,7 +41,7 @@
 #include <linux/slab.h>
 
 #include <net/sock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -51,10 +51,10 @@
 
 #define VERSION "1.11"
 
-static int disable_cfc = 0;
+static int disable_cfc;
+static int l2cap_ertm;
 static int channel_mtu = -1;
 static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU;
-static int l2cap_ertm = 0;
 
 static struct task_struct *rfcomm_thread;
 
@@ -1901,7 +1901,7 @@ static inline void rfcomm_check_connection(struct rfcomm_session *s)
 
 	BT_DBG("%p state %ld", s, s->state);
 
-	switch(sk->sk_state) {
+	switch (sk->sk_state) {
 	case BT_CONNECTED:
 		s->state = BT_CONNECT;
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 0207bd6dbfc5..66cc1f0c3df8 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -45,7 +45,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -888,7 +888,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how)
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	if (!sk) return 0;
+	if (!sk)
+		return 0;
 
 	lock_sock(sk);
 	if (!sk->sk_shutdown) {
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index a9b81f5dacd1..2575c2db6404 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -58,9 +58,9 @@ struct rfcomm_dev {
 
 	bdaddr_t		src;
 	bdaddr_t		dst;
-	u8 			channel;
+	u8			channel;
 
-	uint 			modem_status;
+	uint			modem_status;
 
 	struct rfcomm_dlc	*dlc;
 	struct tty_struct	*tty;
@@ -69,7 +69,7 @@ struct rfcomm_dev {
 
 	struct device		*tty_dev;
 
-	atomic_t 		wmem_alloc;
+	atomic_t		wmem_alloc;
 
 	struct sk_buff_head	pending;
 };
@@ -431,7 +431,8 @@ static int rfcomm_release_dev(void __user *arg)
 
 	BT_DBG("dev_id %d flags 0x%x", req.dev_id, req.flags);
 
-	if (!(dev = rfcomm_dev_get(req.dev_id)))
+	dev = rfcomm_dev_get(req.dev_id);
+	if (!dev)
 		return -ENODEV;
 
 	if (dev->flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) {
@@ -470,7 +471,8 @@ static int rfcomm_get_dev_list(void __user *arg)
 
 	size = sizeof(*dl) + dev_num * sizeof(*di);
 
-	if (!(dl = kmalloc(size, GFP_KERNEL)))
+	dl = kmalloc(size, GFP_KERNEL);
+	if (!dl)
 		return -ENOMEM;
 
 	di = dl->dev_info;
@@ -513,7 +515,8 @@ static int rfcomm_get_dev_info(void __user *arg)
 	if (copy_from_user(&di, arg, sizeof(di)))
 		return -EFAULT;
 
-	if (!(dev = rfcomm_dev_get(di.id)))
+	dev = rfcomm_dev_get(di.id);
+	if (!dev)
 		return -ENODEV;
 
 	di.flags   = dev->flags;
@@ -561,7 +564,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 		return;
 	}
 
-	if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) {
+	tty = dev->tty;
+	if (!tty || !skb_queue_empty(&dev->pending)) {
 		skb_queue_tail(&dev->pending, skb);
 		return;
 	}
@@ -796,7 +800,8 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
 
 		memcpy(skb_put(skb, size), buf + sent, size);
 
-		if ((err = rfcomm_dlc_send(dlc, skb)) < 0) {
+		err = rfcomm_dlc_send(dlc, skb);
+		if (err < 0) {
 			kfree_skb(skb);
 			break;
 		}
@@ -892,7 +897,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 
 	/* Parity on/off and when on, odd/even */
 	if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) ||
-			((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) {
+			((old->c_cflag & PARODD) != (new->c_cflag & PARODD))) {
 		changes |= RFCOMM_RPN_PM_PARITY;
 		BT_DBG("Parity change detected.");
 	}
@@ -937,11 +942,10 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 	/* POSIX does not support 1.5 stop bits and RFCOMM does not
 	 * support 2 stop bits. So a request for 2 stop bits gets
 	 * translated to 1.5 stop bits */
-	if (new->c_cflag & CSTOPB) {
+	if (new->c_cflag & CSTOPB)
 		stop_bits = RFCOMM_RPN_STOP_15;
-	} else {
+	else
 		stop_bits = RFCOMM_RPN_STOP_1;
-	}
 
 	/* Handle number of data bits [5-8] */
 	if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE))
-- 
cgit v1.2.3


From 894718a6be69d8cfd191dc291b42be32a1e4851b Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 1 Dec 2010 16:58:24 +0200
Subject: Bluetooth: clean up l2cap code

Do not initialize static vars to zero, macros with complex values
shall be enclosed with (), remove unneeded braces.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/l2cap.h | 10 +++++-----
 net/bluetooth/l2cap.c         |  7 +++----
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index c819c8bf9b68..217bb91a7345 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -417,11 +417,11 @@ static inline int l2cap_tx_window_full(struct sock *sk)
 	return sub == pi->remote_tx_win;
 }
 
-#define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1
-#define __get_reqseq(ctrl) ((ctrl) & L2CAP_CTRL_REQSEQ) >> 8
-#define __is_iframe(ctrl) !((ctrl) & L2CAP_CTRL_FRAME_TYPE)
-#define __is_sframe(ctrl) (ctrl) & L2CAP_CTRL_FRAME_TYPE
-#define __is_sar_start(ctrl) ((ctrl) & L2CAP_CTRL_SAR) == L2CAP_SDU_START
+#define __get_txseq(ctrl)	(((ctrl) & L2CAP_CTRL_TXSEQ) >> 1)
+#define __get_reqseq(ctrl)	(((ctrl) & L2CAP_CTRL_REQSEQ) >> 8)
+#define __is_iframe(ctrl)	(!((ctrl) & L2CAP_CTRL_FRAME_TYPE))
+#define __is_sframe(ctrl)	((ctrl) & L2CAP_CTRL_FRAME_TYPE)
+#define __is_sar_start(ctrl)	(((ctrl) & L2CAP_CTRL_SAR) == L2CAP_SDU_START)
 
 void l2cap_load(void);
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index a1c7ae88dd1f..c12eccfdfe01 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -57,7 +57,7 @@
 
 #define VERSION "2.15"
 
-static int disable_ertm = 0;
+static int disable_ertm;
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
@@ -4162,11 +4162,10 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 			__mod_retrans_timer();
 
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
+		if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
 			l2cap_send_ack(pi);
-		} else {
+		else
 			l2cap_ertm_send(sk);
-		}
 	}
 }
 
-- 
cgit v1.2.3


From 70f23020e6d89155504b5b39f22505f4aec6fa6f Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Wed, 1 Dec 2010 16:58:25 +0200
Subject: Bluetooth: clean up hci code

Do not use assignment in IF condition, remove extra spaces,
fixing typos, simplify code.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/hci.h      |  4 +--
 include/net/bluetooth/hci_core.h | 14 ++++-----
 net/bluetooth/hci_conn.c         | 23 +++++++++-----
 net/bluetooth/hci_core.c         | 66 +++++++++++++++++++++++++---------------
 net/bluetooth/hci_event.c        |  8 +++--
 net/bluetooth/hci_sock.c         | 17 +++++++----
 6 files changed, 82 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index e30e00834340..e9527c512345 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -489,7 +489,7 @@ struct hci_rp_read_local_name {
 
 #define HCI_OP_WRITE_PG_TIMEOUT		0x0c18
 
-#define HCI_OP_WRITE_SCAN_ENABLE 	0x0c1a
+#define HCI_OP_WRITE_SCAN_ENABLE	0x0c1a
 	#define SCAN_DISABLED		0x00
 	#define SCAN_INQUIRY		0x01
 	#define SCAN_PAGE		0x02
@@ -874,7 +874,7 @@ struct hci_ev_si_security {
 
 struct hci_command_hdr {
 	__le16	opcode;		/* OCF & OGF */
-	__u8 	plen;
+	__u8	plen;
 } __packed;
 
 struct hci_event_hdr {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ebec8c9a929d..9c08625617a1 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -44,15 +44,15 @@ struct inquiry_data {
 };
 
 struct inquiry_entry {
-	struct inquiry_entry 	*next;
+	struct inquiry_entry	*next;
 	__u32			timestamp;
 	struct inquiry_data	data;
 };
 
 struct inquiry_cache {
-	spinlock_t 		lock;
+	spinlock_t		lock;
 	__u32			timestamp;
-	struct inquiry_entry 	*list;
+	struct inquiry_entry	*list;
 };
 
 struct hci_conn_hash {
@@ -141,7 +141,7 @@ struct hci_dev {
 	void			*driver_data;
 	void			*core_data;
 
-	atomic_t 		promisc;
+	atomic_t		promisc;
 
 	struct dentry		*debugfs;
 
@@ -150,7 +150,7 @@ struct hci_dev {
 
 	struct rfkill		*rfkill;
 
-	struct module 		*owner;
+	struct module		*owner;
 
 	int (*open)(struct hci_dev *hdev);
 	int (*close)(struct hci_dev *hdev);
@@ -215,8 +215,8 @@ extern rwlock_t hci_dev_list_lock;
 extern rwlock_t hci_cb_list_lock;
 
 /* ----- Inquiry cache ----- */
-#define INQUIRY_CACHE_AGE_MAX   (HZ*30)   // 30 seconds
-#define INQUIRY_ENTRY_AGE_MAX   (HZ*60)   // 60 seconds
+#define INQUIRY_CACHE_AGE_MAX   (HZ*30)   /* 30 seconds */
+#define INQUIRY_ENTRY_AGE_MAX   (HZ*60)   /* 60 seconds */
 
 #define inquiry_cache_lock(c)		spin_lock(&c->lock)
 #define inquiry_cache_unlock(c)		spin_unlock(&c->lock)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 0b1e460fe440..6b90a4191734 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -39,7 +39,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -66,7 +66,8 @@ void hci_acl_connect(struct hci_conn *conn)
 	bacpy(&cp.bdaddr, &conn->dst);
 	cp.pscan_rep_mode = 0x02;
 
-	if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) {
+	ie = hci_inquiry_cache_lookup(hdev, &conn->dst);
+	if (ie) {
 		if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
 			cp.pscan_rep_mode = ie->data.pscan_rep_mode;
 			cp.pscan_mode     = ie->data.pscan_mode;
@@ -368,8 +369,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 
 	BT_DBG("%s dst %s", hdev->name, batostr(dst));
 
-	if (!(acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst))) {
-		if (!(acl = hci_conn_add(hdev, ACL_LINK, dst)))
+	acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
+	if (!acl) {
+		acl = hci_conn_add(hdev, ACL_LINK, dst);
+		if (!acl)
 			return NULL;
 	}
 
@@ -389,8 +392,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 	if (type == ACL_LINK)
 		return acl;
 
-	if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) {
-		if (!(sco = hci_conn_add(hdev, type, dst))) {
+	sco = hci_conn_hash_lookup_ba(hdev, type, dst);
+	if (!sco) {
+		sco = hci_conn_add(hdev, type, dst);
+		if (!sco) {
 			hci_conn_put(acl);
 			return NULL;
 		}
@@ -647,10 +652,12 @@ int hci_get_conn_list(void __user *arg)
 
 	size = sizeof(req) + req.conn_num * sizeof(*ci);
 
-	if (!(cl = kmalloc(size, GFP_KERNEL)))
+	cl = kmalloc(size, GFP_KERNEL);
+	if (!cl)
 		return -ENOMEM;
 
-	if (!(hdev = hci_dev_get(req.dev_id))) {
+	hdev = hci_dev_get(req.dev_id);
+	if (!hdev) {
 		kfree(cl);
 		return -ENODEV;
 	}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index bc2a052e518b..51c61f75a797 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -44,7 +44,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -349,20 +349,23 @@ struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *b
 void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data)
 {
 	struct inquiry_cache *cache = &hdev->inq_cache;
-	struct inquiry_entry *e;
+	struct inquiry_entry *ie;
 
 	BT_DBG("cache %p, %s", cache, batostr(&data->bdaddr));
 
-	if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) {
+	ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr);
+	if (!ie) {
 		/* Entry not in the cache. Add new one. */
-		if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC)))
+		ie = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC);
+		if (!ie)
 			return;
-		e->next     = cache->list;
-		cache->list = e;
+
+		ie->next = cache->list;
+		cache->list = ie;
 	}
 
-	memcpy(&e->data, data, sizeof(*data));
-	e->timestamp = jiffies;
+	memcpy(&ie->data, data, sizeof(*data));
+	ie->timestamp = jiffies;
 	cache->timestamp = jiffies;
 }
 
@@ -422,16 +425,20 @@ int hci_inquiry(void __user *arg)
 
 	hci_dev_lock_bh(hdev);
 	if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
-					inquiry_cache_empty(hdev) ||
-					ir.flags & IREQ_CACHE_FLUSH) {
+				inquiry_cache_empty(hdev) ||
+				ir.flags & IREQ_CACHE_FLUSH) {
 		inquiry_cache_flush(hdev);
 		do_inquiry = 1;
 	}
 	hci_dev_unlock_bh(hdev);
 
 	timeo = ir.length * msecs_to_jiffies(2000);
-	if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0)
-		goto done;
+
+	if (do_inquiry) {
+		err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo);
+		if (err < 0)
+			goto done;
+	}
 
 	/* for unlimited number of responses we will use buffer with 255 entries */
 	max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp;
@@ -439,7 +446,8 @@ int hci_inquiry(void __user *arg)
 	/* cache_dump can't sleep. Therefore we allocate temp buffer and then
 	 * copy it to the user space.
 	 */
-	if (!(buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL))) {
+	buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL);
+	if (!buf) {
 		err = -ENOMEM;
 		goto done;
 	}
@@ -611,7 +619,8 @@ int hci_dev_close(__u16 dev)
 	struct hci_dev *hdev;
 	int err;
 
-	if (!(hdev = hci_dev_get(dev)))
+	hdev = hci_dev_get(dev);
+	if (!hdev)
 		return -ENODEV;
 	err = hci_dev_do_close(hdev);
 	hci_dev_put(hdev);
@@ -623,7 +632,8 @@ int hci_dev_reset(__u16 dev)
 	struct hci_dev *hdev;
 	int ret = 0;
 
-	if (!(hdev = hci_dev_get(dev)))
+	hdev = hci_dev_get(dev);
+	if (!hdev)
 		return -ENODEV;
 
 	hci_req_lock(hdev);
@@ -663,7 +673,8 @@ int hci_dev_reset_stat(__u16 dev)
 	struct hci_dev *hdev;
 	int ret = 0;
 
-	if (!(hdev = hci_dev_get(dev)))
+	hdev = hci_dev_get(dev);
+	if (!hdev)
 		return -ENODEV;
 
 	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
@@ -682,7 +693,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 	if (copy_from_user(&dr, arg, sizeof(dr)))
 		return -EFAULT;
 
-	if (!(hdev = hci_dev_get(dr.dev_id)))
+	hdev = hci_dev_get(dr.dev_id);
+	if (!hdev)
 		return -ENODEV;
 
 	switch (cmd) {
@@ -763,7 +775,8 @@ int hci_get_dev_list(void __user *arg)
 
 	size = sizeof(*dl) + dev_num * sizeof(*dr);
 
-	if (!(dl = kzalloc(size, GFP_KERNEL)))
+	dl = kzalloc(size, GFP_KERNEL);
+	if (!dl)
 		return -ENOMEM;
 
 	dr = dl->dev_req;
@@ -797,7 +810,8 @@ int hci_get_dev_info(void __user *arg)
 	if (copy_from_user(&di, arg, sizeof(di)))
 		return -EFAULT;
 
-	if (!(hdev = hci_dev_get(di.dev_id)))
+	hdev = hci_dev_get(di.dev_id);
+	if (!hdev)
 		return -ENODEV;
 
 	strcpy(di.name, hdev->name);
@@ -905,7 +919,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	hdev->sniff_max_interval = 800;
 	hdev->sniff_min_interval = 80;
 
-	tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev);
+	tasklet_init(&hdev->cmd_task, hci_cmd_task, (unsigned long) hdev);
 	tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev);
 	tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev);
 
@@ -1368,7 +1382,8 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 	bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT;
 	hci_add_acl_hdr(skb, conn->handle, flags | ACL_START);
 
-	if (!(list = skb_shinfo(skb)->frag_list)) {
+	list = skb_shinfo(skb)->frag_list;
+	if (!list) {
 		/* Non fragmented */
 		BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len);
 
@@ -1609,7 +1624,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_conn_enter_active_mode(conn);
 
 		/* Send to upper protocol */
-		if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) {
+		hp = hci_proto[HCI_PROTO_L2CAP];
+		if (hp && hp->recv_acldata) {
 			hp->recv_acldata(conn, skb, flags);
 			return;
 		}
@@ -1644,7 +1660,8 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		register struct hci_proto *hp;
 
 		/* Send to upper protocol */
-		if ((hp = hci_proto[HCI_PROTO_SCO]) && hp->recv_scodata) {
+		hp = hci_proto[HCI_PROTO_SCO];
+		if (hp && hp->recv_scodata) {
 			hp->recv_scodata(conn, skb);
 			return;
 		}
@@ -1727,7 +1744,8 @@ static void hci_cmd_task(unsigned long arg)
 	if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
 		kfree_skb(hdev->sent_cmd);
 
-		if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) {
+		hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC);
+		if (hdev->sent_cmd) {
 			atomic_dec(&hdev->cmd_cnt);
 			hci_send_frame(skb);
 			hdev->cmd_last_tx = jiffies;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 3c1957c82b61..8923b36a67a2 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -39,7 +39,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -1512,10 +1512,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
 			conn->sent -= count;
 
 			if (conn->type == ACL_LINK) {
-				if ((hdev->acl_cnt += count) > hdev->acl_pkts)
+				hdev->acl_cnt += count;
+				if (hdev->acl_cnt > hdev->acl_pkts)
 					hdev->acl_cnt = hdev->acl_pkts;
 			} else {
-				if ((hdev->sco_cnt += count) > hdev->sco_pkts)
+				hdev->sco_cnt += count;
+				if (hdev->sco_cnt > hdev->sco_pkts)
 					hdev->sco_cnt = hdev->sco_pkts;
 			}
 		}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 83acd164d39e..b3753bad2a55 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -43,7 +43,7 @@
 #include <net/sock.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -125,7 +125,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 				continue;
 		}
 
-		if (!(nskb = skb_clone(skb, GFP_ATOMIC)))
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
 			continue;
 
 		/* Put type byte before the data */
@@ -370,7 +371,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
 	}
 
 	if (haddr->hci_dev != HCI_DEV_NONE) {
-		if (!(hdev = hci_dev_get(haddr->hci_dev))) {
+		hdev = hci_dev_get(haddr->hci_dev);
+		if (!hdev) {
 			err = -ENODEV;
 			goto done;
 		}
@@ -457,7 +459,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (sk->sk_state == BT_CLOSED)
 		return 0;
 
-	if (!(skb = skb_recv_datagram(sk, flags, noblock, &err)))
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
 		return err;
 
 	msg->msg_namelen = 0;
@@ -499,7 +502,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	lock_sock(sk);
 
-	if (!(hdev = hci_pi(sk)->hdev)) {
+	hdev = hci_pi(sk)->hdev;
+	if (!hdev) {
 		err = -EBADFD;
 		goto done;
 	}
@@ -509,7 +513,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto done;
 	}
 
-	if (!(skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err)))
+	skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
 		goto done;
 
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
-- 
cgit v1.2.3


From 8790ca172a1550949804a2ad59ccea310f680c9f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 1 Dec 2010 17:28:18 -0800
Subject: inetpeer: Kill use of inet_peer_address_t typedef.

They are verboten these days.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 12 ++++++------
 net/ipv4/inetpeer.c    |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index fb8aeb1fd23f..5161bfdf5a52 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -14,18 +14,18 @@
 #include <net/ipv6.h>
 #include <asm/atomic.h>
 
-typedef struct {
+struct inetpeer_addr {
 	union {
 		__be32		a4;
 		__be32		a6[4];
 	};
 	__u16	family;
-} inet_peer_address_t;
+};
 
 struct inet_peer {
 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
 	struct inet_peer __rcu	*avl_left, *avl_right;
-	inet_peer_address_t	daddr;
+	struct inetpeer_addr	daddr;
 	__u32			avl_height;
 	struct list_head	unused;
 	__u32			dtime;		/* the time of last use of not
@@ -51,11 +51,11 @@ struct inet_peer {
 void			inet_initpeers(void) __init;
 
 /* can be called with or without local BH being disabled */
-struct inet_peer	*inet_getpeer(inet_peer_address_t *daddr, int create);
+struct inet_peer	*inet_getpeer(struct inetpeer_addr *daddr, int create);
 
 static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 {
-	inet_peer_address_t daddr;
+	struct inetpeer_addr daddr;
 
 	daddr.a4 = v4daddr;
 	daddr.family = AF_INET;
@@ -64,7 +64,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 
 static inline struct inet_peer *inet_getpeer_v6(struct in6_addr *v6daddr, int create)
 {
-	inet_peer_address_t daddr;
+	struct inetpeer_addr daddr;
 
 	ipv6_addr_copy((struct in6_addr *)daddr.a6, v6daddr);
 	daddr.family = AF_INET6;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index f95b89f3916d..d9bc85751c74 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -161,8 +161,8 @@ static void unlink_from_unused(struct inet_peer *p)
 	}
 }
 
-static int addr_compare(const inet_peer_address_t *a,
-			const inet_peer_address_t *b)
+static int addr_compare(const struct inetpeer_addr *a,
+			const struct inetpeer_addr *b)
 {
 	int i, n = (a->family == AF_INET ? 1 : 4);
 
@@ -211,7 +211,7 @@ static int addr_compare(const inet_peer_address_t *a,
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(const inet_peer_address_t *daddr,
+static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr,
 				       struct inet_peer_base *base)
 {
 	struct inet_peer *u = rcu_dereference_bh(base->root);
@@ -472,7 +472,7 @@ static int cleanup_once(unsigned long ttl)
 }
 
 /* Called with or without local BH being disabled. */
-struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create)
+struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 	struct inet_peer_base *base = family_to_base(AF_INET);
-- 
cgit v1.2.3


From ccb7c410ddc054b8c1ae780319bc98ae092d3854 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 1 Dec 2010 18:09:13 -0800
Subject: timewait_sock: Create and use getpeer op.

The only thing AF-specific about remembering the timestamp
for a time-wait TCP socket is getting the peer.

Abstract that behind a new timewait_sock_ops vector.

Support for real IPV6 sockets is not filled in yet, but
curiously this makes timewait recycling start to work
for v4-mapped ipv6 sockets.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h           |  1 +
 include/net/timewait_sock.h |  8 ++++++++
 net/ipv4/tcp_ipv4.c         | 33 +++++++++++----------------------
 net/ipv4/tcp_minisocks.c    | 32 ++++++++++++++++++++++++--------
 net/ipv6/tcp_ipv6.c         | 26 +++++++++++++++++++-------
 5 files changed, 63 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3e239641d4ee..4097320caa25 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -313,6 +313,7 @@ extern void tcp_shutdown (struct sock *sk, int how);
 extern int tcp_v4_rcv(struct sk_buff *skb);
 
 extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it);
+extern void *tcp_v4_tw_get_peer(struct sock *sk);
 extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
 extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t size);
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
index 97c3b14da55d..053b3cf2c66a 100644
--- a/include/net/timewait_sock.h
+++ b/include/net/timewait_sock.h
@@ -21,6 +21,7 @@ struct timewait_sock_ops {
 	int		(*twsk_unique)(struct sock *sk,
 				       struct sock *sktw, void *twp);
 	void		(*twsk_destructor)(struct sock *sk);
+	void		*(*twsk_getpeer)(struct sock *sk);
 };
 
 static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -39,4 +40,11 @@ static inline void twsk_destructor(struct sock *sk)
 		sk->sk_prot->twsk_prot->twsk_destructor(sk);
 }
 
+static inline void *twsk_getpeer(struct sock *sk)
+{
+	if (sk->sk_prot->twsk_prot->twsk_getpeer)
+		return sk->sk_prot->twsk_prot->twsk_getpeer(sk);
+	return NULL;
+}
+
 #endif /* _TIMEWAIT_SOCK_H */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ddf819cfb5d..dd555051ec8b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1210,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static struct timewait_sock_ops tcp_timewait_sock_ops = {
-	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
-	.twsk_unique	= tcp_twsk_unique,
-	.twsk_destructor= tcp_twsk_destructor,
-};
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
@@ -1783,25 +1777,20 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 }
 EXPORT_SYMBOL(tcp_v4_get_peer);
 
-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+void *tcp_v4_tw_get_peer(struct sock *sk)
 {
-	struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1);
-
-	if (peer) {
-		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-
-		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
-		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
-			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
-			peer->tcp_ts	   = tcptw->tw_ts_recent;
-		}
-		inet_putpeer(peer);
-		return 1;
-	}
+	struct inet_timewait_sock *tw = inet_twsk(sk);
 
-	return 0;
+	return inet_getpeer_v4(tw->tw_daddr, 1);
 }
+EXPORT_SYMBOL(tcp_v4_tw_get_peer);
+
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+	.twsk_destructor= tcp_twsk_destructor,
+	.twsk_getpeer	= tcp_v4_tw_get_peer,
+};
 
 const struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	   = ip_queue_xmit,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 059082c873cf..3527b51d6159 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -78,6 +78,27 @@ static int tcp_remember_stamp(struct sock *sk)
 	return 0;
 }
 
+static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
+{
+	struct sock *sk = (struct sock *) tw;
+	struct inet_peer *peer;
+
+	peer = twsk_getpeer(sk);
+	if (peer) {
+		const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
+		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
+		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
+			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
+			peer->tcp_ts	   = tcptw->tw_ts_recent;
+		}
+		inet_putpeer(peer);
+		return 1;
+	}
+	return 0;
+}
+
 static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
@@ -178,14 +199,9 @@ kill_with_rst:
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
-		/* I am shamed, but failed to make it more elegant.
-		 * Yes, it is direct reference to IP, which is impossible
-		 * to generalize to IPv6. Taking into account that IPv6
-		 * do not understand recycling in any case, it not
-		 * a big problem in practice. --ANK */
-		if (tw->tw_family == AF_INET &&
-		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
-		    tcp_v4_tw_remember_stamp(tw))
+		if (tcp_death_row.sysctl_tw_recycle &&
+		    tcptw->tw_ts_recent_stamp &&
+		    tcp_tw_remember_stamp(tw))
 			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
 					   TCP_TIMEWAIT_LEN);
 		else
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e394d0029d8d..5f73a1808e36 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -906,12 +906,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 };
 #endif
 
-static struct timewait_sock_ops tcp6_timewait_sock_ops = {
-	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
-	.twsk_unique	= tcp_twsk_unique,
-	.twsk_destructor= tcp_twsk_destructor,
-};
-
 static void __tcp_v6_send_check(struct sk_buff *skb,
 				struct in6_addr *saddr, struct in6_addr *daddr)
 {
@@ -1818,12 +1812,30 @@ do_time_wait:
 	goto discard_it;
 }
 
-struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
+{
+	/* Alas, not yet... */
+	return NULL;
+}
+
+static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
+	struct inet_timewait_sock *tw = inet_twsk(sk);
+
+	if (tw->tw_family == AF_INET)
+		return tcp_v4_tw_get_peer(sk);
+
 	/* Alas, not yet... */
 	return NULL;
 }
 
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+	.twsk_destructor= tcp_twsk_destructor,
+	.twsk_getpeer	= tcp_v6_tw_get_peer,
+};
+
 static const struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	   = inet6_csk_xmit,
 	.send_check	   = tcp_v6_send_check,
-- 
cgit v1.2.3


From ae4694b2d3e4c0f47c0e804a68417be57e5daf85 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 2 Dec 2010 10:59:22 -0800
Subject: ipv6: Create inet6_csk_route_req().

Brother of ipv4's inet_csk_route_req().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_connection_sock.h |  3 +++
 net/ipv6/inet6_connection_sock.c    | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'net')

diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index aae08f686633..ff013505236b 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -25,6 +25,9 @@ struct sockaddr;
 extern int inet6_csk_bind_conflict(const struct sock *sk,
 				   const struct inet_bind_bucket *tb);
 
+extern struct dst_entry* inet6_csk_route_req(struct sock *sk,
+					     const struct request_sock *req);
+
 extern struct request_sock *inet6_csk_search_req(const struct sock *sk,
 						 struct request_sock ***prevp,
 						 const __be16 rport,
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 861d252bd1ba..e46305d1815a 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,6 +54,38 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 
 EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
 
+struct dst_entry *inet6_csk_route_req(struct sock *sk,
+				      const struct request_sock *req)
+{
+	struct inet6_request_sock *treq = inet6_rsk(req);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *final_p, final;
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_TCP;
+	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+	final_p = fl6_update_dst(&fl, np->opt, &final);
+	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.mark = sk->sk_mark;
+	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+	fl.fl_ip_sport = inet_rsk(req)->loc_port;
+	security_req_classify_flow(req, &fl);
+
+	if (ip6_dst_lookup(sk, &dst, &fl))
+		return NULL;
+
+	if (final_p)
+		ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+	if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+		return NULL;
+
+	return dst;
+}
+
 /*
  * request_sock (formerly open request) hash tables.
  */
-- 
cgit v1.2.3


From 493f377d6dd56f4e98b198d637fe714ab124681b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 2 Dec 2010 12:14:29 -0800
Subject: tcp: Add timewait recycling bits to ipv6 connect code.

This will also improve handling of ipv6 tcp socket request
backlog when syncookies are not enabled.  When backlog
becomes very deep, last quarter of backlog is limited to
validated destinations.  Previously only ipv4 implemented
this logic, but now ipv6 does too.

Now we are only one step away from enabling timewait
recycling for ipv6, and that step is simply filling in
the implementation of tcp_v6_get_peer() and
tcp_v6_tw_get_peer().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/request_sock.c |   1 +
 net/ipv6/tcp_ipv6.c     | 101 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 77 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7a..41d99435f62d 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,6 +33,7 @@
  * Note : Dont forget somaxconn that may limit backlog too.
  */
 int sysctl_max_syn_backlog = 256;
+EXPORT_SYMBOL(sysctl_max_syn_backlog);
 
 int reqsk_queue_alloc(struct request_sock_queue *queue,
 		      unsigned int nr_table_entries)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f73a1808e36..c2ebbe1c5a47 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct rt6_info *rt;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk->sk_gso_type = SKB_GSO_TCPV6;
 	__ip6_dst_store(sk, dst, NULL, NULL);
 
+	rt = (struct rt6_info *) dst;
+	if (tcp_death_row.sysctl_tw_recycle &&
+	    !tp->rx_opt.ts_recent_stamp &&
+	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
+		struct inet_peer *peer = rt6_get_peer(rt);
+		/*
+		 * VJ's idea. We save last timestamp seen from
+		 * the destination in peer table, when entering state
+		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
+		 * when trying new connection.
+		 */
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
+		}
+	}
+
 	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
 		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
@@ -1170,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
+	struct dst_entry *dst = NULL;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1267,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
+		struct inet_peer *peer = NULL;
+
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1279,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (!sk->sk_bound_dev_if &&
 		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
 			treq->iif = inet6_iif(skb);
-		if (!want_cookie) {
-			isn = tcp_v6_init_sequence(skb);
-		} else {
+
+		if (want_cookie) {
 			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
 			req->cookie_ts = tmp_opt.tstamp_ok;
+			goto have_isn;
+		}
+
+		/* VJ's idea. We save last timestamp seen
+		 * from the destination in peer table, when entering
+		 * state TIME-WAIT, and check against it before
+		 * accepting new connection request.
+		 *
+		 * If "isn" is not zero, this request hit alive
+		 * timewait bucket, so that all the necessary checks
+		 * are made in the function processing timewait state.
+		 */
+		if (tmp_opt.saw_tstamp &&
+		    tcp_death_row.sysctl_tw_recycle &&
+		    (dst = inet6_csk_route_req(sk, req)) != NULL &&
+		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
+		    ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
+				    &treq->rmt_addr)) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
+			    (s32)(peer->tcp_ts - req->ts_recent) >
+							TCP_PAWS_WINDOW) {
+				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+				goto drop_and_release;
+			}
+		}
+		/* Kill the following clause, if you dislike this way. */
+		else if (!sysctl_tcp_syncookies &&
+			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+			  (sysctl_max_syn_backlog >> 2)) &&
+			 (!peer || !peer->tcp_ts_stamp) &&
+			 (!dst || !dst_metric(dst, RTAX_RTT))) {
+			/* Without syncookies last quarter of
+			 * backlog is filled with destinations,
+			 * proven to be alive.
+			 * It means that we continue to communicate
+			 * to destinations, already remembered
+			 * to the moment of synflood.
+			 */
+			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
+				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+			goto drop_and_release;
 		}
+
+		isn = tcp_v6_init_sequence(skb);
 	}
+have_isn:
 	tcp_rsk(req)->snt_isn = isn;
 
 	security_inet_conn_request(sk, skb, req);
@@ -1298,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
+drop_and_release:
+	dst_release(dst);
 drop_and_free:
 	reqsk_free(req);
 drop:
@@ -1376,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	if (sk_acceptq_is_full(sk))
 		goto out_overflow;
 
-	if (dst == NULL) {
-		struct in6_addr *final_p, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		final_p = fl6_update_dst(&fl, opt, &final);
-		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
-		fl.oif = sk->sk_bound_dev_if;
-		fl.mark = sk->sk_mark;
-		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
-		fl.fl_ip_sport = inet_rsk(req)->loc_port;
-		security_req_classify_flow(req, &fl);
-
-		if (ip6_dst_lookup(sk, &dst, &fl))
-			goto out;
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+	if (!dst) {
+		dst = inet6_csk_route_req(sk, req);
+		if (!dst)
 			goto out;
 	}
 
-- 
cgit v1.2.3


From db3949c4506a21633469d71f2915cf660eea0a35 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 2 Dec 2010 11:52:07 -0800
Subject: tcp: Implement ipv6 ->get_peer() and ->tw_get_peer().

Now ipv6 timewait recycling is fully implemented and
enabled.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c2ebbe1c5a47..319458558df9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1865,19 +1865,33 @@ do_time_wait:
 
 static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 {
-	/* Alas, not yet... */
-	return NULL;
+	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet_peer *peer;
+
+	if (!rt ||
+	    !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
+		peer = inet_getpeer_v6(&np->daddr, 1);
+		*release_it = true;
+	} else {
+		if (!rt->rt6i_peer)
+			rt6_bind_peer(rt, 1);
+		peer = rt->rt6i_peer;
+		*release_it = true;
+	}
+
+	return peer;
 }
 
 static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
+	struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 	struct inet_timewait_sock *tw = inet_twsk(sk);
 
 	if (tw->tw_family == AF_INET)
 		return tcp_v4_tw_get_peer(sk);
 
-	/* Alas, not yet... */
-	return NULL;
+	return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
 }
 
 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
-- 
cgit v1.2.3


From 97b1ce25e8fc27f74703537ec09d4996c7a6e38a Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 1 Dec 2010 18:04:50 +0000
Subject: tcp: use TCP_BASE_MSS to set basic mss value

TCP_BASE_MSS is defined, but not used.
commit 5d424d5a introduce this macro, so use
it to initial sysctl_tcp_base_mss.

commit 5d424d5a674f782d0659a3b66d951f412901faee
Author: John Heffner <jheffner@psc.edu>
Date:   Mon Mar 20 17:53:41 2006 -0800

    [TCP]: MTU probing

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5f29b2e20e23..749b6498588e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
-- 
cgit v1.2.3


From b672083ed36a49c323737b7c7e1d5264a7c193af Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 1 Dec 2010 18:05:12 +0000
Subject: ipv6: use ND_REACHABLE_TIME and ND_RETRANS_TIMER instead of magic
 number

ND_REACHABLE_TIME and ND_RETRANS_TIMER have defined
since v2.6.12-rc2, but never been used.
So use them instead of magic number.

This patch also changes original code style to read comfortably .

Thank YOSHIFUJI Hideaki for pointing it out.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 998d6d27e7cf..e18f84130203 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -141,18 +141,18 @@ struct neigh_table nd_tbl = {
 	.proxy_redo =	pndisc_redo,
 	.id =		"ndisc_cache",
 	.parms = {
-		.tbl =			&nd_tbl,
-		.base_reachable_time =	30 * HZ,
-		.retrans_time =	 1 * HZ,
-		.gc_staletime =	60 * HZ,
-		.reachable_time =		30 * HZ,
-		.delay_probe_time =	 5 * HZ,
-		.queue_len =		 3,
-		.ucast_probes =	 3,
-		.mcast_probes =	 3,
-		.anycast_delay =	 1 * HZ,
-		.proxy_delay =		(8 * HZ) / 10,
-		.proxy_qlen =		64,
+		.tbl			= &nd_tbl,
+		.base_reachable_time	= ND_REACHABLE_TIME,
+		.retrans_time		= ND_RETRANS_TIMER,
+		.gc_staletime		= 60 * HZ,
+		.reachable_time		= ND_REACHABLE_TIME,
+		.delay_probe_time	= 5 * HZ,
+		.queue_len		= 3,
+		.ucast_probes		= 3,
+		.mcast_probes		= 3,
+		.anycast_delay		= 1 * HZ,
+		.proxy_delay		= (8 * HZ) / 10,
+		.proxy_qlen		= 64,
 	},
 	.gc_interval =	  30 * HZ,
 	.gc_thresh1 =	 128,
-- 
cgit v1.2.3


From d265fef6ddf9042195aae551e1fde211c2a1588b Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:53 +0000
Subject: tipc: Remove obsolete native API files and exports

As part of the removal of TIPC's native API support it is no longer
necessary for TIPC to export symbols for routines that can be called
by kernel-based applications, nor for it to have header files that
kernel-based applications can include to access the declarations for
those routines. This commit eliminates the exporting of symbols by
TIPC and migrates the contents of each obsolete native API include
file into its corresponding non-native API equivalent.

The code which was migrated in this commit was migrated intact, in
that there are no technical changes combined with the relocation.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tipc/tipc.h        | 186 ------------------------------------
 include/net/tipc/tipc_bearer.h | 138 ---------------------------
 include/net/tipc/tipc_msg.h    | 207 -----------------------------------------
 include/net/tipc/tipc_port.h   | 101 --------------------
 net/tipc/bcast.c               |   1 +
 net/tipc/bearer.h              |  69 +++++++++++++-
 net/tipc/config.c              |   2 +-
 net/tipc/core.c                |  40 --------
 net/tipc/core.h                |  14 ++-
 net/tipc/eth_media.c           |   6 +-
 net/tipc/msg.h                 | 168 +++++++++++++++++++++++++++++++--
 net/tipc/port.h                | 131 ++++++++++++++++++++++++++
 net/tipc/socket.c              |   3 +-
 net/tipc/subscr.c              |   1 +
 net/tipc/user_reg.h            |   5 +
 15 files changed, 382 insertions(+), 690 deletions(-)
 delete mode 100644 include/net/tipc/tipc.h
 delete mode 100644 include/net/tipc/tipc_bearer.h
 delete mode 100644 include/net/tipc/tipc_msg.h
 delete mode 100644 include/net/tipc/tipc_port.h

(limited to 'net')

diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h
deleted file mode 100644
index 1e0645e1eed2..000000000000
--- a/include/net/tipc/tipc.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * include/net/tipc/tipc.h: Main include file for TIPC users
- * 
- * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005,2010 Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _NET_TIPC_H_
-#define _NET_TIPC_H_
-
-#ifdef __KERNEL__
-
-#include <linux/tipc.h>
-#include <linux/skbuff.h>
-
-/* 
- * Native API
- */
-
-/*
- * TIPC operating mode routines
- */
-
-#define TIPC_NOT_RUNNING  0
-#define TIPC_NODE_MODE    1
-#define TIPC_NET_MODE     2
-
-typedef void (*tipc_mode_event)(void *usr_handle, int mode, u32 addr);
-
-int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle);
-
-void tipc_detach(unsigned int userref);
-
-/*
- * TIPC port manipulation routines
- */
-
-typedef void (*tipc_msg_err_event) (void *usr_handle,
-				    u32 portref,
-				    struct sk_buff **buf,
-				    unsigned char const *data,
-				    unsigned int size,
-				    int reason, 
-				    struct tipc_portid const *attmpt_destid);
-
-typedef void (*tipc_named_msg_err_event) (void *usr_handle,
-					  u32 portref,
-					  struct sk_buff **buf,
-					  unsigned char const *data,
-					  unsigned int size,
-					  int reason, 
-					  struct tipc_name_seq const *attmpt_dest);
-
-typedef void (*tipc_conn_shutdown_event) (void *usr_handle,
-					  u32 portref,
-					  struct sk_buff **buf,
-					  unsigned char const *data,
-					  unsigned int size,
-					  int reason);
-
-typedef void (*tipc_msg_event) (void *usr_handle,
-				u32 portref,
-				struct sk_buff **buf,
-				unsigned char const *data,
-				unsigned int size,
-				unsigned int importance, 
-				struct tipc_portid const *origin);
-
-typedef void (*tipc_named_msg_event) (void *usr_handle,
-				      u32 portref,
-				      struct sk_buff **buf,
-				      unsigned char const *data,
-				      unsigned int size,
-				      unsigned int importance, 
-				      struct tipc_portid const *orig,
-				      struct tipc_name_seq const *dest);
-
-typedef void (*tipc_conn_msg_event) (void *usr_handle,
-				     u32 portref,
-				     struct sk_buff **buf,
-				     unsigned char const *data,
-				     unsigned int size);
-
-typedef void (*tipc_continue_event) (void *usr_handle, 
-				     u32 portref);
-
-int tipc_createport(unsigned int tipc_user, 
-		    void *usr_handle, 
-		    unsigned int importance, 
-		    tipc_msg_err_event error_cb, 
-		    tipc_named_msg_err_event named_error_cb, 
-		    tipc_conn_shutdown_event conn_error_cb, 
-		    tipc_msg_event message_cb, 
-		    tipc_named_msg_event named_message_cb, 
-		    tipc_conn_msg_event conn_message_cb, 
-		    tipc_continue_event continue_event_cb,
-		    u32 *portref);
-
-int tipc_deleteport(u32 portref);
-
-int tipc_ownidentity(u32 portref, struct tipc_portid *port);
-
-int tipc_portimportance(u32 portref, unsigned int *importance);
-int tipc_set_portimportance(u32 portref, unsigned int importance);
-
-int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
-int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
-
-int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
-int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
-
-int tipc_publish(u32 portref, unsigned int scope, 
-		 struct tipc_name_seq const *name_seq);
-int tipc_withdraw(u32 portref, unsigned int scope,
-		  struct tipc_name_seq const *name_seq);
-
-int tipc_connect2port(u32 portref, struct tipc_portid const *port);
-
-int tipc_disconnect(u32 portref);
-
-int tipc_shutdown(u32 ref);
-
-/*
- * TIPC messaging routines
- */
-
-#define TIPC_PORT_IMPORTANCE 100	/* send using current port setting */
-
-
-int tipc_send(u32 portref,
-	      unsigned int num_sect,
-	      struct iovec const *msg_sect);
-
-int tipc_send2name(u32 portref, 
-		   struct tipc_name const *name, 
-		   u32 domain,
-		   unsigned int num_sect,
-		   struct iovec const *msg_sect);
-
-int tipc_send2port(u32 portref,
-		   struct tipc_portid const *dest,
-		   unsigned int num_sect,
-		   struct iovec const *msg_sect);
-
-int tipc_send_buf2port(u32 portref,
-		       struct tipc_portid const *dest,
-		       struct sk_buff *buf,
-		       unsigned int dsz);
-
-int tipc_multicast(u32 portref, 
-		   struct tipc_name_seq const *seq, 
-		   u32 domain,	/* currently unused */
-		   unsigned int section_count,
-		   struct iovec const *msg);
-#endif
-
-#endif
diff --git a/include/net/tipc/tipc_bearer.h b/include/net/tipc/tipc_bearer.h
deleted file mode 100644
index ee2f304e4919..000000000000
--- a/include/net/tipc/tipc_bearer.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * include/net/tipc/tipc_bearer.h: Include file for privileged access to TIPC bearers
- * 
- * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _NET_TIPC_BEARER_H_
-#define _NET_TIPC_BEARER_H_
-
-#ifdef __KERNEL__
-
-#include <linux/tipc_config.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-
-/*
- * Identifiers of supported TIPC media types
- */
-
-#define TIPC_MEDIA_TYPE_ETH	1
-
-/* 
- * Destination address structure used by TIPC bearers when sending messages
- * 
- * IMPORTANT: The fields of this structure MUST be stored using the specified
- * byte order indicated below, as the structure is exchanged between nodes
- * as part of a link setup process.
- */
-
-struct tipc_media_addr {
-	__be32  type;			/* bearer type (network byte order) */
-	union {
-		__u8   eth_addr[6];	/* 48 bit Ethernet addr (byte array) */ 
-#if 0
-		/* Prototypes for other possible bearer types */
-
-		struct {
-			__u16 sin_family;
-			__u16 sin_port;
-			struct {
-				__u32 s_addr;
-			} sin_addr;
-			char pad[4];
-		} addr_in;		/* IP-based bearer */
-		__u16  sock_descr;	/* generic socket bearer */
-#endif
-	} dev_addr;
-};
-
-/**
- * struct tipc_bearer - TIPC bearer info available to privileged users
- * @usr_handle: pointer to additional user-defined information about bearer
- * @mtu: max packet size bearer can support
- * @blocked: non-zero if bearer is blocked
- * @lock: spinlock for controlling access to bearer
- * @addr: media-specific address associated with bearer
- * @name: bearer name (format = media:interface)
- * 
- * Note: TIPC initializes "name" and "lock" fields; user is responsible for
- * initialization all other fields when a bearer is enabled.
- */
-
-struct tipc_bearer {
-	void *usr_handle;
-	u32 mtu;
-	int blocked;
-	spinlock_t lock;
-	struct tipc_media_addr addr;
-	char name[TIPC_MAX_BEARER_NAME];
-};
-
-/*
- * TIPC routines available to supported media types
- */
-
-int  tipc_register_media(u32 media_type,
-			 char *media_name, 
-			 int (*enable)(struct tipc_bearer *), 
-			 void (*disable)(struct tipc_bearer *), 
-			 int (*send_msg)(struct sk_buff *, 
-					 struct tipc_bearer *,
-					 struct tipc_media_addr *), 
-			 char *(*addr2str)(struct tipc_media_addr *a,
-					   char *str_buf,
-					   int str_size),
-			 struct tipc_media_addr *bcast_addr,
-			 const u32 bearer_priority,
-			 const u32 link_tolerance,  /* [ms] */
-			 const u32 send_window_limit); 
-
-void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
-
-int  tipc_block_bearer(const char *name);
-void tipc_continue(struct tipc_bearer *tb_ptr); 
-
-int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority);
-int tipc_disable_bearer(const char *name);
-
-/*
- * Routines made available to TIPC by supported media types
- */
-
-int  tipc_eth_media_start(void);
-void tipc_eth_media_stop(void);
-
-#endif
-
-#endif
diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h
deleted file mode 100644
index ffe50b4e7b93..000000000000
--- a/include/net/tipc/tipc_msg.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * include/net/tipc/tipc_msg.h: Include file for privileged access to TIPC message headers
- * 
- * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _NET_TIPC_MSG_H_
-#define _NET_TIPC_MSG_H_
-
-#ifdef __KERNEL__
-
-struct tipc_msg {
-	__be32 hdr[15];
-};
-
-
-/*
-		TIPC user data message header format, version 2:
-
-
-       1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w0:|vers | user  |hdr sz |n|d|s|-|          message size           |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w1:|mstyp| error |rer cnt|lsc|opt p|      broadcast ack no         |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w2:|        link level ack no      |   broadcast/link level seq no |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w3:|                       previous node                           |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w4:|                      originating port                         |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w5:|                      destination port                         |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+    
-   w6:|                      originating node                         |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w7:|                      destination node                         |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w8:|            name type / transport sequence number              |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-   w9:|              name instance/multicast lower bound              |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+    
-   wA:|                    multicast upper bound                      |
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+    
-      /                                                               /
-      \                           options                             \
-      /                                                               /
-      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-*/
-
-#define TIPC_CONN_MSG	0
-#define TIPC_MCAST_MSG	1
-#define TIPC_NAMED_MSG	2
-#define TIPC_DIRECT_MSG	3
-
-
-static inline u32 msg_word(struct tipc_msg *m, u32 pos)
-{
-	return ntohl(m->hdr[pos]);
-}
-
-static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask)
-{
-	return (msg_word(m, w) >> pos) & mask;
-}
-
-static inline u32 msg_importance(struct tipc_msg *m)
-{
-	return msg_bits(m, 0, 25, 0xf);
-}
-
-static inline u32 msg_hdr_sz(struct tipc_msg *m)
-{
-	return msg_bits(m, 0, 21, 0xf) << 2;
-}
-
-static inline int msg_short(struct tipc_msg *m)
-{
-	return msg_hdr_sz(m) == 24;
-}
-
-static inline u32 msg_size(struct tipc_msg *m)
-{
-	return msg_bits(m, 0, 0, 0x1ffff);
-}
-
-static inline u32 msg_data_sz(struct tipc_msg *m)
-{
-	return msg_size(m) - msg_hdr_sz(m);
-}
-
-static inline unchar *msg_data(struct tipc_msg *m)
-{
-	return ((unchar *)m) + msg_hdr_sz(m);
-}
-
-static inline u32 msg_type(struct tipc_msg *m)
-{
-	return msg_bits(m, 1, 29, 0x7);
-}
-
-static inline u32 msg_named(struct tipc_msg *m)
-{
-	return msg_type(m) == TIPC_NAMED_MSG;
-}
-
-static inline u32 msg_mcast(struct tipc_msg *m)
-{
-	return msg_type(m) == TIPC_MCAST_MSG;
-}
-
-static inline u32 msg_connected(struct tipc_msg *m)
-{
-	return msg_type(m) == TIPC_CONN_MSG;
-}
-
-static inline u32 msg_errcode(struct tipc_msg *m)
-{
-	return msg_bits(m, 1, 25, 0xf);
-}
-
-static inline u32 msg_prevnode(struct tipc_msg *m)
-{
-	return msg_word(m, 3);
-}
-
-static inline u32 msg_origport(struct tipc_msg *m)
-{
-	return msg_word(m, 4);
-}
-
-static inline u32 msg_destport(struct tipc_msg *m)
-{
-	return msg_word(m, 5);
-}
-
-static inline u32 msg_mc_netid(struct tipc_msg *m)
-{
-	return msg_word(m, 5);
-}
-
-static inline u32 msg_orignode(struct tipc_msg *m)
-{
-	if (likely(msg_short(m)))
-		return msg_prevnode(m);
-	return msg_word(m, 6);
-}
-
-static inline u32 msg_destnode(struct tipc_msg *m)
-{
-	return msg_word(m, 7);
-}
-
-static inline u32 msg_nametype(struct tipc_msg *m)
-{
-	return msg_word(m, 8);
-}
-
-static inline u32 msg_nameinst(struct tipc_msg *m)
-{
-	return msg_word(m, 9);
-}
-
-static inline u32 msg_namelower(struct tipc_msg *m)
-{
-	return msg_nameinst(m);
-}
-
-static inline u32 msg_nameupper(struct tipc_msg *m)
-{
-	return msg_word(m, 10);
-}
-
-#endif
-
-#endif
diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h
deleted file mode 100644
index 1893aaf49426..000000000000
--- a/include/net/tipc/tipc_port.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
- * 
- * Copyright (c) 1994-2007, Ericsson AB
- * Copyright (c) 2005-2008, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _NET_TIPC_PORT_H_
-#define _NET_TIPC_PORT_H_
-
-#ifdef __KERNEL__
-
-#include <linux/tipc.h>
-#include <linux/skbuff.h>
-#include <net/tipc/tipc_msg.h>
-
-#define TIPC_FLOW_CONTROL_WIN 512
-
-/**
- * struct tipc_port - native TIPC port info available to privileged users
- * @usr_handle: pointer to additional user-defined information about port
- * @lock: pointer to spinlock for controlling access to port
- * @connected: non-zero if port is currently connected to a peer port
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @conn_unacked: number of unacknowledged messages received from peer port
- * @published: non-zero if port has one or more associated names
- * @congested: non-zero if cannot send because of link or port congestion
- * @max_pkt: maximum packet size "hint" used when building messages sent by port
- * @ref: unique reference to port in TIPC object registry
- * @phdr: preformatted message header used when sending messages
- */
-
-struct tipc_port {
-        void *usr_handle;
-        spinlock_t *lock;
-	int connected;
-        u32 conn_type;
-        u32 conn_instance;
-	u32 conn_unacked;
-	int published;
-	u32 congested;
-	u32 max_pkt;
-	u32 ref;
-	struct tipc_msg phdr;
-};
-
-
-struct tipc_port *tipc_createport_raw(void *usr_handle,
-			u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
-			void (*wakeup)(struct tipc_port *),
-			const u32 importance);
-
-int tipc_reject_msg(struct sk_buff *buf, u32 err);
-
-int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode);
-
-void tipc_acknowledge(u32 port_ref,u32 ack);
-
-struct tipc_port *tipc_get_port(const u32 ref);
-
-/*
- * The following routines require that the port be locked on entry
- */
-
-int tipc_disconnect_port(struct tipc_port *tp_ptr);
-
-
-#endif
-
-#endif
-
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 22a60fc98392..7d449f03c385 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -47,6 +47,7 @@
 #include "name_distr.h"
 #include "bearer.h"
 #include "name_table.h"
+#include "port.h"
 #include "bcast.h"
 
 #define MAX_PKT_DEFAULT_MCAST 1500	/* bcast link max packet size (fixed) */
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index a850b389663e..49af7fae8b5a 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -43,6 +43,45 @@
 #define MAX_BEARERS 8
 #define MAX_MEDIA 4
 
+/*
+ * Identifiers of supported TIPC media types
+ */
+#define TIPC_MEDIA_TYPE_ETH	1
+
+/*
+ * Destination address structure used by TIPC bearers when sending messages
+ *
+ * IMPORTANT: The fields of this structure MUST be stored using the specified
+ * byte order indicated below, as the structure is exchanged between nodes
+ * as part of a link setup process.
+ */
+struct tipc_media_addr {
+	__be32  type;			/* bearer type (network byte order) */
+	union {
+		__u8   eth_addr[6];	/* 48 bit Ethernet addr (byte array) */
+	} dev_addr;
+};
+
+/**
+ * struct tipc_bearer - TIPC bearer info available to media code
+ * @usr_handle: pointer to additional media-specific information about bearer
+ * @mtu: max packet size bearer can support
+ * @blocked: non-zero if bearer is blocked
+ * @lock: spinlock for controlling access to bearer
+ * @addr: media-specific address associated with bearer
+ * @name: bearer name (format = media:interface)
+ *
+ * Note: TIPC initializes "name" and "lock" fields; media code is responsible
+ * for initialization all other fields when a bearer is enabled.
+ */
+struct tipc_bearer {
+	void *usr_handle;
+	u32 mtu;
+	int blocked;
+	spinlock_t lock;
+	struct tipc_media_addr addr;
+	char name[TIPC_MAX_BEARER_NAME];
+};
 
 /**
  * struct media - TIPC media information available to internal users
@@ -55,7 +94,7 @@
  * @priority: default link (and bearer) priority
  * @tolerance: default time (in ms) before declaring link failure
  * @window: default window (in packets) before declaring link congestion
- * @type_id: TIPC media identifier [defined in tipc_bearer.h]
+ * @type_id: TIPC media identifier
  * @name: media name
  */
 
@@ -116,6 +155,34 @@ struct link;
 
 extern struct bearer tipc_bearers[];
 
+/*
+ * TIPC routines available to supported media types
+ */
+int tipc_register_media(u32 media_type,
+		 char *media_name, int (*enable)(struct tipc_bearer *),
+		 void (*disable)(struct tipc_bearer *),
+		 int (*send_msg)(struct sk_buff *,
+			struct tipc_bearer *, struct tipc_media_addr *),
+		 char *(*addr2str)(struct tipc_media_addr *a,
+			char *str_buf, int str_size),
+		 struct tipc_media_addr *bcast_addr, const u32 bearer_priority,
+		 const u32 link_tolerance,  /* [ms] */
+		 const u32 send_window_limit);
+
+void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr);
+
+int  tipc_block_bearer(const char *name);
+void tipc_continue(struct tipc_bearer *tb_ptr);
+
+int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority);
+int tipc_disable_bearer(const char *name);
+
+/*
+ * Routines made available to TIPC by supported media types
+ */
+int  tipc_eth_media_start(void);
+void tipc_eth_media_stop(void);
+
 void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a);
 struct sk_buff *tipc_media_get_names(void);
 
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 50a6133a3668..82267f3cd3b7 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -43,8 +43,8 @@
 #include "addr.h"
 #include "name_table.h"
 #include "node.h"
+#include "user_reg.h"
 #include "config.h"
-#include "discover.h"
 
 struct subscr_data {
 	char usr_handle[8];
diff --git a/net/tipc/core.c b/net/tipc/core.c
index e2a09eb8efd4..785362f6a411 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -236,43 +236,3 @@ module_exit(tipc_exit);
 MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(TIPC_MOD_VER);
-
-/* Native TIPC API for kernel-space applications (see tipc.h) */
-
-EXPORT_SYMBOL(tipc_attach);
-EXPORT_SYMBOL(tipc_detach);
-EXPORT_SYMBOL(tipc_createport);
-EXPORT_SYMBOL(tipc_deleteport);
-EXPORT_SYMBOL(tipc_ownidentity);
-EXPORT_SYMBOL(tipc_portimportance);
-EXPORT_SYMBOL(tipc_set_portimportance);
-EXPORT_SYMBOL(tipc_portunreliable);
-EXPORT_SYMBOL(tipc_set_portunreliable);
-EXPORT_SYMBOL(tipc_portunreturnable);
-EXPORT_SYMBOL(tipc_set_portunreturnable);
-EXPORT_SYMBOL(tipc_publish);
-EXPORT_SYMBOL(tipc_withdraw);
-EXPORT_SYMBOL(tipc_connect2port);
-EXPORT_SYMBOL(tipc_disconnect);
-EXPORT_SYMBOL(tipc_shutdown);
-EXPORT_SYMBOL(tipc_send);
-EXPORT_SYMBOL(tipc_send2name);
-EXPORT_SYMBOL(tipc_send2port);
-EXPORT_SYMBOL(tipc_multicast);
-
-/* TIPC API for external bearers (see tipc_bearer.h) */
-
-EXPORT_SYMBOL(tipc_block_bearer);
-EXPORT_SYMBOL(tipc_continue);
-EXPORT_SYMBOL(tipc_disable_bearer);
-EXPORT_SYMBOL(tipc_enable_bearer);
-EXPORT_SYMBOL(tipc_recv_msg);
-EXPORT_SYMBOL(tipc_register_media);
-
-/* TIPC API for external APIs (see tipc_port.h) */
-
-EXPORT_SYMBOL(tipc_createport_raw);
-EXPORT_SYMBOL(tipc_reject_msg);
-EXPORT_SYMBOL(tipc_send_buf_fast);
-EXPORT_SYMBOL(tipc_acknowledge);
-
diff --git a/net/tipc/core.h b/net/tipc/core.h
index e19389e57227..ca7e171c1043 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -39,10 +39,6 @@
 
 #include <linux/tipc.h>
 #include <linux/tipc_config.h>
-#include <net/tipc/tipc_msg.h>
-#include <net/tipc/tipc_port.h>
-#include <net/tipc/tipc_bearer.h>
-#include <net/tipc/tipc.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -62,6 +58,9 @@
 
 #define TIPC_MOD_VER "2.0.0"
 
+struct tipc_msg;	/* msg.h */
+struct print_buf;	/* dbg.h */
+
 /*
  * TIPC sanity test macros
  */
@@ -173,6 +172,13 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
 
 #define ELINKCONG EAGAIN	/* link congestion <=> resource unavailable */
 
+/*
+ * TIPC operating mode routines
+ */
+#define TIPC_NOT_RUNNING  0
+#define TIPC_NODE_MODE    1
+#define TIPC_NET_MODE     2
+
 /*
  * Global configuration variables
  */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6e988ba485fd..ee683cc8f4b1 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -34,13 +34,13 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <net/tipc/tipc.h>
-#include <net/tipc/tipc_bearer.h>
-#include <net/tipc/tipc_msg.h>
 #include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 
+#include "core.h"
+#include "bearer.h"
+
 #define MAX_ETH_BEARERS		2
 #define ETH_LINK_PRIORITY	TIPC_DEF_LINK_PRI
 #define ETH_LINK_TOLERANCE	TIPC_DEF_LINK_TOL
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 031aad18efce..aee53864d7a0 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -37,10 +37,51 @@
 #ifndef _TIPC_MSG_H
 #define _TIPC_MSG_H
 
-#include "core.h"
+#include "bearer.h"
 
 #define TIPC_VERSION              2
 
+/*
+ *		TIPC user data message header format, version 2:
+ *
+ *
+ *     1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w0:|vers | user  |hdr sz |n|d|s|-|          message size           |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w1:|mstyp| error |rer cnt|lsc|opt p|      broadcast ack no         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w2:|        link level ack no      |   broadcast/link level seq no |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w3:|                       previous node                           |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w4:|                      originating port                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w5:|                      destination port                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w6:|                      originating node                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w7:|                      destination node                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w8:|            name type / transport sequence number              |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * w9:|              name instance/multicast lower bound              |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * wA:|                    multicast upper bound                      |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    /                                                               /
+ *    \                           options                             \
+ *    /                                                               /
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+
+#define TIPC_CONN_MSG		0
+#define TIPC_MCAST_MSG		1
+#define TIPC_NAMED_MSG		2
+#define TIPC_DIRECT_MSG		3
+
+
 #define SHORT_H_SIZE              24	/* Connected, in-cluster messages */
 #define DIR_MSG_H_SIZE            32	/* Directly addressed messages */
 #define LONG_H_SIZE               40	/* Named messages */
@@ -52,20 +93,26 @@
 #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
 
 
-/*
-		TIPC user data message header format, version 2
+struct tipc_msg {
+	__be32 hdr[15];
+};
 
-	- Fundamental definitions available to privileged TIPC users
-	  are located in tipc_msg.h.
-	- Remaining definitions available to TIPC internal users appear below.
-*/
 
+static inline u32 msg_word(struct tipc_msg *m, u32 pos)
+{
+	return ntohl(m->hdr[pos]);
+}
 
 static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
 {
 	m->hdr[w] = htonl(val);
 }
 
+static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask)
+{
+	return (msg_word(m, w) >> pos) & mask;
+}
+
 static inline void msg_set_bits(struct tipc_msg *m, u32 w,
 				u32 pos, u32 mask, u32 val)
 {
@@ -112,16 +159,36 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n)
 	msg_set_bits(m, 0, 25, 0xf, n);
 }
 
+static inline u32 msg_importance(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 25, 0xf);
+}
+
 static inline void msg_set_importance(struct tipc_msg *m, u32 i)
 {
 	msg_set_user(m, i);
 }
 
+static inline u32 msg_hdr_sz(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 21, 0xf) << 2;
+}
+
 static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n)
 {
 	msg_set_bits(m, 0, 21, 0xf, n>>2);
 }
 
+static inline u32 msg_size(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 0, 0x1ffff);
+}
+
+static inline u32 msg_data_sz(struct tipc_msg *m)
+{
+	return msg_size(m) - msg_hdr_sz(m);
+}
+
 static inline int msg_non_seq(struct tipc_msg *m)
 {
 	return msg_bits(m, 0, 20, 1);
@@ -162,11 +229,36 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz)
  * Word 1
  */
 
+static inline u32 msg_type(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 29, 0x7);
+}
+
 static inline void msg_set_type(struct tipc_msg *m, u32 n)
 {
 	msg_set_bits(m, 1, 29, 0x7, n);
 }
 
+static inline u32 msg_named(struct tipc_msg *m)
+{
+	return msg_type(m) == TIPC_NAMED_MSG;
+}
+
+static inline u32 msg_mcast(struct tipc_msg *m)
+{
+	return msg_type(m) == TIPC_MCAST_MSG;
+}
+
+static inline u32 msg_connected(struct tipc_msg *m)
+{
+	return msg_type(m) == TIPC_CONN_MSG;
+}
+
+static inline u32 msg_errcode(struct tipc_msg *m)
+{
+	return msg_bits(m, 1, 25, 0xf);
+}
+
 static inline void msg_set_errcode(struct tipc_msg *m, u32 err)
 {
 	msg_set_bits(m, 1, 25, 0xf, err);
@@ -257,31 +349,68 @@ static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode)
  */
 
 
+static inline u32 msg_prevnode(struct tipc_msg *m)
+{
+	return msg_word(m, 3);
+}
+
 static inline void msg_set_prevnode(struct tipc_msg *m, u32 a)
 {
 	msg_set_word(m, 3, a);
 }
 
+static inline u32 msg_origport(struct tipc_msg *m)
+{
+	return msg_word(m, 4);
+}
+
 static inline void msg_set_origport(struct tipc_msg *m, u32 p)
 {
 	msg_set_word(m, 4, p);
 }
 
+static inline u32 msg_destport(struct tipc_msg *m)
+{
+	return msg_word(m, 5);
+}
+
 static inline void msg_set_destport(struct tipc_msg *m, u32 p)
 {
 	msg_set_word(m, 5, p);
 }
 
+static inline u32 msg_mc_netid(struct tipc_msg *m)
+{
+	return msg_word(m, 5);
+}
+
 static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p)
 {
 	msg_set_word(m, 5, p);
 }
 
+static inline int msg_short(struct tipc_msg *m)
+{
+	return msg_hdr_sz(m) == 24;
+}
+
+static inline u32 msg_orignode(struct tipc_msg *m)
+{
+	if (likely(msg_short(m)))
+		return msg_prevnode(m);
+	return msg_word(m, 6);
+}
+
 static inline void msg_set_orignode(struct tipc_msg *m, u32 a)
 {
 	msg_set_word(m, 6, a);
 }
 
+static inline u32 msg_destnode(struct tipc_msg *m)
+{
+	return msg_word(m, 7);
+}
+
 static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
 {
 	msg_set_word(m, 7, a);
@@ -299,6 +428,11 @@ static inline u32 msg_routed(struct tipc_msg *m)
 	return(msg_destnode(m) ^ msg_orignode(m)) >> 11;
 }
 
+static inline u32 msg_nametype(struct tipc_msg *m)
+{
+	return msg_word(m, 8);
+}
+
 static inline void msg_set_nametype(struct tipc_msg *m, u32 n)
 {
 	msg_set_word(m, 8, n);
@@ -324,6 +458,16 @@ static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n)
 	msg_set_word(m, 8, n);
 }
 
+static inline u32 msg_nameinst(struct tipc_msg *m)
+{
+	return msg_word(m, 9);
+}
+
+static inline u32 msg_namelower(struct tipc_msg *m)
+{
+	return msg_nameinst(m);
+}
+
 static inline void msg_set_namelower(struct tipc_msg *m, u32 n)
 {
 	msg_set_word(m, 9, n);
@@ -334,11 +478,21 @@ static inline void msg_set_nameinst(struct tipc_msg *m, u32 n)
 	msg_set_namelower(m, n);
 }
 
+static inline u32 msg_nameupper(struct tipc_msg *m)
+{
+	return msg_word(m, 10);
+}
+
 static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
 {
 	msg_set_word(m, 10, n);
 }
 
+static inline unchar *msg_data(struct tipc_msg *m)
+{
+	return ((unchar *)m) + msg_hdr_sz(m);
+}
+
 static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
 {
 	return (struct tipc_msg *)msg_data(m);
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 73bbf442b346..8b9d87a3efae 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -44,6 +44,39 @@
 #include "dbg.h"
 #include "node_subscr.h"
 
+#define TIPC_FLOW_CONTROL_WIN 512
+
+typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size, int reason,
+		struct tipc_portid const *attmpt_destid);
+
+typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size, int reason,
+		struct tipc_name_seq const *attmpt_dest);
+
+typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size, int reason);
+
+typedef void (*tipc_msg_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size, unsigned int importance,
+		struct tipc_portid const *origin);
+
+typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size, unsigned int importance,
+		struct tipc_portid const *orig,
+		struct tipc_name_seq const *dest);
+
+typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref,
+		struct sk_buff **buf, unsigned char const *data,
+		unsigned int size);
+
+typedef void (*tipc_continue_event) (void *usr_handle, u32 portref);
+
 /**
  * struct user_port - TIPC user port (used with native API)
  * @user_ref: id of user who created user port
@@ -67,6 +100,34 @@ struct user_port {
 	struct list_head uport_list;
 };
 
+/**
+ * struct tipc_port - TIPC port info available to socket API
+ * @usr_handle: pointer to additional user-defined information about port
+ * @lock: pointer to spinlock for controlling access to port
+ * @connected: non-zero if port is currently connected to a peer port
+ * @conn_type: TIPC type used when connection was established
+ * @conn_instance: TIPC instance used when connection was established
+ * @conn_unacked: number of unacknowledged messages received from peer port
+ * @published: non-zero if port has one or more associated names
+ * @congested: non-zero if cannot send because of link or port congestion
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @ref: unique reference to port in TIPC object registry
+ * @phdr: preformatted message header used when sending messages
+ */
+struct tipc_port {
+	void *usr_handle;
+	spinlock_t *lock;
+	int connected;
+	u32 conn_type;
+	u32 conn_instance;
+	u32 conn_unacked;
+	int published;
+	u32 congested;
+	u32 max_pkt;
+	u32 ref;
+	struct tipc_msg phdr;
+};
+
 /**
  * struct port - TIPC port structure
  * @publ: TIPC port info available to privileged users
@@ -109,6 +170,76 @@ struct port {
 extern spinlock_t tipc_port_list_lock;
 struct port_list;
 
+/*
+ * TIPC port manipulation routines
+ */
+struct tipc_port *tipc_createport_raw(void *usr_handle,
+		u32 (*dispatcher)(struct tipc_port *, struct sk_buff *),
+		void (*wakeup)(struct tipc_port *), const u32 importance);
+
+int tipc_reject_msg(struct sk_buff *buf, u32 err);
+
+int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode);
+
+void tipc_acknowledge(u32 port_ref, u32 ack);
+
+int tipc_createport(unsigned int tipc_user, void *usr_handle,
+		unsigned int importance, tipc_msg_err_event error_cb,
+		tipc_named_msg_err_event named_error_cb,
+		tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb,
+		tipc_named_msg_event named_msg_cb,
+		tipc_conn_msg_event conn_msg_cb,
+		tipc_continue_event continue_event_cb, u32 *portref);
+
+int tipc_deleteport(u32 portref);
+
+int tipc_ownidentity(u32 portref, struct tipc_portid *port);
+
+int tipc_portimportance(u32 portref, unsigned int *importance);
+int tipc_set_portimportance(u32 portref, unsigned int importance);
+
+int tipc_portunreliable(u32 portref, unsigned int *isunreliable);
+int tipc_set_portunreliable(u32 portref, unsigned int isunreliable);
+
+int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable);
+int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
+
+int tipc_publish(u32 portref, unsigned int scope,
+		struct tipc_name_seq const *name_seq);
+int tipc_withdraw(u32 portref, unsigned int scope,
+		struct tipc_name_seq const *name_seq);
+
+int tipc_connect2port(u32 portref, struct tipc_portid const *port);
+
+int tipc_disconnect(u32 portref);
+
+int tipc_shutdown(u32 ref);
+
+
+/*
+ * The following routines require that the port be locked on entry
+ */
+int tipc_disconnect_port(struct tipc_port *tp_ptr);
+
+/*
+ * TIPC messaging routines
+ */
+#define TIPC_PORT_IMPORTANCE 100	/* send using current port setting */
+
+int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect);
+
+int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
+		unsigned int num_sect, struct iovec const *msg_sect);
+
+int tipc_send2port(u32 portref, struct tipc_portid const *dest,
+		unsigned int num_sect, struct iovec const *msg_sect);
+
+int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
+		struct sk_buff *buf, unsigned int dsz);
+
+int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain,
+		unsigned int section_count, struct iovec const *msg);
+
 int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e9f0d5004483..23a12e44347f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -49,10 +49,9 @@
 
 #include <linux/tipc.h>
 #include <linux/tipc_config.h>
-#include <net/tipc/tipc_msg.h>
-#include <net/tipc/tipc_port.h>
 
 #include "core.h"
+#include "port.h"
 
 #define SS_LISTENING	-1	/* socket is listening */
 #define SS_READY	-2	/* socket is connectionless */
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 33313961d010..a857e6ea857e 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -37,6 +37,7 @@
 #include "core.h"
 #include "dbg.h"
 #include "name_table.h"
+#include "user_reg.h"
 #include "port.h"
 #include "ref.h"
 #include "subscr.h"
diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h
index 81dc12e2882f..a05981fb9176 100644
--- a/net/tipc/user_reg.h
+++ b/net/tipc/user_reg.h
@@ -42,6 +42,11 @@
 int tipc_reg_start(void);
 void tipc_reg_stop(void);
 
+typedef void (*tipc_mode_event)(void *usr_handle, int mode, u32 addr);
+
+int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle);
+void tipc_detach(unsigned int userref);
+
 int tipc_reg_add_port(struct user_port *up_ptr);
 int tipc_reg_remove_port(struct user_port *up_ptr);
 
-- 
cgit v1.2.3


From c80262829769419e19527f972672e8df0480235a Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:54 +0000
Subject: tipc: Remove obsolete inclusions of header files

Gets rid of #include statements that are no longer required as a
result of the merging of obsolete native API header file content
into other TIPC include files.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c        | 2 --
 net/tipc/bcast.c       | 9 ---------
 net/tipc/bearer.c      | 3 ---
 net/tipc/bearer.h      | 1 -
 net/tipc/cluster.c     | 6 ------
 net/tipc/config.c      | 5 -----
 net/tipc/config.h      | 1 -
 net/tipc/core.c        | 1 -
 net/tipc/discover.c    | 2 --
 net/tipc/discover.h    | 2 --
 net/tipc/link.c        | 8 --------
 net/tipc/link.h        | 1 -
 net/tipc/msg.c         | 2 --
 net/tipc/name_distr.c  | 2 --
 net/tipc/name_table.c  | 5 -----
 net/tipc/net.c         | 5 -----
 net/tipc/node.c        | 6 ------
 net/tipc/node_subscr.c | 2 --
 net/tipc/port.c        | 6 ------
 net/tipc/port.h        | 2 --
 net/tipc/subscr.c      | 3 ---
 net/tipc/zone.c        | 3 ---
 22 files changed, 77 deletions(-)

(limited to 'net')

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 8a2e89bffde5..886715a75259 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,11 +35,9 @@
  */
 
 #include "core.h"
-#include "dbg.h"
 #include "addr.h"
 #include "zone.h"
 #include "cluster.h"
-#include "net.h"
 
 /**
  * tipc_addr_domain_valid - validates a network domain address
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 7d449f03c385..6d828d9eda42 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -36,18 +36,9 @@
  */
 
 #include "core.h"
-#include "msg.h"
-#include "dbg.h"
 #include "link.h"
-#include "net.h"
-#include "node.h"
 #include "port.h"
-#include "addr.h"
-#include "node_subscr.h"
 #include "name_distr.h"
-#include "bearer.h"
-#include "name_table.h"
-#include "port.h"
 #include "bcast.h"
 
 #define MAX_PKT_DEFAULT_MCAST 1500	/* bcast link max packet size (fixed) */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 9927d1d56c4f..347f255b0de2 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -36,12 +36,9 @@
 
 #include "core.h"
 #include "config.h"
-#include "dbg.h"
 #include "bearer.h"
-#include "link.h"
 #include "port.h"
 #include "discover.h"
-#include "bcast.h"
 
 #define MAX_ADDR_STR 32
 
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 49af7fae8b5a..8dc0e9268a28 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -37,7 +37,6 @@
 #ifndef _TIPC_BEARER_H
 #define _TIPC_BEARER_H
 
-#include "core.h"
 #include "bcast.h"
 
 #define MAX_BEARERS 8
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 7fea14b98b97..ee251798d482 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -36,13 +36,7 @@
 
 #include "core.h"
 #include "cluster.h"
-#include "addr.h"
-#include "node_subscr.h"
 #include "link.h"
-#include "node.h"
-#include "net.h"
-#include "msg.h"
-#include "bearer.h"
 
 static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 				u32 lower, u32 upper);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 82267f3cd3b7..5bb369669729 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -35,14 +35,9 @@
  */
 
 #include "core.h"
-#include "dbg.h"
-#include "bearer.h"
 #include "port.h"
 #include "link.h"
-#include "zone.h"
-#include "addr.h"
 #include "name_table.h"
-#include "node.h"
 #include "user_reg.h"
 #include "config.h"
 
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 481e12ece715..443159a166fd 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -39,7 +39,6 @@
 
 /* ---------------------------------------------------------------------- */
 
-#include "core.h"
 #include "link.h"
 
 struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 785362f6a411..f5d62c174de2 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -40,7 +40,6 @@
 #include <linux/random.h>
 
 #include "core.h"
-#include "dbg.h"
 #include "ref.h"
 #include "net.h"
 #include "user_reg.h"
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 4a7cd3719b78..f2ce36baf42e 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -35,9 +35,7 @@
  */
 
 #include "core.h"
-#include "dbg.h"
 #include "link.h"
-#include "zone.h"
 #include "discover.h"
 #include "port.h"
 #include "name_table.h"
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index f8e750636123..d2c3cffb79fc 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -37,8 +37,6 @@
 #ifndef _TIPC_DISCOVER_H
 #define _TIPC_DISCOVER_H
 
-#include "core.h"
-
 struct link_req;
 
 struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr,
diff --git a/net/tipc/link.c b/net/tipc/link.c
index b31992ccd5d3..aee6579438c7 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -35,19 +35,11 @@
  */
 
 #include "core.h"
-#include "dbg.h"
 #include "link.h"
-#include "net.h"
-#include "node.h"
 #include "port.h"
-#include "addr.h"
-#include "node_subscr.h"
 #include "name_distr.h"
-#include "bearer.h"
-#include "name_table.h"
 #include "discover.h"
 #include "config.h"
-#include "bcast.h"
 
 
 /*
diff --git a/net/tipc/link.h b/net/tipc/link.h
index f98bc613de67..c562888d25da 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -39,7 +39,6 @@
 
 #include "dbg.h"
 #include "msg.h"
-#include "bearer.h"
 #include "node.h"
 
 #define PUSH_FAILED   1
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ecb532fb0351..ee6b4c68d4a4 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -36,9 +36,7 @@
 
 #include "core.h"
 #include "addr.h"
-#include "dbg.h"
 #include "msg.h"
-#include "bearer.h"
 
 u32 tipc_msg_tot_importance(struct tipc_msg *m)
 {
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 7b907171f879..10ff48be3c01 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -36,9 +36,7 @@
 
 #include "core.h"
 #include "cluster.h"
-#include "dbg.h"
 #include "link.h"
-#include "msg.h"
 #include "name_distr.h"
 
 #define ITEM_SIZE sizeof(struct distr_item)
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 3a8de4334da1..d5adb0456746 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -36,15 +36,10 @@
 
 #include "core.h"
 #include "config.h"
-#include "dbg.h"
 #include "name_table.h"
 #include "name_distr.h"
-#include "addr.h"
-#include "node_subscr.h"
 #include "subscr.h"
 #include "port.h"
-#include "cluster.h"
-#include "bcast.h"
 
 static int tipc_nametbl_size = 1024;		/* must be a power of 2 */
 
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 1a621cfd6604..c2b4b86c2e6a 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -35,18 +35,13 @@
  */
 
 #include "core.h"
-#include "bearer.h"
 #include "net.h"
 #include "zone.h"
-#include "addr.h"
 #include "name_table.h"
 #include "name_distr.h"
 #include "subscr.h"
 #include "link.h"
-#include "msg.h"
 #include "port.h"
-#include "bcast.h"
-#include "discover.h"
 #include "config.h"
 
 /*
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b4d87eb2dc5d..bd959a855fd5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -37,13 +37,7 @@
 #include "core.h"
 #include "config.h"
 #include "node.h"
-#include "cluster.h"
-#include "net.h"
-#include "addr.h"
-#include "node_subscr.h"
-#include "link.h"
 #include "port.h"
-#include "bearer.h"
 #include "name_distr.h"
 
 void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 19194d476a9e..018a55332d91 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -35,10 +35,8 @@
  */
 
 #include "core.h"
-#include "dbg.h"
 #include "node_subscr.h"
 #include "node.h"
-#include "addr.h"
 
 /**
  * tipc_nodesub_subscribe - create "node down" subscription for specified node
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 82092eaa1536..e822117b79ad 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -36,15 +36,9 @@
 
 #include "core.h"
 #include "config.h"
-#include "dbg.h"
 #include "port.h"
-#include "addr.h"
-#include "link.h"
-#include "node.h"
 #include "name_table.h"
 #include "user_reg.h"
-#include "msg.h"
-#include "bcast.h"
 
 /* Connection management: */
 #define PROBING_INTERVAL 3600000	/* [ms] => 1 h */
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8b9d87a3efae..e038ce1ef8e8 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -37,11 +37,9 @@
 #ifndef _TIPC_PORT_H
 #define _TIPC_PORT_H
 
-#include "core.h"
 #include "ref.h"
 #include "net.h"
 #include "msg.h"
-#include "dbg.h"
 #include "node_subscr.h"
 
 #define TIPC_FLOW_CONTROL_WIN 512
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index a857e6ea857e..e7fb38ba577d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -35,11 +35,8 @@
  */
 
 #include "core.h"
-#include "dbg.h"
 #include "name_table.h"
 #include "user_reg.h"
-#include "port.h"
-#include "ref.h"
 #include "subscr.h"
 
 /**
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 83f8b5e91fc8..1b61ca8c48ef 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -36,9 +36,6 @@
 
 #include "core.h"
 #include "zone.h"
-#include "net.h"
-#include "addr.h"
-#include "node_subscr.h"
 #include "cluster.h"
 #include "node.h"
 
-- 
cgit v1.2.3


From 8d71919d7afc4ade0d9de09e1d50fbf9168c368d Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:55 +0000
Subject: tipc: Delete unused configuration service structure definition

Removes a structure definition that is no longer used by TIPC's
configuration service.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index 5bb369669729..2ee5a9a3cebf 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -41,13 +41,6 @@
 #include "user_reg.h"
 #include "config.h"
 
-struct subscr_data {
-	char usr_handle[8];
-	u32 domain;
-	u32 port_ref;
-	struct list_head subd_list;
-};
-
 struct manager {
 	u32 user_ref;
 	u32 port_ref;
-- 
cgit v1.2.3


From 28cc937eac00805e8b9c6e7ed7d590567378187f Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:56 +0000
Subject: tipc: Eliminate useless return value when disabling a bearer

Modifies bearer_disable() to return void since it always indicates
success anyway.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 347f255b0de2..885da94be4ac 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -622,7 +622,7 @@ int tipc_block_bearer(const char *name)
  * Note: This routine assumes caller holds tipc_net_lock.
  */
 
-static int bearer_disable(struct bearer *b_ptr)
+static void bearer_disable(struct bearer *b_ptr)
 {
 	struct link *l_ptr;
 	struct link *temp_l_ptr;
@@ -638,7 +638,6 @@ static int bearer_disable(struct bearer *b_ptr)
 	}
 	spin_unlock_bh(&b_ptr->publ.lock);
 	memset(b_ptr, 0, sizeof(struct bearer));
-	return 0;
 }
 
 int tipc_disable_bearer(const char *name)
@@ -651,8 +650,10 @@ int tipc_disable_bearer(const char *name)
 	if (b_ptr == NULL) {
 		warn("Attempt to disable unknown bearer <%s>\n", name);
 		res = -EINVAL;
-	} else
-		res = bearer_disable(b_ptr);
+	} else {
+		bearer_disable(b_ptr);
+		res = 0;
+	}
 	write_unlock_bh(&tipc_net_lock);
 	return res;
 }
-- 
cgit v1.2.3


From 528c771e87c3fa661bc6983b5bf0ba464d9f7c3a Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:57 +0000
Subject: tipc: Delete useless function prototypes

Removes several function declarations that aren't used anywhere,
either because they reference routines that no longer exist or
because all users of the function reference it after it has already
been defined.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.h  | 1 -
 net/tipc/cluster.c | 1 -
 net/tipc/node.c    | 1 -
 net/tipc/port.h    | 1 -
 4 files changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 8dc0e9268a28..85f451d5aacf 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -192,7 +192,6 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
 struct bearer *tipc_bearer_find_interface(const char *if_name);
 int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
 int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
-int tipc_bearer_init(void);
 void tipc_bearer_stop(void);
 void tipc_bearer_lock_push(struct bearer *b_ptr);
 
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index ee251798d482..405be87157ba 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -40,7 +40,6 @@
 
 static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 				u32 lower, u32 upper);
-static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest);
 
 struct tipc_node **tipc_local_nodes = NULL;
 struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index bd959a855fd5..df71dfc3a9ae 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,7 +40,6 @@
 #include "port.h"
 #include "name_distr.h"
 
-void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
 static void node_lost_contact(struct tipc_node *n_ptr);
 static void node_established_contact(struct tipc_node *n_ptr);
 
diff --git a/net/tipc/port.h b/net/tipc/port.h
index e038ce1ef8e8..547cd6e00312 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -242,7 +242,6 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err);
 struct sk_buff *tipc_port_get_ports(void);
-struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space);
 void tipc_port_recv_proto_msg(struct sk_buff *buf);
 void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp);
 void tipc_port_reinit(void);
-- 
cgit v1.2.3


From a5c2af9922a94a875c5f4b2dcd357a1c399b7ea6 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:58 +0000
Subject: tipc: Remove support for TIPC mode change callback

Eliminates support for the callback routine invoked when TIPC
changes its mode of operation from inactive to standalone or from
standalone to networked. This callback was part of TIPC's obsolete
native API and is not used by TIPC internally.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c   |  2 +-
 net/tipc/subscr.c   |  2 +-
 net/tipc/user_reg.c | 50 ++------------------------------------------------
 net/tipc/user_reg.h |  4 +---
 4 files changed, 5 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index 2ee5a9a3cebf..bdde39f0436b 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -560,7 +560,7 @@ int tipc_cfg_init(void)
 	struct tipc_name_seq seq;
 	int res;
 
-	res = tipc_attach(&mng.user_ref, NULL, NULL);
+	res = tipc_attach(&mng.user_ref);
 	if (res)
 		goto failed;
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index e7fb38ba577d..e13c89aeb6d2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -549,7 +549,7 @@ int tipc_subscr_start(void)
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
 	spin_lock_bh(&topsrv.lock);
-	res = tipc_attach(&topsrv.user_ref, NULL, NULL);
+	res = tipc_attach(&topsrv.user_ref);
 	if (res) {
 		spin_unlock_bh(&topsrv.lock);
 		return res;
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
index 506928803162..2e2702e2049c 100644
--- a/net/tipc/user_reg.c
+++ b/net/tipc/user_reg.c
@@ -50,15 +50,11 @@
 /**
  * struct tipc_user - registered TIPC user info
  * @next: index of next free registry entry (or -1 for an allocated entry)
- * @callback: ptr to routine to call when TIPC mode changes (NULL if none)
- * @usr_handle: user-defined value passed to callback routine
  * @ports: list of user ports owned by the user
  */
 
 struct tipc_user {
 	int next;
-	tipc_mode_event callback;
-	void *usr_handle;
 	struct list_head ports;
 };
 
@@ -94,42 +90,13 @@ static int reg_init(void)
 	return users ? 0 : -ENOMEM;
 }
 
-/**
- * reg_callback - inform TIPC user about current operating mode
- */
-
-static void reg_callback(struct tipc_user *user_ptr)
-{
-	tipc_mode_event cb;
-	void *arg;
-
-	spin_lock_bh(&reg_lock);
-	cb = user_ptr->callback;
-	arg = user_ptr->usr_handle;
-	spin_unlock_bh(&reg_lock);
-
-	if (cb)
-		cb(arg, tipc_mode, tipc_own_addr);
-}
-
 /**
  * tipc_reg_start - activate TIPC user registry
  */
 
 int tipc_reg_start(void)
 {
-	u32 u;
-	int res;
-
-	if ((res = reg_init()))
-		return res;
-
-	for (u = 1; u <= MAX_USERID; u++) {
-		if (users[u].callback)
-			tipc_k_signal((Handler)reg_callback,
-				      (unsigned long)&users[u]);
-	}
-	return 0;
+	return reg_init();
 }
 
 /**
@@ -138,15 +105,9 @@ int tipc_reg_start(void)
 
 void tipc_reg_stop(void)
 {
-	int id;
-
 	if (!users)
 		return;
 
-	for (id = 1; id <= MAX_USERID; id++) {
-		if (users[id].callback)
-			reg_callback(&users[id]);
-	}
 	kfree(users);
 	users = NULL;
 }
@@ -157,12 +118,10 @@ void tipc_reg_stop(void)
  * NOTE: This routine may be called when TIPC is inactive.
  */
 
-int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle)
+int tipc_attach(u32 *userid)
 {
 	struct tipc_user *user_ptr;
 
-	if ((tipc_mode == TIPC_NOT_RUNNING) && !cb)
-		return -ENOPROTOOPT;
 	if (!users)
 		reg_init();
 
@@ -177,13 +136,9 @@ int tipc_attach(u32 *userid, tipc_mode_event cb, void *usr_handle)
 	user_ptr->next = -1;
 	spin_unlock_bh(&reg_lock);
 
-	user_ptr->callback = cb;
-	user_ptr->usr_handle = usr_handle;
 	INIT_LIST_HEAD(&user_ptr->ports);
 	atomic_inc(&tipc_user_count);
 
-	if (cb && (tipc_mode != TIPC_NOT_RUNNING))
-		tipc_k_signal((Handler)reg_callback, (unsigned long)user_ptr);
 	return 0;
 }
 
@@ -207,7 +162,6 @@ void tipc_detach(u32 userid)
 	}
 
 	user_ptr = &users[userid];
-	user_ptr->callback = NULL;
 	INIT_LIST_HEAD(&ports_temp);
 	list_splice(&user_ptr->ports, &ports_temp);
 	user_ptr->next = next_free_user;
diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h
index a05981fb9176..109eed0d6de3 100644
--- a/net/tipc/user_reg.h
+++ b/net/tipc/user_reg.h
@@ -42,9 +42,7 @@
 int tipc_reg_start(void);
 void tipc_reg_stop(void);
 
-typedef void (*tipc_mode_event)(void *usr_handle, int mode, u32 addr);
-
-int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle);
+int tipc_attach(unsigned int *userref);
 void tipc_detach(unsigned int userref);
 
 int tipc_reg_add_port(struct user_port *up_ptr);
-- 
cgit v1.2.3


From 38f232eae20cefed2e2379d77c54babb0de6d024 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:00:59 +0000
Subject: tipc: Remove unused domain argument from multicast send routine

Eliminates an unused argument from tipc_multicast(), now that this
routine can no longer be called by kernel-based applications.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c   | 2 +-
 net/tipc/port.h   | 2 +-
 net/tipc/socket.c | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index e822117b79ad..01dcfeea1ec4 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -88,7 +88,7 @@ static void port_incr_out_seqno(struct port *p_ptr)
  * tipc_multicast - send a multicast message to local and remote destinations
  */
 
-int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain,
+int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 		   u32 num_sect, struct iovec const *msg_sect)
 {
 	struct tipc_msg *hdr;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 547cd6e00312..711b4a0fa443 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -235,7 +235,7 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest,
 int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest,
 		struct sk_buff *buf, unsigned int dsz);
 
-int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain,
+int tipc_multicast(u32 portref, struct tipc_name_seq const *seq,
 		unsigned int section_count, struct iovec const *msg);
 
 int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 23a12e44347f..34f96eda5fa3 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -596,7 +596,6 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 				break;
 			res = tipc_multicast(tport->ref,
 					     &dest->addr.nameseq,
-					     0,
 					     m->msg_iovlen,
 					     m->msg_iov);
 		}
-- 
cgit v1.2.3


From 52fe7b725e0a1360d36c720ee87ab1e559df69db Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:01:00 +0000
Subject: tipc: Eliminate useless initialization when creating subscriber

Removes initialization of a local variable that is always assigned
a different value before it is referenced.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index e13c89aeb6d2..23f43d03980c 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -542,7 +542,7 @@ static void subscr_named_msg_event(void *usr_handle,
 int tipc_subscr_start(void)
 {
 	struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV};
-	int res = -1;
+	int res;
 
 	memset(&topsrv, 0, sizeof (topsrv));
 	spin_lock_init(&topsrv.lock);
-- 
cgit v1.2.3


From 471450f7ec24ccd9ac24e6f05cd9358d40c09d03 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:01:01 +0000
Subject: tipc: Eliminate an unused symbolic constant in link code

Removes a symbol that is not referenced anywhere by TIPC's link code.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index aee6579438c7..cf414cf05e72 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -48,12 +48,6 @@
 
 #define INVALID_SESSION 0x10000
 
-/*
- * Limit for deferred reception queue:
- */
-
-#define DEF_QUEUE_LIMIT 256u
-
 /*
  * Link state events:
  */
-- 
cgit v1.2.3


From 12bae479ee414f45ad8fe93530f5b6ea241bde3f Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:01:02 +0000
Subject: tipc: Eliminate obsolete native API forwarding routines

Moves the content of each native API message forwarding routine
into the sole routine that calls it, since the forwarding routines
no longer be called in isolation. Also removes code in each routine
that altered the outgoing message's importance level since this is
now no longer possible.

The previous function mapping (parent function, and child API) was
as follows:

   tipc_send2name
       \--tipc_forward2name

   tipc_send2port
       \--tipc_forward2port

   tipc_send_buf2port
       \--tipc_forward_buf2port

After this commit, the children don't exist and their functionality
is completely in the respective parent.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 100 +++++++++-----------------------------------------------
 net/tipc/port.h |   2 --
 2 files changed, 15 insertions(+), 87 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 01dcfeea1ec4..73f232c1fe15 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1265,16 +1265,11 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 }
 
 /**
- * tipc_forward2name - forward message sections to port name
+ * tipc_send2name - send message sections to port name
  */
 
-static int tipc_forward2name(u32 ref,
-			     struct tipc_name const *name,
-			     u32 domain,
-			     u32 num_sect,
-			     struct iovec const *msg_sect,
-			     struct tipc_portid const *orig,
-			     unsigned int importance)
+int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain,
+	   unsigned int num_sect, struct iovec const *msg_sect)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -1288,14 +1283,12 @@ static int tipc_forward2name(u32 ref,
 
 	msg = &p_ptr->publ.phdr;
 	msg_set_type(msg, TIPC_NAMED_MSG);
-	msg_set_orignode(msg, orig->node);
-	msg_set_origport(msg, orig->ref);
+	msg_set_orignode(msg, tipc_own_addr);
+	msg_set_origport(msg, ref);
 	msg_set_hdr_sz(msg, LONG_H_SIZE);
 	msg_set_nametype(msg, name->type);
 	msg_set_nameinst(msg, name->instance);
 	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
-	if (importance <= TIPC_CRITICAL_IMPORTANCE)
-		msg_set_importance(msg,importance);
 	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
 	msg_set_destnode(msg, destnode);
 	msg_set_destport(msg, destport);
@@ -1319,33 +1312,11 @@ static int tipc_forward2name(u32 ref,
 }
 
 /**
- * tipc_send2name - send message sections to port name
- */
-
-int tipc_send2name(u32 ref,
-		   struct tipc_name const *name,
-		   unsigned int domain,
-		   unsigned int num_sect,
-		   struct iovec const *msg_sect)
-{
-	struct tipc_portid orig;
-
-	orig.ref = ref;
-	orig.node = tipc_own_addr;
-	return tipc_forward2name(ref, name, domain, num_sect, msg_sect, &orig,
-				 TIPC_PORT_IMPORTANCE);
-}
-
-/**
- * tipc_forward2port - forward message sections to port identity
+ * tipc_send2port - send message sections to port identity
  */
 
-static int tipc_forward2port(u32 ref,
-			     struct tipc_portid const *dest,
-			     unsigned int num_sect,
-			     struct iovec const *msg_sect,
-			     struct tipc_portid const *orig,
-			     unsigned int importance)
+int tipc_send2port(u32 ref, struct tipc_portid const *dest,
+	   unsigned int num_sect, struct iovec const *msg_sect)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -1357,13 +1328,11 @@ static int tipc_forward2port(u32 ref,
 
 	msg = &p_ptr->publ.phdr;
 	msg_set_type(msg, TIPC_DIRECT_MSG);
-	msg_set_orignode(msg, orig->node);
-	msg_set_origport(msg, orig->ref);
+	msg_set_orignode(msg, tipc_own_addr);
+	msg_set_origport(msg, ref);
 	msg_set_destnode(msg, dest->node);
 	msg_set_destport(msg, dest->ref);
 	msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
-	if (importance <= TIPC_CRITICAL_IMPORTANCE)
-		msg_set_importance(msg, importance);
 	p_ptr->sent++;
 	if (dest->node == tipc_own_addr)
 		return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1378,31 +1347,11 @@ static int tipc_forward2port(u32 ref,
 }
 
 /**
- * tipc_send2port - send message sections to port identity
+ * tipc_send_buf2port - send message buffer to port identity
  */
 
-int tipc_send2port(u32 ref,
-		   struct tipc_portid const *dest,
-		   unsigned int num_sect,
-		   struct iovec const *msg_sect)
-{
-	struct tipc_portid orig;
-
-	orig.ref = ref;
-	orig.node = tipc_own_addr;
-	return tipc_forward2port(ref, dest, num_sect, msg_sect, &orig,
-				 TIPC_PORT_IMPORTANCE);
-}
-
-/**
- * tipc_forward_buf2port - forward message buffer to port identity
- */
-static int tipc_forward_buf2port(u32 ref,
-				 struct tipc_portid const *dest,
-				 struct sk_buff *buf,
-				 unsigned int dsz,
-				 struct tipc_portid const *orig,
-				 unsigned int importance)
+int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
+	       struct sk_buff *buf, unsigned int dsz)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -1414,13 +1363,11 @@ static int tipc_forward_buf2port(u32 ref,
 
 	msg = &p_ptr->publ.phdr;
 	msg_set_type(msg, TIPC_DIRECT_MSG);
-	msg_set_orignode(msg, orig->node);
-	msg_set_origport(msg, orig->ref);
+	msg_set_orignode(msg, tipc_own_addr);
+	msg_set_origport(msg, ref);
 	msg_set_destnode(msg, dest->node);
 	msg_set_destport(msg, dest->ref);
 	msg_set_hdr_sz(msg, DIR_MSG_H_SIZE);
-	if (importance <= TIPC_CRITICAL_IMPORTANCE)
-		msg_set_importance(msg, importance);
 	msg_set_size(msg, DIR_MSG_H_SIZE + dsz);
 	if (skb_cow(buf, DIR_MSG_H_SIZE))
 		return -ENOMEM;
@@ -1439,20 +1386,3 @@ static int tipc_forward_buf2port(u32 ref,
 	return -ELINKCONG;
 }
 
-/**
- * tipc_send_buf2port - send message buffer to port identity
- */
-
-int tipc_send_buf2port(u32 ref,
-		       struct tipc_portid const *dest,
-		       struct sk_buff *buf,
-		       unsigned int dsz)
-{
-	struct tipc_portid orig;
-
-	orig.ref = ref;
-	orig.node = tipc_own_addr;
-	return tipc_forward_buf2port(ref, dest, buf, dsz, &orig,
-				     TIPC_PORT_IMPORTANCE);
-}
-
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 711b4a0fa443..3f6c0aaa171d 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -222,8 +222,6 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr);
 /*
  * TIPC messaging routines
  */
-#define TIPC_PORT_IMPORTANCE 100	/* send using current port setting */
-
 int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect);
 
 int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain,
-- 
cgit v1.2.3


From b924dcf0038b8f83e65b44f679ad480d44f85aa6 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Tue, 30 Nov 2010 12:01:03 +0000
Subject: tipc: Delete tipc_ownidentity()

Moves the content of the native API routine tipc_ownidentity() into the
sole routine that calls it, since it can no longer be called in isolation.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c   | 7 -------
 net/tipc/port.h   | 2 --
 net/tipc/socket.c | 3 ++-
 3 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index 73f232c1fe15..7873283f4965 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -983,13 +983,6 @@ int tipc_createport(u32 user_ref,
 	return 0;
 }
 
-int tipc_ownidentity(u32 ref, struct tipc_portid *id)
-{
-	id->ref = ref;
-	id->node = tipc_own_addr;
-	return 0;
-}
-
 int tipc_portimportance(u32 ref, unsigned int *importance)
 {
 	struct port *p_ptr;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 3f6c0aaa171d..3a807fcec2be 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -191,8 +191,6 @@ int tipc_createport(unsigned int tipc_user, void *usr_handle,
 
 int tipc_deleteport(u32 portref);
 
-int tipc_ownidentity(u32 portref, struct tipc_portid *port);
-
 int tipc_portimportance(u32 portref, unsigned int *importance);
 int tipc_set_portimportance(u32 portref, unsigned int importance);
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34f96eda5fa3..cd0bb77f2673 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -403,7 +403,8 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
 		addr->addr.id.ref = tsock->peer_name.ref;
 		addr->addr.id.node = tsock->peer_name.node;
 	} else {
-		tipc_ownidentity(tsock->p->ref, &addr->addr.id);
+		addr->addr.id.ref = tsock->p->ref;
+		addr->addr.id.node = tipc_own_addr;
 	}
 
 	*uaddr_len = sizeof(*addr);
-- 
cgit v1.2.3


From ca44ac386181ba710a9ab6db900d6c1e5451b366 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 29 Nov 2010 22:48:46 +0000
Subject: net: don't reallocate skb->head unless the current one hasn't the
 needed extra size or is shared

skb head being allocated by kmalloc(), it might be larger than what
actually requested because of discrete kmem caches sizes. Before
reallocating a new skb head, check if the current one has the needed
extra size.

Do this check only if skb head is not shared.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f8444754a..8814a9a52f47 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -778,6 +778,28 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
+	/* Check if we can avoid taking references on fragments if we own
+	 * the last reference on skb->head. (see skb_release_data())
+	 */
+	if (!skb->cloned)
+		fastpath = true;
+	else {
+		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+	}
+
+	if (fastpath &&
+	    size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+		memmove(skb->head + size, skb_shinfo(skb),
+			offsetof(struct skb_shared_info,
+				 frags[skb_shinfo(skb)->nr_frags]));
+		memmove(skb->head + nhead, skb->head,
+			skb_tail_pointer(skb) - skb->head);
+		off = nhead;
+		goto adjust_others;
+	}
+
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
@@ -791,17 +813,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	       skb_shinfo(skb),
 	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
-	/* Check if we can avoid taking references on fragments if we own
-	 * the last reference on skb->head. (see skb_release_data())
-	 */
-	if (!skb->cloned)
-		fastpath = true;
-	else {
-		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
-		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
-	}
-
 	if (fastpath) {
 		kfree(skb->head);
 	} else {
@@ -816,6 +827,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	off = (data + nhead) - skb->head;
 
 	skb->head     = data;
+adjust_others:
 	skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->end      = size;
-- 
cgit v1.2.3


From df6bd743b6f06b066c1c3ba7f2853a6e8d61468c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 14 Jun 2010 02:26:15 -0300
Subject: Bluetooth: Don't accept ConfigReq if we aren't in the BT_CONFIG state

If such event happens we shall reply with a Command Reject, because we are
not expecting any configure request.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c12eccfdfe01..c791fcda7b2d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3124,8 +3124,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	if (!sk)
 		return -ENOENT;
 
-	if (sk->sk_state == BT_DISCONN)
+	if (sk->sk_state != BT_CONFIG) {
+		struct l2cap_cmd_rej rej;
+
+		rej.reason = cpu_to_le16(0x0002);
+		l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
+				sizeof(rej), &rej);
 		goto unlock;
+	}
 
 	/* Reject if config buffer is too small. */
 	len = cmd_len - sizeof(*req);
-- 
cgit v1.2.3


From c1ce5a74d113f221d40625bd3ad83df2db2695b7 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Wed, 1 Dec 2010 16:34:45 +0100
Subject: mac80211: Update last_tx_rate only for data frames

The last_tx_rate field was also updated for non-data frames that are
often sent with a lower rate (for example management frames at 1 Mbps).
This is confusing when the data rate is actually much higher.

Hence, only update the last_tx_rate field with tx rate information
gathered from last data frames.

If the rate control algorithm filled in txrc.reported_rate we don't need
to verify this information.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2ba742656825..5d6b0759d18c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -666,10 +666,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	if (unlikely(info->control.rates[0].idx < 0))
 		return TX_DROP;
 
-	if (txrc.reported_rate.idx < 0)
+	if (txrc.reported_rate.idx < 0) {
 		txrc.reported_rate = info->control.rates[0];
-
-	if (tx->sta)
+		if (tx->sta && ieee80211_is_data(hdr->frame_control))
+			tx->sta->last_tx_rate = txrc.reported_rate;
+	} else if (tx->sta)
 		tx->sta->last_tx_rate = txrc.reported_rate;
 
 	if (unlikely(!info->control.rates[0].count))
-- 
cgit v1.2.3


From 75706d0e9d19601534446982b70102bb9327169b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 2 Dec 2010 21:01:07 +0100
Subject: mac80211: remove a redundant check

ieee80211_is_nullfunc() implies ieee80211_is_data()

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 794807914940..f570801514f1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1067,8 +1067,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr)
 {
-	if (!ieee80211_is_data(hdr->frame_control) &&
-	    !ieee80211_is_nullfunc(hdr->frame_control))
+	if (!ieee80211_is_data(hdr->frame_control))
 	    return;
 
 	ieee80211_sta_reset_conn_monitor(sdata);
-- 
cgit v1.2.3


From 04ac3c0ee2c773c321ec472d892635a20556f34d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 2 Dec 2010 21:01:08 +0100
Subject: mac80211: speed up AP probing using nullfunc frames

If the nullfunc frame used to probe the AP was not acked, there is no point
in waiting for the probe timeout, so advance to the next try (or disconnect)
immediately.
If we do reach the probe timeout without having received a tx status, the
connection is probably really bad and worth disconnecting.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  3 +-
 net/mac80211/mlme.c        | 93 +++++++++++++++++++++++++++++++++-------------
 net/mac80211/status.c      | 18 ++++-----
 3 files changed, 79 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 66b0b52b828d..e7c880725639 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -357,6 +357,7 @@ struct ieee80211_if_managed {
 	unsigned long beacon_timeout;
 	unsigned long probe_timeout;
 	int probe_send_count;
+	bool nullfunc_failed;
 
 	struct mutex mtx;
 	struct cfg80211_bss *associated;
@@ -1271,7 +1272,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr);
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
-			     struct ieee80211_hdr *hdr);
+			     struct ieee80211_hdr *hdr, bool ack);
 void ieee80211_beacon_connection_loss_work(struct work_struct *work);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f570801514f1..3a1dde3c7956 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1065,16 +1065,20 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata)
 }
 
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
-			     struct ieee80211_hdr *hdr)
+			     struct ieee80211_hdr *hdr, bool ack)
 {
 	if (!ieee80211_is_data(hdr->frame_control))
 	    return;
 
-	ieee80211_sta_reset_conn_monitor(sdata);
+	if (ack)
+		ieee80211_sta_reset_conn_monitor(sdata);
 
 	if (ieee80211_is_nullfunc(hdr->frame_control) &&
 	    sdata->u.mgd.probe_send_count > 0) {
-		sdata->u.mgd.probe_send_count = 0;
+		if (ack)
+			sdata->u.mgd.probe_send_count = 0;
+		else
+			sdata->u.mgd.nullfunc_failed = true;
 		ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 	}
 }
@@ -1101,9 +1105,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	 * anymore. The timeout will be reset if the frame is ACKed by
 	 * the AP.
 	 */
-	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+		ifmgd->nullfunc_failed = false;
 		ieee80211_send_nullfunc(sdata->local, sdata, 0);
-	else {
+	} else {
 		ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
 		ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
 	}
@@ -1912,6 +1917,31 @@ static void ieee80211_sta_timer(unsigned long data)
 	ieee80211_queue_work(&local->hw, &sdata->work);
 }
 
+static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
+					  u8 *bssid)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+	ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
+			  IEEE80211_STA_BEACON_POLL);
+
+	ieee80211_set_disassoc(sdata, true, true);
+	mutex_unlock(&ifmgd->mtx);
+	mutex_lock(&local->mtx);
+	ieee80211_recalc_idle(local);
+	mutex_unlock(&local->mtx);
+	/*
+	 * must be outside lock due to cfg80211,
+	 * but that's not a problem.
+	 */
+	ieee80211_send_deauth_disassoc(sdata, bssid,
+			IEEE80211_STYPE_DEAUTH,
+			WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
+			NULL, true);
+	mutex_lock(&ifmgd->mtx);
+}
+
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -1936,11 +1966,38 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 		/* ACK received for nullfunc probing frame */
 		if (!ifmgd->probe_send_count)
 			ieee80211_reset_ap_probe(sdata);
-
-		else if (time_is_after_jiffies(ifmgd->probe_timeout))
+		else if (ifmgd->nullfunc_failed) {
+			if (ifmgd->probe_send_count < max_tries) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+				wiphy_debug(local->hw.wiphy,
+					    "%s: No ack for nullfunc frame to"
+					    " AP %pM, try %d\n",
+					    sdata->name, bssid,
+					    ifmgd->probe_send_count);
+#endif
+				ieee80211_mgd_probe_ap_send(sdata);
+			} else {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+				wiphy_debug(local->hw.wiphy,
+					    "%s: No ack for nullfunc frame to"
+					    " AP %pM, disconnecting.\n",
+					    sdata->name, bssid,
+					    ifmgd->probe_send_count);
+#endif
+				ieee80211_sta_connection_lost(sdata, bssid);
+			}
+		} else if (time_is_after_jiffies(ifmgd->probe_timeout))
 			run_again(ifmgd, ifmgd->probe_timeout);
-
-		else if (ifmgd->probe_send_count < max_tries) {
+		else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
+#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
+			wiphy_debug(local->hw.wiphy,
+				    "%s: Failed to send nullfunc to AP %pM"
+				    " after %dms, disconnecting.\n",
+				    sdata->name,
+				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
+#endif
+			ieee80211_sta_connection_lost(sdata, bssid);
+		} else if (ifmgd->probe_send_count < max_tries) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 			wiphy_debug(local->hw.wiphy,
 				    "%s: No probe response from AP %pM"
@@ -1955,27 +2012,13 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 			 * We actually lost the connection ... or did we?
 			 * Let's make sure!
 			 */
-			ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
-					  IEEE80211_STA_BEACON_POLL);
 			wiphy_debug(local->hw.wiphy,
 				    "%s: No probe response from AP %pM"
 				    " after %dms, disconnecting.\n",
 				    sdata->name,
 				    bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
-			ieee80211_set_disassoc(sdata, true, true);
-			mutex_unlock(&ifmgd->mtx);
-			mutex_lock(&local->mtx);
-			ieee80211_recalc_idle(local);
-			mutex_unlock(&local->mtx);
-			/*
-			 * must be outside lock due to cfg80211,
-			 * but that's not a problem.
-			 */
-			ieee80211_send_deauth_disassoc(sdata, bssid,
-					IEEE80211_STYPE_DEAUTH,
-					WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-					NULL, true);
-			mutex_lock(&ifmgd->mtx);
+
+			ieee80211_sta_connection_lost(sdata, bssid);
 		}
 	}
 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 4958710a7d92..38a797217a91 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -155,10 +155,6 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
 
 		ieee80211_queue_work(&local->hw, &local->recalc_smps);
 	}
-
-	if ((sdata->vif.type == NL80211_IFTYPE_STATION) &&
-	    (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
-		ieee80211_sta_tx_notify(sdata, (void *) skb->data);
 }
 
 /*
@@ -186,6 +182,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	int retry_count = -1, i;
 	int rates_idx = -1;
 	bool send_to_cooked;
+	bool acked;
 
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
 		/* the HW cannot have attempted that rate */
@@ -211,8 +208,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		if (memcmp(hdr->addr2, sta->sdata->vif.addr, ETH_ALEN))
 			continue;
 
-		if (!(info->flags & IEEE80211_TX_STAT_ACK) &&
-		    test_sta_flags(sta, WLAN_STA_PS_STA)) {
+		acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+		if (!acked && test_sta_flags(sta, WLAN_STA_PS_STA)) {
 			/*
 			 * The STA is in power save mode, so assume
 			 * that this TX packet failed because of that.
@@ -244,7 +241,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			rcu_read_unlock();
 			return;
 		} else {
-			if (!(info->flags & IEEE80211_TX_STAT_ACK))
+			if (!acked)
 				sta->tx_retry_failed++;
 			sta->tx_retry_count += retry_count;
 		}
@@ -253,10 +250,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		if (ieee80211_vif_is_mesh(&sta->sdata->vif))
 			ieee80211s_update_metric(local, sta, skb);
 
-		if (!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
-		    (info->flags & IEEE80211_TX_STAT_ACK))
+		if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked)
 			ieee80211_frame_acked(sta, skb);
 
+		if ((sta->sdata->vif.type == NL80211_IFTYPE_STATION) &&
+		    (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS))
+			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data, acked);
+
 		if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) {
 			if (info->flags & IEEE80211_TX_STAT_ACK) {
 				if (sta->lost_packets)
-- 
cgit v1.2.3


From 7903264402546f45f9bac8ad2bfdb00d00eb124a Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 30 Nov 2010 06:38:00 +0000
Subject: net: Fix too optimistic NETIF_F_HW_CSUM features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM+NETIF_F_IPV6_CSUM, but
some drivers miss the difference. Fix this and also fix UFO dependency
on checksumming offload as it makes the same mistake in assumptions.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/benet/be_main.c           |  6 ++++--
 drivers/net/bnx2x/bnx2x_main.c        |  4 ++--
 drivers/net/jme.c                     | 16 +++++++++-------
 drivers/net/pch_gbe/pch_gbe_ethtool.c | 19 +------------------
 drivers/net/pch_gbe/pch_gbe_main.c    |  6 +++---
 drivers/net/sc92031.c                 |  3 ++-
 drivers/net/stmmac/stmmac_ethtool.c   | 12 +-----------
 drivers/net/stmmac/stmmac_main.c      |  5 +++--
 drivers/net/vxge/vxge-ethtool.c       |  2 +-
 drivers/net/vxge/vxge-main.c          |  2 +-
 net/core/dev.c                        |  7 +++++--
 net/core/ethtool.c                    |  4 +++-
 12 files changed, 35 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c
index 102567ee68c2..275428032ce2 100644
--- a/drivers/net/benet/be_main.c
+++ b/drivers/net/benet/be_main.c
@@ -2583,10 +2583,12 @@ static void be_netdev_init(struct net_device *netdev)
 	int i;
 
 	netdev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_RX | NETIF_F_TSO |
-		NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER | NETIF_F_HW_CSUM |
+		NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_GRO | NETIF_F_TSO6;
 
-	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_HW_CSUM;
+	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 
 	if (lancer_chip(adapter))
 		netdev->vlan_features |= NETIF_F_TSO6;
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index 1552fc3c1351..0068a1dbc064 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -8957,7 +8957,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 	dev->netdev_ops = &bnx2x_netdev_ops;
 	bnx2x_set_ethtool_ops(dev);
 	dev->features |= NETIF_F_SG;
-	dev->features |= NETIF_F_HW_CSUM;
+	dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	if (bp->flags & USING_DAC_FLAG)
 		dev->features |= NETIF_F_HIGHDMA;
 	dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
@@ -8965,7 +8965,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
 	dev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
 
 	dev->vlan_features |= NETIF_F_SG;
-	dev->vlan_features |= NETIF_F_HW_CSUM;
+	dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	if (bp->flags & USING_DAC_FLAG)
 		dev->vlan_features |= NETIF_F_HIGHDMA;
 	dev->vlan_features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index c57d9a43ceca..2411e72ba572 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -2076,12 +2076,11 @@ jme_change_mtu(struct net_device *netdev, int new_mtu)
 	}
 
 	if (new_mtu > 1900) {
-		netdev->features &= ~(NETIF_F_HW_CSUM |
-				NETIF_F_TSO |
-				NETIF_F_TSO6);
+		netdev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+				NETIF_F_TSO | NETIF_F_TSO6);
 	} else {
 		if (test_bit(JME_FLAG_TXCSUM, &jme->flags))
-			netdev->features |= NETIF_F_HW_CSUM;
+			netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 		if (test_bit(JME_FLAG_TSO, &jme->flags))
 			netdev->features |= NETIF_F_TSO | NETIF_F_TSO6;
 	}
@@ -2514,10 +2513,12 @@ jme_set_tx_csum(struct net_device *netdev, u32 on)
 	if (on) {
 		set_bit(JME_FLAG_TXCSUM, &jme->flags);
 		if (netdev->mtu <= 1900)
-			netdev->features |= NETIF_F_HW_CSUM;
+			netdev->features |=
+				NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	} else {
 		clear_bit(JME_FLAG_TXCSUM, &jme->flags);
-		netdev->features &= ~NETIF_F_HW_CSUM;
+		netdev->features &=
+				~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
 	}
 
 	return 0;
@@ -2797,7 +2798,8 @@ jme_init_one(struct pci_dev *pdev,
 	netdev->netdev_ops = &jme_netdev_ops;
 	netdev->ethtool_ops		= &jme_ethtool_ops;
 	netdev->watchdog_timeo		= TX_TIMEOUT;
-	netdev->features		=	NETIF_F_HW_CSUM |
+	netdev->features		=	NETIF_F_IP_CSUM |
+						NETIF_F_IPV6_CSUM |
 						NETIF_F_SG |
 						NETIF_F_TSO |
 						NETIF_F_TSO6 |
diff --git a/drivers/net/pch_gbe/pch_gbe_ethtool.c b/drivers/net/pch_gbe/pch_gbe_ethtool.c
index c8cc32c0edc9..c8c873b31a89 100644
--- a/drivers/net/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/pch_gbe/pch_gbe_ethtool.c
@@ -468,18 +468,6 @@ static int pch_gbe_set_rx_csum(struct net_device *netdev, u32 data)
 	return 0;
 }
 
-/**
- * pch_gbe_get_tx_csum - Report whether transmit checksums are turned on or off
- * @netdev:  Network interface device structure
- * Returns
- *	true(1):  Checksum On
- *	false(0): Checksum Off
- */
-static u32 pch_gbe_get_tx_csum(struct net_device *netdev)
-{
-	return (netdev->features & NETIF_F_HW_CSUM) != 0;
-}
-
 /**
  * pch_gbe_set_tx_csum - Turn transmit checksums on or off
  * @netdev: Network interface device structure
@@ -493,11 +481,7 @@ static int pch_gbe_set_tx_csum(struct net_device *netdev, u32 data)
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
 
 	adapter->tx_csum = data;
-	if (data)
-		netdev->features |= NETIF_F_HW_CSUM;
-	else
-		netdev->features &= ~NETIF_F_HW_CSUM;
-	return 0;
+	return ethtool_op_set_tx_ipv6_csum(netdev, data);
 }
 
 /**
@@ -572,7 +556,6 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = {
 	.set_pauseparam = pch_gbe_set_pauseparam,
 	.get_rx_csum = pch_gbe_get_rx_csum,
 	.set_rx_csum = pch_gbe_set_rx_csum,
-	.get_tx_csum = pch_gbe_get_tx_csum,
 	.set_tx_csum = pch_gbe_set_tx_csum,
 	.get_strings = pch_gbe_get_strings,
 	.get_ethtool_stats = pch_gbe_get_ethtool_stats,
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index afb75066b14d..58e79033a8ee 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -2319,7 +2319,7 @@ static int pch_gbe_probe(struct pci_dev *pdev,
 	netdev->watchdog_timeo = PCH_GBE_WATCHDOG_PERIOD;
 	netif_napi_add(netdev, &adapter->napi,
 		       pch_gbe_napi_poll, PCH_GBE_RX_WEIGHT);
-	netdev->features = NETIF_F_HW_CSUM | NETIF_F_GRO;
+	netdev->features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO;
 	pch_gbe_set_ethtool_ops(netdev);
 
 	pch_gbe_mac_reset_hw(&adapter->hw);
@@ -2358,9 +2358,9 @@ static int pch_gbe_probe(struct pci_dev *pdev,
 	pch_gbe_check_options(adapter);
 
 	if (adapter->tx_csum)
-		netdev->features |= NETIF_F_HW_CSUM;
+		netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	else
-		netdev->features &= ~NETIF_F_HW_CSUM;
+		netdev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
 
 	/* initialize the wol settings based on the eeprom settings */
 	adapter->wake_up_evt = PCH_GBE_WL_INIT_SETTING;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index 417adf372828..76290a8c3c14 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -1449,7 +1449,8 @@ static int __devinit sc92031_probe(struct pci_dev *pdev,
 	dev->irq = pdev->irq;
 
 	/* faked with skb_copy_and_csum_dev */
-	dev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA;
+	dev->features = NETIF_F_SG | NETIF_F_HIGHDMA |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 
 	dev->netdev_ops		= &sc92031_netdev_ops;
 	dev->watchdog_timeo	= TX_TIMEOUT;
diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c
index f2695fd180ca..fd719edc7f7c 100644
--- a/drivers/net/stmmac/stmmac_ethtool.c
+++ b/drivers/net/stmmac/stmmac_ethtool.c
@@ -197,16 +197,6 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
 	}
 }
 
-static int stmmac_ethtool_set_tx_csum(struct net_device *netdev, u32 data)
-{
-	if (data)
-		netdev->features |= NETIF_F_HW_CSUM;
-	else
-		netdev->features &= ~NETIF_F_HW_CSUM;
-
-	return 0;
-}
-
 static u32 stmmac_ethtool_get_rx_csum(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
@@ -370,7 +360,7 @@ static struct ethtool_ops stmmac_ethtool_ops = {
 	.get_link = ethtool_op_get_link,
 	.get_rx_csum = stmmac_ethtool_get_rx_csum,
 	.get_tx_csum = ethtool_op_get_tx_csum,
-	.set_tx_csum = stmmac_ethtool_set_tx_csum,
+	.set_tx_csum = ethtool_op_set_tx_ipv6_csum,
 	.get_sg = ethtool_op_get_sg,
 	.set_sg = ethtool_op_set_sg,
 	.get_pauseparam = stmmac_get_pauseparam,
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 730a6fd79ee0..bfc2d1251502 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1494,7 +1494,8 @@ static int stmmac_probe(struct net_device *dev)
 	dev->netdev_ops = &stmmac_netdev_ops;
 	stmmac_set_ethtool_ops(dev);
 
-	dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA);
+	dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	dev->watchdog_timeo = msecs_to_jiffies(watchdog);
 #ifdef STMMAC_VLAN_TAG_USED
 	/* Both mac100 and gmac support receive VLAN tag detection */
@@ -1525,7 +1526,7 @@ static int stmmac_probe(struct net_device *dev)
 
 	DBG(probe, DEBUG, "%s: Scatter/Gather: %s - HW checksums: %s\n",
 	    dev->name, (dev->features & NETIF_F_SG) ? "on" : "off",
-	    (dev->features & NETIF_F_HW_CSUM) ? "on" : "off");
+	    (dev->features & NETIF_F_IP_CSUM) ? "on" : "off");
 
 	spin_lock_init(&priv->lock);
 
diff --git a/drivers/net/vxge/vxge-ethtool.c b/drivers/net/vxge/vxge-ethtool.c
index bc9bd1035706..1dd3a21b3a43 100644
--- a/drivers/net/vxge/vxge-ethtool.c
+++ b/drivers/net/vxge/vxge-ethtool.c
@@ -1177,7 +1177,7 @@ static const struct ethtool_ops vxge_ethtool_ops = {
 	.get_rx_csum		= vxge_get_rx_csum,
 	.set_rx_csum		= vxge_set_rx_csum,
 	.get_tx_csum		= ethtool_op_get_tx_csum,
-	.set_tx_csum		= ethtool_op_set_tx_hw_csum,
+	.set_tx_csum		= ethtool_op_set_tx_ipv6_csum,
 	.get_sg			= ethtool_op_get_sg,
 	.set_sg			= ethtool_op_set_sg,
 	.get_tso		= ethtool_op_get_tso,
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 8a84152e320a..4877b3b8a29e 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -3368,7 +3368,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
 
 	ndev->features |= NETIF_F_SG;
 
-	ndev->features |= NETIF_F_HW_CSUM;
+	ndev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	vxge_debug_init(vxge_hw_device_trace_level_get(hldev),
 		"%s : checksuming enabled", __func__);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cd2437495428..55ff66fabce4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5041,10 +5041,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 	}
 
 	if (features & NETIF_F_UFO) {
-		if (!(features & NETIF_F_GEN_CSUM)) {
+		/* maybe split UFO into V4 and V6? */
+		if (!((features & NETIF_F_GEN_CSUM) ||
+		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
 			if (name)
 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_HW_CSUM feature.\n",
+				       "since no checksum offload features.\n",
 				       name);
 			features &= ~NETIF_F_UFO;
 		}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 956a9f4971cb..d5bc28818883 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1171,7 +1171,9 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 	if (edata.data && !(dev->features & NETIF_F_SG))
 		return -EINVAL;
-	if (edata.data && !(dev->features & NETIF_F_HW_CSUM))
+	if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
 		return -EINVAL;
 	return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
-- 
cgit v1.2.3


From da2033c282264bfba4e339b7cb3df62adb5c5fc8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 30 Nov 2010 21:45:56 +0000
Subject: filter: add SKF_AD_RXHASH and SKF_AD_CPU

Add SKF_AD_RXHASH and SKF_AD_CPU to filter ancillary mechanism,
to be able to build advanced filters.

This can help spreading packets on several sockets with a fast
selection, after RPS dispatch to N cpus for example, or to catch a
percentage of flows in one queue.

tcpdump -s 500 "cpu = 1" :

[0] ld CPU
[1] jeq #1  jt 2  jf 3
[2] ret #500
[3] ret #0

# take 12.5 % of flows (average)
tcpdump -s 1000 "rxhash & 7 = 2" :

[0] ld RXHASH
[1] and #7
[2] jeq #2  jt 3  jf 4
[3] ret #1000
[4] ret #0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Rui <wirelesser@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 4 +++-
 net/core/filter.c      | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 447a775878fb..5334adaf4072 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -124,7 +124,9 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_MARK 	20
 #define SKF_AD_QUEUE	24
 #define SKF_AD_HATYPE	28
-#define SKF_AD_MAX	32
+#define SKF_AD_RXHASH	32
+#define SKF_AD_CPU	36
+#define SKF_AD_MAX	40
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index a44d27f9f0f0..054e286861d2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -375,6 +375,12 @@ load_b:
 				return 0;
 			A = skb->dev->type;
 			continue;
+		case SKF_AD_RXHASH:
+			A = skb->rxhash;
+			continue;
+		case SKF_AD_CPU:
+			A = raw_smp_processor_id();
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3


From f7fce74e387e0563e5a165704664aa5ee8b2f48b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 1 Dec 2010 06:03:06 +0000
Subject: net: kill an RCU warning in inet_fill_link_af()

commits 9f0f7272 (ipv4: AF_INET link address family) and cf7afbfeb8c
(rtnl: make link af-specific updates atomic) used incorrect
__in_dev_get_rcu() in RTNL protected contexts, triggering PROVE_RCU
warnings.

Switch to __in_dev_get_rtnl(), wich is more appropriate, since we hold
RTNL.

Based on a report and initial patch from Amerigo Wang.

Reported-by: Amerigo Wang <amwang@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Thomas Graf <tgraf@infradead.org>
Reviewed-by: WANG Cong <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d9f71bae45c4..3b067704ab38 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1258,7 +1258,7 @@ errout:
 
 static size_t inet_get_link_af_size(const struct net_device *dev)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 
 	if (!in_dev)
 		return 0;
@@ -1268,7 +1268,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
 
 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 	struct nlattr *nla;
 	int i;
 
@@ -1295,7 +1295,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int err, rem;
 
-	if (dev && !__in_dev_get_rcu(dev))
+	if (dev && !__in_dev_get_rtnl(dev))
 		return -EAFNOSUPPORT;
 
 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
@@ -1319,7 +1319,7 @@ static int inet_validate_link_af(const struct net_device *dev,
 
 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
 {
-	struct in_device *in_dev = __in_dev_get_rcu(dev);
+	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
 	int rem;
 
-- 
cgit v1.2.3


From 0af55bb58f8fa7865004ac48d16affe125ac1b7f Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 1 Dec 2010 02:52:20 +0000
Subject: af_packet: use vmalloc_to_page() instead for the addresss returned by
 vmalloc()

The following commit causes the pgv->buffer may point to the memory
returned by vmalloc(). And we can't use virt_to_page() for the vmalloc
address.

This patch introduces a new inline function pgv_to_page(), which calls
vmalloc_to_page() for the vmalloc address, and virt_to_page() for the
__get_free_pages address.

We used to increase page pointer to get the next page at the next page
address, after Neil's patch, it is wrong, as the physical address may
be not continuous. This patch also fixes this issue.

    commit 0e3125c755445664f00ad036e4fc2cd32fd52877
    Author: Neil Horman <nhorman@tuxdriver.com>
    Date:   Tue Nov 16 10:26:47 2010 -0800

    packet: Enhance AF_PACKET implementation to not require high order contiguous memory allocation (v4)

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 422705d62b5b..26fbeb140a6a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -224,6 +224,13 @@ struct packet_skb_cb {
 
 #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
 
+static inline struct page *pgv_to_page(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	return virt_to_page(addr);
+}
+
 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
 	union {
@@ -236,11 +243,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 	switch (po->tp_version) {
 	case TPACKET_V1:
 		h.h1->tp_status = status;
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 		break;
 	case TPACKET_V2:
 		h.h2->tp_status = status;
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		break;
 	default:
 		pr_err("TPACKET version not supported\n");
@@ -263,10 +270,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
 	h.raw = frame;
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 		return h.h1->tp_status;
 	case TPACKET_V2:
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		return h.h2->tp_status;
 	default:
 		pr_err("TPACKET version not supported\n");
@@ -800,15 +807,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	__packet_set_status(po, h.raw, status);
 	smp_mb();
 	{
-		struct page *p_start, *p_end;
-		u8 *h_end = h.raw + macoff + snaplen - 1;
-
-		p_start = virt_to_page(h.raw);
-		p_end = virt_to_page(h_end);
-		while (p_start <= p_end) {
-			flush_dcache_page(p_start);
-			p_start++;
-		}
+		u8 *start, *end;
+
+		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
+		for (start = h.raw; start < end; start += PAGE_SIZE)
+			flush_dcache_page(pgv_to_page(start));
 	}
 
 	sk->sk_data_ready(sk, 0);
@@ -915,7 +918,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	}
 
 	err = -EFAULT;
-	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
 	len = ((to_write > len_max) ? len_max : to_write);
@@ -934,11 +936,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 			return -EFAULT;
 		}
 
+		page = pgv_to_page(data);
+		data += len;
 		flush_dcache_page(page);
 		get_page(page);
-		skb_fill_page_desc(skb,
-				nr_frags,
-				page++, offset, len);
+		skb_fill_page_desc(skb, nr_frags, page, offset, len);
 		to_write -= len;
 		offset = 0;
 		len_max = PAGE_SIZE;
-- 
cgit v1.2.3


From c56b4d90123b77e29a91b9b96bb791f929139d8e Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 1 Dec 2010 02:52:57 +0000
Subject: af_packet: remove pgv.flags

As we can check if an address is vmalloc address with is_vmalloc_addr(),
we remove pgv.flags. Then we may get more pg_vecs.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 26fbeb140a6a..a11c731d2ee4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -167,7 +167,6 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 #define PGV_FROM_VMALLOC 1
 struct pgv {
 	char *buffer;
-	unsigned char flags;
 };
 
 struct packet_ring_buffer {
@@ -2342,7 +2341,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
 
 	for (i = 0; i < len; i++) {
 		if (likely(pg_vec[i].buffer)) {
-			if (pg_vec[i].flags & PGV_FROM_VMALLOC)
+			if (is_vmalloc_addr(pg_vec[i].buffer))
 				vfree(pg_vec[i].buffer);
 			else
 				free_pages((unsigned long)pg_vec[i].buffer,
@@ -2353,8 +2352,7 @@ static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
 	kfree(pg_vec);
 }
 
-static inline char *alloc_one_pg_vec_page(unsigned long order,
-					  unsigned char *flags)
+static inline char *alloc_one_pg_vec_page(unsigned long order)
 {
 	char *buffer = NULL;
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
@@ -2368,7 +2366,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order,
 	/*
 	 * __get_free_pages failed, fall back to vmalloc
 	 */
-	*flags |= PGV_FROM_VMALLOC;
 	buffer = vzalloc((1 << order) * PAGE_SIZE);
 
 	if (buffer)
@@ -2377,7 +2374,6 @@ static inline char *alloc_one_pg_vec_page(unsigned long order,
 	/*
 	 * vmalloc failed, lets dig into swap here
 	 */
-	*flags = 0;
 	gfp_flags &= ~__GFP_NORETRY;
 	buffer = (char *)__get_free_pages(gfp_flags, order);
 	if (buffer)
@@ -2400,8 +2396,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
 		goto out;
 
 	for (i = 0; i < block_nr; i++) {
-		pg_vec[i].buffer = alloc_one_pg_vec_page(order,
-							 &pg_vec[i].flags);
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
 		if (unlikely(!pg_vec[i].buffer))
 			goto out_free_pgvec;
 	}
@@ -2585,13 +2580,8 @@ static int packet_mmap(struct file *file, struct socket *sock,
 			void *kaddr = rb->pg_vec[i].buffer;
 			int pg_num;
 
-			for (pg_num = 0; pg_num < rb->pg_vec_pages;
-					pg_num++) {
-				if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
-					page = vmalloc_to_page(kaddr);
-				else
-					page = virt_to_page(kaddr);
-
+			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
+				page = pgv_to_page(kaddr);
 				err = vm_insert_page(vma, start, page);
 				if (unlikely(err))
 					goto out;
-- 
cgit v1.2.3


From ae9c416d686db74f67d73c1bebf1e3a7e8b3c5b5 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 1 Dec 2010 20:07:31 +0000
Subject: net: arp: use assignment

Only when dont_send is 0, arp_filter() is consulted, so we can simply
assign the return value of arp_filter() to dont_send instead.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7833f17b648a..10af759f2630 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -883,7 +883,7 @@ static int arp_process(struct sk_buff *skb)
 
 			dont_send = arp_ignore(in_dev, sip, tip);
 			if (!dont_send && IN_DEV_ARPFILTER(in_dev))
-				dont_send |= arp_filter(sip, tip, dev);
+				dont_send = arp_filter(sip, tip, dev);
 			if (!dont_send) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n) {
-- 
cgit v1.2.3


From 2d5311e4e8272fd398fc1cf278f12fd6dee4074b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 1 Dec 2010 20:46:24 +0000
Subject: filter: add a security check at install time

We added some security checks in commit 57fe93b374a6
(filter: make sure filters dont read uninitialized memory) to close a
potential leak of kernel information to user.

This added a potential extra cost at run time, while we can perform a
check of the filter itself, to make sure a malicious user doesnt try to
abuse us.

This patch adds a check_loads() function, whole unique purpose is to
make this check, allocating a temporary array of mask. We scan the
filter and propagate a bitmask information, telling us if a load M(K) is
allowed because a previous store M(K) is guaranteed. (So that
sk_run_filter() can possibly not read unitialized memory)

Note: this can uncover application bug, denying a filter attach,
previously allowed.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Dan Rosenberg <drosenberg@vsecurity.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 054e286861d2..ac4920a87be5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -166,11 +166,9 @@ unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
-	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
 
-	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
@@ -318,12 +316,10 @@ load_b:
 			X = K;
 			continue;
 		case BPF_S_LD_MEM:
-			A = (memvalid & (1UL << K)) ?
-				mem[K] : 0;
+			A = mem[K];
 			continue;
 		case BPF_S_LDX_MEM:
-			X = (memvalid & (1UL << K)) ?
-				mem[K] : 0;
+			X = mem[K];
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -336,11 +332,9 @@ load_b:
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			memvalid |= 1UL << K;
 			mem[K] = A;
 			continue;
 		case BPF_S_STX:
-			memvalid |= 1UL << K;
 			mem[K] = X;
 			continue;
 		default:
@@ -425,6 +419,66 @@ load_b:
 }
 EXPORT_SYMBOL(sk_run_filter);
 
+/*
+ * Security :
+ * A BPF program is able to use 16 cells of memory to store intermediate
+ * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
+ * As we dont want to clear mem[] array for each packet going through
+ * sk_run_filter(), we check that filter loaded by user never try to read
+ * a cell if not previously written, and we check all branches to be sure
+ * a malicious user doesnt try to abuse us.
+ */
+static int check_load_and_stores(struct sock_filter *filter, int flen)
+{
+	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	int pc, ret = 0;
+
+	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return -ENOMEM;
+	memset(masks, 0xff, flen * sizeof(*masks));
+
+	for (pc = 0; pc < flen; pc++) {
+		memvalid &= masks[pc];
+
+		switch (filter[pc].code) {
+		case BPF_S_ST:
+		case BPF_S_STX:
+			memvalid |= (1 << filter[pc].k);
+			break;
+		case BPF_S_LD_MEM:
+		case BPF_S_LDX_MEM:
+			if (!(memvalid & (1 << filter[pc].k))) {
+				ret = -EINVAL;
+				goto error;
+			}
+			break;
+		case BPF_S_JMP_JA:
+			/* a jump must set masks on target */
+			masks[pc + 1 + filter[pc].k] &= memvalid;
+			memvalid = ~0;
+			break;
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
+			/* a jump must set masks on targets */
+			masks[pc + 1 + filter[pc].jt] &= memvalid;
+			masks[pc + 1 + filter[pc].jf] &= memvalid;
+			memvalid = ~0;
+			break;
+		}
+	}
+error:
+	kfree(masks);
+	return ret;
+}
+
 /**
  *	sk_chk_filter - verify socket filter code
  *	@filter: filter to verify
@@ -553,7 +607,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 	switch (filter[flen - 1].code) {
 	case BPF_S_RET_K:
 	case BPF_S_RET_A:
-		return 0;
+		return check_load_and_stores(filter, flen);
 	}
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 45904f21655cf4f0ae7d0fab5906fe51bf56ecf4 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Fri, 3 Dec 2010 09:20:40 +0100
Subject: nl80211/mac80211: define and allow configuring mesh element TTL

The TTL in path selection information elements is different from
the mesh ttl used in mesh data frames.  Version 7.03 of the 11s
draft calls this ttl 'Element TTL'.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h       | 4 ++++
 include/net/cfg80211.h        | 2 ++
 net/mac80211/cfg.c            | 2 ++
 net/mac80211/debugfs_netdev.c | 2 ++
 net/mac80211/mesh.c           | 1 +
 net/mac80211/mesh.h           | 2 ++
 net/mac80211/mesh_hwmp.c      | 9 +++++----
 net/mac80211/mesh_pathtbl.c   | 7 ++++---
 net/wireless/nl80211.c        | 5 +++++
 9 files changed, 27 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 5cfa579df476..9e541452d805 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1547,6 +1547,9 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
  * point.
  *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
  * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
  * open peer links when we detect compatible mesh peers.
  *
@@ -1593,6 +1596,7 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 	NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 	NL80211_MESHCONF_HWMP_ROOTMODE,
+	NL80211_MESHCONF_ELEMENT_TTL,
 
 	/* keep last */
 	__NL80211_MESHCONF_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6b2af7aeddd3..93a4b2068334 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -624,6 +624,8 @@ struct mesh_config {
 	u16 dot11MeshMaxPeerLinks;
 	u8  dot11MeshMaxRetries;
 	u8  dot11MeshTTL;
+	/* ttl used in path selection information elements */
+	u8  element_ttl;
 	bool auto_open_plinks;
 	/* HWMP parameters */
 	u8  dot11MeshHWMPmaxPREQretries;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index db134b500caa..ce6936890c26 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1024,6 +1024,8 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 		conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries;
 	if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask))
 		conf->dot11MeshTTL = nconf->dot11MeshTTL;
+	if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))
+		conf->dot11MeshTTL = nconf->element_ttl;
 	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask))
 		conf->auto_open_plinks = nconf->auto_open_plinks;
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask))
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cbdf36d7841c..2dabdf7680d0 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -251,6 +251,7 @@ IEEE80211_IF_FILE(dot11MeshConfirmTimeout,
 IEEE80211_IF_FILE(dot11MeshHoldingTimeout,
 		u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC);
 IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC);
+IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC);
 IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC);
 IEEE80211_IF_FILE(dot11MeshMaxPeerLinks,
 		u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC);
@@ -355,6 +356,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshConfirmTimeout);
 	MESHPARAMS_ADD(dot11MeshHoldingTimeout);
 	MESHPARAMS_ADD(dot11MeshTTL);
+	MESHPARAMS_ADD(element_ttl);
 	MESHPARAMS_ADD(auto_open_plinks);
 	MESHPARAMS_ADD(dot11MeshMaxPeerLinks);
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c8a4f19ed13b..78a36c79bdcc 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -668,6 +668,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
 	ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
 	ifmsh->mshcfg.dot11MeshTTL = MESH_TTL;
+	ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL;
 	ifmsh->mshcfg.auto_open_plinks = true;
 	ifmsh->mshcfg.dot11MeshMaxPeerLinks =
 		MESH_MAX_ESTAB_PLINKS;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 58e741128968..182942eeac4d 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -216,6 +216,8 @@ struct mesh_rmc {
 #define PERR_RCODE_NO_ROUTE     12
 #define PERR_RCODE_DEST_UNREACH 13
 
+#define MESH_DEFAULT_ELEMENT_TTL 31
+
 /* Public interfaces */
 /* Various */
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 829e08a657d0..5bf64d7112b3 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -232,7 +232,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 	*pos++ = WLAN_EID_PERR;
 	*pos++ = ie_len;
 	/* ttl */
-	*pos++ = MESH_TTL;
+	*pos++ = ttl;
 	/* number of destinations */
 	*pos++ = 1;
 	/*
@@ -522,7 +522,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 
 	if (reply) {
 		lifetime = PREQ_IE_LIFETIME(preq_elem);
-		ttl = ifmsh->mshcfg.dot11MeshTTL;
+		ttl = ifmsh->mshcfg.element_ttl;
 		if (ttl != 0) {
 			mhwmp_dbg("replying to the PREQ\n");
 			mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr,
@@ -877,7 +877,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
 		sdata->u.mesh.last_sn_update = jiffies;
 	}
 	lifetime = default_lifetime(sdata);
-	ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
+	ttl = sdata->u.mesh.mshcfg.element_ttl;
 	if (ttl == 0) {
 		sdata->u.mesh.mshstats.dropped_frames_ttl++;
 		spin_unlock_bh(&mpath->state_lock);
@@ -1013,5 +1013,6 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata)
 	mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr,
 			       cpu_to_le32(++ifmsh->sn),
 			       0, NULL, 0, broadcast_addr,
-			       0, MESH_TTL, 0, 0, 0, sdata);
+			       0, sdata->u.mesh.mshcfg.element_ttl,
+			       0, 0, 0, sdata);
 }
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 349e466cf08b..8d65b47d9837 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -467,8 +467,8 @@ void mesh_plink_broken(struct sta_info *sta)
 			mpath->flags &= ~MESH_PATH_ACTIVE;
 			++mpath->sn;
 			spin_unlock_bh(&mpath->state_lock);
-			mesh_path_error_tx(MESH_TTL, mpath->dst,
-					cpu_to_le32(mpath->sn),
+			mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl,
+					mpath->dst, cpu_to_le32(mpath->sn),
 					cpu_to_le16(PERR_RCODE_DEST_UNREACH),
 					bcast, sdata);
 		} else
@@ -614,7 +614,8 @@ void mesh_path_discard_frame(struct sk_buff *skb,
 		mpath = mesh_path_lookup(da, sdata);
 		if (mpath)
 			sn = ++mpath->sn;
-		mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn),
+		mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data,
+				   cpu_to_le32(sn),
 				   cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata);
 	}
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 960be4e650f0..0b90cab5da2f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2582,6 +2582,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 			cur_params.dot11MeshMaxRetries);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_TTL,
 			cur_params.dot11MeshTTL);
+	NLA_PUT_U8(msg, NL80211_MESHCONF_ELEMENT_TTL,
+			cur_params.element_ttl);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS,
 			cur_params.auto_open_plinks);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES,
@@ -2623,6 +2625,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_TTL] = { .type = NLA_U8 },
+	[NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 },
 
 	[NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 },
@@ -2670,6 +2673,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
 			mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL,
 			mask, NL80211_MESHCONF_TTL, nla_get_u8);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl,
+			mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks,
 			mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries,
-- 
cgit v1.2.3


From 09b174702601079c3a04806754be30ffbd70db4d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 3 Dec 2010 09:20:41 +0100
Subject: mac80211: move mesh filter adjusting

Logically, the filter adjusting belongs with
starting/stopping mesh, not interface up/down,
so move it there.

Tested-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 18 +-----------------
 net/mac80211/mesh.c  | 11 +++++++++++
 2 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7aa85591dbe7..96e27f1e79fb 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -197,11 +197,6 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 		sdata->bss = &sdata->u.ap;
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		if (!ieee80211_vif_is_mesh(&sdata->vif))
-			break;
-		/* mesh ifaces must set allmulti to forward mcast traffic */
-		atomic_inc(&local->iff_allmultis);
-		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_ADHOC:
@@ -274,9 +269,6 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 		}
 
 		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			local->fif_other_bss++;
-			ieee80211_configure_filter(local);
-
 			ieee80211_start_mesh(sdata);
 		} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
 			local->fif_pspoll++;
@@ -504,16 +496,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		ieee80211_configure_filter(local);
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			/* other_bss and allmulti are always set on mesh
-			 * ifaces */
-			local->fif_other_bss--;
-			atomic_dec(&local->iff_allmultis);
-
-			ieee80211_configure_filter(local);
-
+		if (ieee80211_vif_is_mesh(&sdata->vif))
 			ieee80211_stop_mesh(sdata);
-		}
 		/* fall through */
 	default:
 		flush_work(&sdata->work);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 78a36c79bdcc..0d3234875ac5 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -513,6 +513,11 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_local *local = sdata->local;
 
+	local->fif_other_bss++;
+	/* mesh ifaces must set allmulti to forward mcast traffic */
+	atomic_inc(&local->iff_allmultis);
+	ieee80211_configure_filter(local);
+
 	set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
 	ieee80211_mesh_root_setup(ifmsh);
 	ieee80211_queue_work(&local->hw, &sdata->work);
@@ -524,6 +529,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 {
+	struct ieee80211_local *local = sdata->local;
+
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
 	/*
@@ -534,6 +541,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	 * it no longer is.
 	 */
 	cancel_work_sync(&sdata->work);
+
+	local->fif_other_bss--;
+	atomic_dec(&local->iff_allmultis);
+	ieee80211_configure_filter(local);
 }
 
 static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3


From f9e10ce4cf86945eb5efcab31284c971877ed012 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 3 Dec 2010 09:20:42 +0100
Subject: cfg80211: require add_virtual_intf to return new dev

cfg80211 used to do all its bookkeeping in
the notifier, but some new stuff will have
to use local variables so make the callback
return the netdev pointer.

Tested-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 11 +++++++----
 net/mac80211/cfg.c     | 20 ++++++++++++--------
 net/wireless/nl80211.c |  7 +++++--
 3 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 93a4b2068334..902895dfbd49 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1033,7 +1033,8 @@ struct cfg80211_pmksa {
  *
  * @add_virtual_intf: create a new virtual interface with the given name,
  *	must set the struct wireless_dev's iftype. Beware: You must create
- *	the new netdev in the wiphy's network namespace!
+ *	the new netdev in the wiphy's network namespace! Returns the netdev,
+ *	or an ERR_PTR.
  *
  * @del_virtual_intf: remove the virtual interface determined by ifindex.
  *
@@ -1168,9 +1169,11 @@ struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy);
 	int	(*resume)(struct wiphy *wiphy);
 
-	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
-				    enum nl80211_iftype type, u32 *flags,
-				    struct vif_params *params);
+	struct net_device * (*add_virtual_intf)(struct wiphy *wiphy,
+						char *name,
+						enum nl80211_iftype type,
+						u32 *flags,
+						struct vif_params *params);
 	int	(*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev);
 	int	(*change_virtual_intf)(struct wiphy *wiphy,
 				       struct net_device *dev,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ce6936890c26..d34c7c3dd762 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,9 +19,10 @@
 #include "rate.h"
 #include "mesh.h"
 
-static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
-			       enum nl80211_iftype type, u32 *flags,
-			       struct vif_params *params)
+static struct net_device *ieee80211_add_iface(struct wiphy *wiphy, char *name,
+					      enum nl80211_iftype type,
+					      u32 *flags,
+					      struct vif_params *params)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct net_device *dev;
@@ -29,12 +30,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
 	int err;
 
 	err = ieee80211_if_add(local, name, &dev, type, params);
-	if (err || type != NL80211_IFTYPE_MONITOR || !flags)
-		return err;
+	if (err)
+		return ERR_PTR(err);
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-	sdata->u.mntr_flags = *flags;
-	return 0;
+	if (type == NL80211_IFTYPE_MONITOR && flags) {
+		sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+		sdata->u.mntr_flags = *flags;
+	}
+
+	return dev;
 }
 
 static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0b90cab5da2f..cc2e5d6163de 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1368,6 +1368,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct vif_params params;
+	struct net_device *dev;
 	int err;
 	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
 	u32 flags;
@@ -1403,11 +1404,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
 				  info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
 				  &flags);
-	err = rdev->ops->add_virtual_intf(&rdev->wiphy,
+	dev = rdev->ops->add_virtual_intf(&rdev->wiphy,
 		nla_data(info->attrs[NL80211_ATTR_IFNAME]),
 		type, err ? NULL : &flags, &params);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
 
-	return err;
+	return 0;
 }
 
 static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
-- 
cgit v1.2.3


From bd90fdcc5fbd99a2a778999610420cf793bd1be2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 3 Dec 2010 09:20:43 +0100
Subject: nl80211: refactor mesh parameter parsing

I'm going to need this in a new place later.

Tested-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 61 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cc2e5d6163de..c8d4d53fc450 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2613,14 +2613,6 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	return -ENOBUFS;
 }
 
-#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \
-do {\
-	if (table[attr_num]) {\
-		cfg.param = nla_fn(table[attr_num]); \
-		mask |= (1 << (attr_num - 1)); \
-	} \
-} while (0);\
-
 static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = {
 	[NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 },
@@ -2639,31 +2631,34 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 };
 
-static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_parse_mesh_params(struct genl_info *info,
+				     struct mesh_config *cfg,
+				     u32 *mask_out)
 {
-	u32 mask;
-	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct net_device *dev = info->user_ptr[1];
-	struct mesh_config cfg;
 	struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
-	struct nlattr *parent_attr;
+	u32 mask = 0;
 
-	parent_attr = info->attrs[NL80211_ATTR_MESH_PARAMS];
-	if (!parent_attr)
+#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \
+do {\
+	if (table[attr_num]) {\
+		cfg->param = nla_fn(table[attr_num]); \
+		mask |= (1 << (attr_num - 1)); \
+	} \
+} while (0);\
+
+
+	if (!info->attrs[NL80211_ATTR_MESH_PARAMS])
 		return -EINVAL;
 	if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
-			parent_attr, nl80211_meshconf_params_policy))
+			     info->attrs[NL80211_ATTR_MESH_PARAMS],
+			     nl80211_meshconf_params_policy))
 		return -EINVAL;
 
-	if (!rdev->ops->set_mesh_params)
-		return -EOPNOTSUPP;
-
 	/* This makes sure that there aren't more than 32 mesh config
 	 * parameters (otherwise our bitfield scheme would not work.) */
 	BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32);
 
 	/* Fill in the params struct */
-	mask = 0;
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout,
 			mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout,
@@ -2703,12 +2698,32 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
 			NL80211_MESHCONF_HWMP_ROOTMODE,
 			nla_get_u8);
 
+	if (mask_out)
+		*mask_out = mask;
+	return 0;
+
+#undef FILL_IN_MESH_PARAM_IF_SET
+}
+
+static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct mesh_config cfg;
+	u32 mask;
+	int err;
+
+	if (!rdev->ops->set_mesh_params)
+		return -EOPNOTSUPP;
+
+	err = nl80211_parse_mesh_params(info, &cfg, &mask);
+	if (err)
+		return err;
+
 	/* Apply changes */
 	return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
 }
 
-#undef FILL_IN_MESH_PARAM_IF_SET
-
 static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *msg;
-- 
cgit v1.2.3


From 29cbe68c516a48a9a88b3226878570c6cbd83c02 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 3 Dec 2010 09:20:44 +0100
Subject: cfg80211/mac80211: add mesh join/leave commands

Instead of tying mesh activity to interface up,
add join and leave commands for mesh. Since we
must be backward compatible, let cfg80211 handle
joining a mesh if a mesh ID was pre-configured
when the device goes up.

Note that this therefore must modify mac80211 as
well since mac80211 needs to lose the logic to
start the mesh on interface up.

We now allow querying mesh parameters before the
mesh is connected, which simply returns defaults.
Setting them (internally renamed to "update") is
only allowed while connected. Specify them with
the new mesh join command instead where needed.

In mac80211, beaconing must now also follow the
mesh enabled/not enabled state, which is done
by testing the mesh ID.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h    |   8 +++
 include/net/cfg80211.h     |  38 +++++++++---
 net/mac80211/cfg.c         |  39 ++++++++++---
 net/mac80211/ieee80211_i.h |  13 -----
 net/mac80211/iface.c       |  14 +----
 net/mac80211/main.c        |   3 +-
 net/mac80211/mesh.c        |  26 ++-------
 net/mac80211/mesh.h        |  25 --------
 net/wireless/Makefile      |   2 +-
 net/wireless/core.c        |  15 ++++-
 net/wireless/core.h        |  13 +++++
 net/wireless/mesh.c        | 140 +++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.c     | 137 +++++++++++++++++++++++++++++++++++++-------
 net/wireless/util.c        |   1 +
 14 files changed, 359 insertions(+), 115 deletions(-)
 create mode 100644 net/wireless/mesh.c

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9e541452d805..410a06ea551b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -394,6 +394,11 @@
  *
  * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
  *
+ * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial
+ *	mesh config parameters may be given.
+ * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
+ *	network is determined by the network interface.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -500,6 +505,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_FRAME_WAIT_CANCEL,
 
+	NL80211_CMD_JOIN_MESH,
+	NL80211_CMD_LEAVE_MESH,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 902895dfbd49..788c3989a9e8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -258,13 +258,9 @@ struct ieee80211_supported_band {
 
 /**
  * struct vif_params - describes virtual interface parameters
- * @mesh_id: mesh ID to use
- * @mesh_id_len: length of the mesh ID
  * @use_4addr: use 4-address frames
  */
 struct vif_params {
-       u8 *mesh_id;
-       int mesh_id_len;
        int use_4addr;
 };
 
@@ -615,6 +611,11 @@ struct bss_parameters {
 	int ap_isolate;
 };
 
+/*
+ * struct mesh_config - 802.11s mesh configuration
+ *
+ * These parameters can be changed while the mesh is active.
+ */
 struct mesh_config {
 	/* Timeouts in ms */
 	/* Mesh plink management parameters */
@@ -637,6 +638,18 @@ struct mesh_config {
 	u8  dot11MeshHWMPRootMode;
 };
 
+/**
+ * struct mesh_setup - 802.11s mesh setup configuration
+ * @mesh_id: the mesh ID
+ * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes
+ *
+ * These parameters are fixed when the mesh is created.
+ */
+struct mesh_setup {
+	const u8 *mesh_id;
+	u8 mesh_id_len;
+};
+
 /**
  * struct ieee80211_txq_params - TX queue parameters
  * @queue: TX queue identifier (NL80211_TXQ_Q_*)
@@ -1078,7 +1091,7 @@ struct cfg80211_pmksa {
  *
  * @get_mesh_params: Put the current mesh parameters into *params
  *
- * @set_mesh_params: Set mesh parameters.
+ * @update_mesh_params: Update mesh parameters on a running mesh.
  *	The mask is a bitfield which tells us which parameters to
  *	set, and which to leave alone.
  *
@@ -1229,9 +1242,14 @@ struct cfg80211_ops {
 	int	(*get_mesh_params)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
-	int	(*set_mesh_params)(struct wiphy *wiphy,
-				struct net_device *dev,
-				const struct mesh_config *nconf, u32 mask);
+	int	(*update_mesh_params)(struct wiphy *wiphy,
+				      struct net_device *dev, u32 mask,
+				      const struct mesh_config *nconf);
+	int	(*join_mesh)(struct wiphy *wiphy, struct net_device *dev,
+			     const struct mesh_config *conf,
+			     const struct mesh_setup *setup);
+	int	(*leave_mesh)(struct wiphy *wiphy, struct net_device *dev);
+
 	int	(*change_bss)(struct wiphy *wiphy, struct net_device *dev,
 			      struct bss_parameters *params);
 
@@ -1647,6 +1665,8 @@ struct cfg80211_cached_keys;
  * @bssid: (private) Used by the internal configuration code
  * @ssid: (private) Used by the internal configuration code
  * @ssid_len: (private) Used by the internal configuration code
+ * @mesh_id_len: (private) Used by the internal configuration code
+ * @mesh_id_up_len: (private) Used by the internal configuration code
  * @wext: (private) Used by the internal wireless extensions compat code
  * @use_4addr: indicates 4addr mode is used on this interface, must be
  *	set by driver (if supported) on add_interface BEFORE registering the
@@ -1676,7 +1696,7 @@ struct wireless_dev {
 
 	/* currently used for IBSS and SME - might be rearranged later */
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
-	u8 ssid_len;
+	u8 ssid_len, mesh_id_len, mesh_id_up_len;
 	enum {
 		CFG80211_SME_IDLE,
 		CFG80211_SME_CONNECTING,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d34c7c3dd762..68329d713c02 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -60,11 +60,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 	if (ret)
 		return ret;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len)
-		ieee80211_sdata_set_mesh_id(sdata,
-					    params->mesh_id_len,
-					    params->mesh_id);
-
 	if (type == NL80211_IFTYPE_AP_VLAN &&
 	    params && params->use_4addr == 0)
 		rcu_assign_pointer(sdata->u.vlan.sta, NULL);
@@ -1003,9 +998,9 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
-static int ieee80211_set_mesh_params(struct wiphy *wiphy,
-				struct net_device *dev,
-				const struct mesh_config *nconf, u32 mask)
+static int ieee80211_update_mesh_params(struct wiphy *wiphy,
+					struct net_device *dev, u32 mask,
+					const struct mesh_config *nconf)
 {
 	struct mesh_config *conf;
 	struct ieee80211_sub_if_data *sdata;
@@ -1056,6 +1051,30 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
+			       const struct mesh_config *conf,
+			       const struct mesh_setup *setup)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config));
+	ifmsh->mesh_id_len = setup->mesh_id_len;
+	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
+
+	ieee80211_start_mesh(sdata);
+
+	return 0;
+}
+
+static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	ieee80211_stop_mesh(sdata);
+
+	return 0;
+}
 #endif
 
 static int ieee80211_change_bss(struct wiphy *wiphy,
@@ -1760,8 +1779,10 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_mpath = ieee80211_change_mpath,
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
-	.set_mesh_params = ieee80211_set_mesh_params,
+	.update_mesh_params = ieee80211_update_mesh_params,
 	.get_mesh_params = ieee80211_get_mesh_params,
+	.join_mesh = ieee80211_join_mesh,
+	.leave_mesh = ieee80211_leave_mesh,
 #endif
 	.change_bss = ieee80211_change_bss,
 	.set_txq_params = ieee80211_set_txq_params,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index e7c880725639..72499fe5fc36 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -609,19 +609,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
 	return container_of(p, struct ieee80211_sub_if_data, vif);
 }
 
-static inline void
-ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata,
-			    u8 mesh_id_len, u8 *mesh_id)
-{
-#ifdef CONFIG_MAC80211_MESH
-	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	ifmsh->mesh_id_len = mesh_id_len;
-	memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len);
-#else
-	WARN_ON(1);
-#endif
-}
-
 enum sdata_queue_type {
 	IEEE80211_SDATA_QUEUE_TYPE_FRAME	= 0,
 	IEEE80211_SDATA_QUEUE_AGG_START		= 1,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 96e27f1e79fb..f0f11bb794af 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -268,9 +268,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 				goto err_stop;
 		}
 
-		if (ieee80211_vif_is_mesh(&sdata->vif)) {
-			ieee80211_start_mesh(sdata);
-		} else if (sdata->vif.type == NL80211_IFTYPE_AP) {
+		if (sdata->vif.type == NL80211_IFTYPE_AP) {
 			local->fif_pspoll++;
 			local->fif_probe_req++;
 
@@ -495,10 +493,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		ieee80211_adjust_monitor_flags(sdata, -1);
 		ieee80211_configure_filter(local);
 		break;
-	case NL80211_IFTYPE_MESH_POINT:
-		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_stop_mesh(sdata);
-		/* fall through */
 	default:
 		flush_work(&sdata->work);
 		/*
@@ -1188,12 +1182,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	if (ret)
 		goto fail;
 
-	if (ieee80211_vif_is_mesh(&sdata->vif) &&
-	    params && params->mesh_id_len)
-		ieee80211_sdata_set_mesh_id(sdata,
-					    params->mesh_id_len,
-					    params->mesh_id);
-
 	mutex_lock(&local->iflist_mtx);
 	list_add_tail_rcu(&sdata->list, &local->interfaces);
 	mutex_unlock(&local->iflist_mtx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 107a0cbe52ac..2de69766c6aa 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -246,7 +246,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 					!!sdata->u.ibss.presp;
 				break;
 			case NL80211_IFTYPE_MESH_POINT:
-				sdata->vif.bss_conf.enable_beacon = true;
+				sdata->vif.bss_conf.enable_beacon =
+					!!sdata->u.mesh.mesh_id_len;
 				break;
 			default:
 				/* not reached */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0d3234875ac5..63e1188d5062 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -530,6 +530,11 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+
+	ifmsh->mesh_id_len = 0;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+	sta_info_flush(local, NULL);
 
 	del_timer_sync(&sdata->u.mesh.housekeeping_timer);
 	del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
@@ -674,27 +679,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 		    ieee80211_mesh_housekeeping_timer,
 		    (unsigned long) sdata);
 
-	ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T;
-	ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T;
-	ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T;
-	ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR;
-	ifmsh->mshcfg.dot11MeshTTL = MESH_TTL;
-	ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL;
-	ifmsh->mshcfg.auto_open_plinks = true;
-	ifmsh->mshcfg.dot11MeshMaxPeerLinks =
-		MESH_MAX_ESTAB_PLINKS;
-	ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout =
-		MESH_PATH_TIMEOUT;
-	ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval =
-		MESH_PREQ_MIN_INT;
-	ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime =
-		MESH_DIAM_TRAVERSAL_TIME;
-	ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries =
-		MESH_MAX_PREQ_RETRIES;
-	ifmsh->mshcfg.path_refresh_time =
-		MESH_PATH_REFRESH_TIME;
-	ifmsh->mshcfg.min_discovery_timeout =
-		MESH_MIN_DISCOVERY_TIMEOUT;
 	ifmsh->accepting_plinks = true;
 	ifmsh->preq_id = 0;
 	ifmsh->sn = 0;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 182942eeac4d..039d7fa0af74 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -175,33 +175,10 @@ struct mesh_rmc {
  */
 #define MESH_CFG_CMP_LEN 	(IEEE80211_MESH_CONFIG_LEN - 2)
 
-/* Default values, timeouts in ms */
-#define MESH_TTL 		31
-#define MESH_MAX_RETR	 	3
-#define MESH_RET_T 		100
-#define MESH_CONF_T 		100
-#define MESH_HOLD_T 		100
-
-#define MESH_PATH_TIMEOUT	5000
-/* Minimum interval between two consecutive PREQs originated by the same
- * interface
- */
-#define MESH_PREQ_MIN_INT	10
-#define MESH_DIAM_TRAVERSAL_TIME 50
-/* A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds before
- * timing out.  This way it will remain ACTIVE and no data frames will be
- * unnecesarily held in the pending queue.
- */
-#define MESH_PATH_REFRESH_TIME			1000
-#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME)
 #define MESH_DEFAULT_BEACON_INTERVAL		1000 	/* in 1024 us units */
 
-#define MESH_MAX_PREQ_RETRIES 4
 #define MESH_PATH_EXPIRE (600 * HZ)
 
-/* Default maximum number of established plinks per interface */
-#define MESH_MAX_ESTAB_PLINKS	32
-
 /* Default maximum number of plinks per interface */
 #define MESH_MAX_PLINKS		256
 
@@ -216,8 +193,6 @@ struct mesh_rmc {
 #define PERR_RCODE_NO_ROUTE     12
 #define PERR_RCODE_DEST_UNREACH 13
 
-#define MESH_DEFAULT_ELEMENT_TTL 31
-
 /* Public interfaces */
 /* Various */
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index e77e508126fa..55a28ab21db9 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_WEXT_SPY) += wext-spy.o
 obj-$(CONFIG_WEXT_PRIV) += wext-priv.o
 
 cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o
-cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o
+cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o
 cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
 cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
 cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 630bcf0a2f04..79772fcc37bc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -332,6 +332,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	WARN_ON(ops->add_virtual_intf && !ops->del_virtual_intf);
 	WARN_ON(ops->add_station && !ops->del_station);
 	WARN_ON(ops->add_mpath && !ops->del_mpath);
+	WARN_ON(ops->join_mesh && !ops->leave_mesh);
 
 	alloc_size = sizeof(*rdev) + sizeof_priv;
 
@@ -752,6 +753,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			cfg80211_mlme_down(rdev, dev);
 			wdev_unlock(wdev);
 			break;
+		case NL80211_IFTYPE_MESH_POINT:
+			cfg80211_leave_mesh(rdev, dev);
+			break;
 		default:
 			break;
 		}
@@ -775,20 +779,27 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		}
 		cfg80211_lock_rdev(rdev);
 		mutex_lock(&rdev->devlist_mtx);
-#ifdef CONFIG_CFG80211_WEXT
 		wdev_lock(wdev);
 		switch (wdev->iftype) {
+#ifdef CONFIG_CFG80211_WEXT
 		case NL80211_IFTYPE_ADHOC:
 			cfg80211_ibss_wext_join(rdev, wdev);
 			break;
 		case NL80211_IFTYPE_STATION:
 			cfg80211_mgd_wext_connect(rdev, wdev);
 			break;
+#endif
+		case NL80211_IFTYPE_MESH_POINT:
+			/* backward compat code ... */
+			if (wdev->mesh_id_up_len)
+				__cfg80211_join_mesh(rdev, dev, wdev->ssid,
+						     wdev->mesh_id_up_len,
+						     &default_mesh_config);
+			break;
 		default:
 			break;
 		}
 		wdev_unlock(wdev);
-#endif
 		rdev->opencount++;
 		mutex_unlock(&rdev->devlist_mtx);
 		cfg80211_unlock_rdev(rdev);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index ee80ad8dc655..743203bb61ac 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -285,6 +285,19 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid);
 int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 			    struct wireless_dev *wdev);
 
+/* mesh */
+extern const struct mesh_config default_mesh_config;
+int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_config *conf);
+int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_config *conf);
+int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			struct net_device *dev);
+
 /* MLME */
 int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
new file mode 100644
index 000000000000..e0b9747fe50a
--- /dev/null
+++ b/net/wireless/mesh.c
@@ -0,0 +1,140 @@
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include "core.h"
+
+/* Default values, timeouts in ms */
+#define MESH_TTL 		31
+#define MESH_DEFAULT_ELEMENT_TTL 31
+#define MESH_MAX_RETR	 	3
+#define MESH_RET_T 		100
+#define MESH_CONF_T 		100
+#define MESH_HOLD_T 		100
+
+#define MESH_PATH_TIMEOUT	5000
+
+/*
+ * Minimum interval between two consecutive PREQs originated by the same
+ * interface
+ */
+#define MESH_PREQ_MIN_INT	10
+#define MESH_DIAM_TRAVERSAL_TIME 50
+
+/*
+ * A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds
+ * before timing out.  This way it will remain ACTIVE and no data frames
+ * will be unnecessarily held in the pending queue.
+ */
+#define MESH_PATH_REFRESH_TIME			1000
+#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME)
+
+/* Default maximum number of established plinks per interface */
+#define MESH_MAX_ESTAB_PLINKS	32
+
+#define MESH_MAX_PREQ_RETRIES	4
+
+
+const struct mesh_config default_mesh_config = {
+	.dot11MeshRetryTimeout = MESH_RET_T,
+	.dot11MeshConfirmTimeout = MESH_CONF_T,
+	.dot11MeshHoldingTimeout = MESH_HOLD_T,
+	.dot11MeshMaxRetries = MESH_MAX_RETR,
+	.dot11MeshTTL = MESH_TTL,
+	.element_ttl = MESH_DEFAULT_ELEMENT_TTL,
+	.auto_open_plinks = true,
+	.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS,
+	.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT,
+	.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT,
+	.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME,
+	.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES,
+	.path_refresh_time = MESH_PATH_REFRESH_TIME,
+	.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT,
+};
+
+
+int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+			 struct net_device *dev,
+			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_config *conf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct mesh_setup setup = {
+		.mesh_id = mesh_id,
+		.mesh_id_len = mesh_id_len,
+	};
+	int err;
+
+	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
+
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (wdev->mesh_id_len)
+		return -EALREADY;
+
+	if (!mesh_id_len)
+		return -EINVAL;
+
+	if (!rdev->ops->join_mesh)
+		return -EOPNOTSUPP;
+
+	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup);
+	if (!err) {
+		memcpy(wdev->ssid, mesh_id, mesh_id_len);
+		wdev->mesh_id_len = mesh_id_len;
+	}
+
+	return err;
+}
+
+int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev,
+		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_config *conf)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	wdev_lock(wdev);
+	err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf);
+	wdev_unlock(wdev);
+
+	return err;
+}
+
+static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+				 struct net_device *dev)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	ASSERT_WDEV_LOCK(wdev);
+
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->leave_mesh)
+		return -EOPNOTSUPP;
+
+	if (!wdev->mesh_id_len)
+		return -ENOTCONN;
+
+	err = rdev->ops->leave_mesh(&rdev->wiphy, dev);
+	if (!err)
+		wdev->mesh_id_len = 0;
+	return err;
+}
+
+int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			struct net_device *dev)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	int err;
+
+	wdev_lock(wdev);
+	err = __cfg80211_leave_mesh(rdev, dev);
+	wdev_unlock(wdev);
+
+	return err;
+}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c8d4d53fc450..56508d40c740 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -661,13 +661,14 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(add_beacon, NEW_BEACON);
 	CMD(add_station, NEW_STATION);
 	CMD(add_mpath, NEW_MPATH);
-	CMD(set_mesh_params, SET_MESH_PARAMS);
+	CMD(update_mesh_params, SET_MESH_PARAMS);
 	CMD(change_bss, SET_BSS);
 	CMD(auth, AUTHENTICATE);
 	CMD(assoc, ASSOCIATE);
 	CMD(deauth, DEAUTHENTICATE);
 	CMD(disassoc, DISASSOCIATE);
 	CMD(join_ibss, JOIN_IBSS);
+	CMD(join_mesh, JOIN_MESH);
 	CMD(set_pmksa, SET_PMKSA);
 	CMD(del_pmksa, DEL_PMKSA);
 	CMD(flush_pmksa, FLUSH_PMKSA);
@@ -1324,11 +1325,21 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_MESH_ID]) {
+		struct wireless_dev *wdev = dev->ieee80211_ptr;
+
 		if (ntype != NL80211_IFTYPE_MESH_POINT)
 			return -EINVAL;
-		params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
-		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
-		change = true;
+		if (netif_running(dev))
+			return -EBUSY;
+
+		wdev_lock(wdev);
+		BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
+			     IEEE80211_MAX_MESH_ID_LEN);
+		wdev->mesh_id_up_len =
+			nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+		memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+		       wdev->mesh_id_up_len);
+		wdev_unlock(wdev);
 	}
 
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
@@ -1388,12 +1399,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	    !(rdev->wiphy.interface_modes & (1 << type)))
 		return -EOPNOTSUPP;
 
-	if (type == NL80211_IFTYPE_MESH_POINT &&
-	    info->attrs[NL80211_ATTR_MESH_ID]) {
-		params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
-		params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
-	}
-
 	if (info->attrs[NL80211_ATTR_4ADDR]) {
 		params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
 		err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type);
@@ -1410,6 +1415,20 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
+	if (type == NL80211_IFTYPE_MESH_POINT &&
+	    info->attrs[NL80211_ATTR_MESH_ID]) {
+		struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+		wdev_lock(wdev);
+		BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
+			     IEEE80211_MAX_MESH_ID_LEN);
+		wdev->mesh_id_up_len =
+			nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+		memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+		       wdev->mesh_id_up_len);
+		wdev_unlock(wdev);
+	}
+
 	return 0;
 }
 
@@ -2543,21 +2562,32 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 }
 
 static int nl80211_get_mesh_params(struct sk_buff *skb,
-	struct genl_info *info)
+				   struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	struct mesh_config cur_params;
-	int err;
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct mesh_config cur_params;
+	int err = 0;
 	void *hdr;
 	struct nlattr *pinfoattr;
 	struct sk_buff *msg;
 
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->get_mesh_params)
 		return -EOPNOTSUPP;
 
-	/* Get the mesh params */
-	err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params);
+	wdev_lock(wdev);
+	/* If not connected, get default parameters */
+	if (!wdev->mesh_id_len)
+		memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
+	else
+		err = rdev->ops->get_mesh_params(&rdev->wiphy, dev,
+						 &cur_params);
+	wdev_unlock(wdev);
+
 	if (err)
 		return err;
 
@@ -2705,23 +2735,37 @@ do {\
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
-static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_update_mesh_params(struct sk_buff *skb,
+				      struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct mesh_config cfg;
 	u32 mask;
 	int err;
 
-	if (!rdev->ops->set_mesh_params)
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->update_mesh_params)
 		return -EOPNOTSUPP;
 
 	err = nl80211_parse_mesh_params(info, &cfg, &mask);
 	if (err)
 		return err;
 
-	/* Apply changes */
-	return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
+	wdev_lock(wdev);
+	if (!wdev->mesh_id_len)
+		err = -ENOLINK;
+
+	if (!err)
+		err = rdev->ops->update_mesh_params(&rdev->wiphy, dev,
+						    mask, &cfg);
+
+	wdev_unlock(wdev);
+
+	return err;
 }
 
 static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
@@ -4505,6 +4549,41 @@ out:
 	return err;
 }
 
+static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct mesh_config cfg;
+	int err;
+
+	/* start with default */
+	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
+
+	if (info->attrs[NL80211_ATTR_MESH_PARAMS]) {
+		/* and parse parameters if given */
+		err = nl80211_parse_mesh_params(info, &cfg, NULL);
+		if (err)
+			return err;
+	}
+
+	if (!info->attrs[NL80211_ATTR_MESH_ID] ||
+	    !nla_len(info->attrs[NL80211_ATTR_MESH_ID]))
+		return -EINVAL;
+
+	return cfg80211_join_mesh(rdev, dev,
+				  nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
+				  nla_len(info->attrs[NL80211_ATTR_MESH_ID]),
+				  &cfg);
+}
+
+static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+
+	return cfg80211_leave_mesh(rdev, dev);
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -4769,10 +4848,10 @@ static struct genl_ops nl80211_ops[] = {
 	},
 	{
 		.cmd = NL80211_CMD_SET_MESH_PARAMS,
-		.doit = nl80211_set_mesh_params,
+		.doit = nl80211_update_mesh_params,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
-		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
@@ -4987,6 +5066,22 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_JOIN_MESH,
+		.doit = nl80211_join_mesh,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_LEAVE_MESH,
+		.doit = nl80211_leave_mesh,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index fee020b15a4e..4de624ca4c63 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -792,6 +792,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 
 	if (ntype != otype) {
 		dev->ieee80211_ptr->use_4addr = false;
+		dev->ieee80211_ptr->mesh_id_up_len = 0;
 
 		switch (otype) {
 		case NL80211_IFTYPE_ADHOC:
-- 
cgit v1.2.3


From 8917a3c0b7d1557548f50bfe3f0e18e0354e38f6 Mon Sep 17 00:00:00 2001
From: David Shwatrz <dshwatrz@gmail.com>
Date: Thu, 2 Dec 2010 09:01:55 +0000
Subject: Fix a typo in datagram.c and sctp/socket.c.

Hi,
This patch fixes a typo in net/core/datagram.c and in net/sctp/socket.c

Regards,
David Shwartz

Signed-off-by: David Shwartz <dshwatrz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 2 +-
 net/sctp/socket.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index cd1e039c8755..18ac112ea7ae 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -177,7 +177,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
 		 * interrupt level will suddenly eat the receive_queue.
 		 *
 		 * Look at current nfs client by the way...
-		 * However, this function was corrent in any case. 8)
+		 * However, this function was correct in any case. 8)
 		 */
 		unsigned long cpu_flags;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6bd554323a34..842c7f3650b9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6047,7 +6047,7 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
 		 * will suddenly eat the receive_queue.
 		 *
 		 *  Look at current nfs client by the way...
-		 *  However, this function was corrent in any case. 8)
+		 *  However, this function was correct in any case. 8)
 		 */
 		if (flags & MSG_PEEK) {
 			spin_lock_bh(&sk->sk_receive_queue.lock);
-- 
cgit v1.2.3


From 871a2c16c21b988688b4ab1a78eadd969765c0a3 Mon Sep 17 00:00:00 2001
From: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Date: Sat, 4 Dec 2010 13:38:01 +0100
Subject: dccp: Policy-based packet dequeueing infrastructure

This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
 * a simple FIFO policy (which is the default) and
 * a priority based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (can be overridden via socket options).

The priority policy uses skb->priority internally to assign an u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3), the patch also provides the requisite parsing routines.

Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 Documentation/networking/dccp.txt |  20 ++++++
 include/linux/dccp.h              |  21 +++++++
 net/dccp/Makefile                 |   4 +-
 net/dccp/dccp.h                   |  12 ++++
 net/dccp/output.c                 |   7 +--
 net/dccp/proto.c                  |  67 +++++++++++++++++++-
 net/dccp/qpolicy.c                | 126 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 248 insertions(+), 9 deletions(-)
 create mode 100644 net/dccp/qpolicy.c

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 271d524a4c8d..b395ca6a49f2 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -47,6 +47,26 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree
 
 Socket options
 ==============
+DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
+a policy ID as argument and can only be set before the connection (i.e. changes
+during an established connection are not supported). Currently, two policies are
+defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
+and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
+u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
+a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
+be formatted using a cmsg(3) message header filled in as follows:
+	cmsg->cmsg_level = SOL_DCCP;
+	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
+	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
+
+DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
+value is always interpreted as unbounded queue length. If different from zero,
+the interpretation of this parameter depends on the current dequeuing policy
+(see above): the "simple" policy will enforce a fixed queue size by returning
+EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
+lowest-priority packet first. The default value for this parameter is
+initialised from /proc/sys/net/dccp/default/tx_qlen.
+
 DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
 service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
 the socket will fall back to 0 (which means that no meaningful service code
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index eed52bcd35d0..010e2d87ed75 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -197,6 +197,21 @@ enum dccp_feature_numbers {
 	DCCPF_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP socket control message types for cmsg */
+enum dccp_cmsg_type {
+	DCCP_SCM_PRIORITY = 1,
+	DCCP_SCM_QPOLICY_MAX = 0xFFFF,
+	/* ^-- Up to here reserved exclusively for qpolicy parameters */
+	DCCP_SCM_MAX
+};
+
+/* DCCP priorities for outgoing/queued packets */
+enum dccp_packet_dequeueing_policy {
+	DCCPQ_POLICY_SIMPLE,
+	DCCPQ_POLICY_PRIO,
+	DCCPQ_POLICY_MAX
+};
+
 /* DCCP socket options */
 #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */
 #define DCCP_SOCKOPT_SERVICE		2
@@ -210,6 +225,8 @@ enum dccp_feature_numbers {
 #define DCCP_SOCKOPT_CCID		13
 #define DCCP_SOCKOPT_TX_CCID		14
 #define DCCP_SOCKOPT_RX_CCID		15
+#define DCCP_SOCKOPT_QPOLICY_ID		16
+#define DCCP_SOCKOPT_QPOLICY_TXQLEN	17
 #define DCCP_SOCKOPT_CCID_RX_INFO	128
 #define DCCP_SOCKOPT_CCID_TX_INFO	192
 
@@ -458,6 +475,8 @@ struct dccp_ackvec;
  * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
  * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
  * @dccps_options_received - parsed set of retrieved options
+ * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
+ * @dccps_tx_qlen - maximum length of the TX queue
  * @dccps_role - role of this sock, one of %dccp_role
  * @dccps_hc_rx_insert_options - receiver wants to add options when acking
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
@@ -500,6 +519,8 @@ struct dccp_sock {
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
 	struct dccp_options_received	dccps_options_received;
+	__u8				dccps_qpolicy;
+	__u32				dccps_tx_qlen;
 	enum dccp_role			dccps_role:2;
 	__u8				dccps_hc_rx_insert_options:1;
 	__u8				dccps_hc_tx_insert_options:1;
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2991efcc8dea..5c8362b037ed 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
 
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
-
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
+	  qpolicy.o
 #
 # CCID algorithms to be used by dccp.ko
 #
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 19fafd597465..d008da91cec2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
+/*
+ * TX Packet Dequeueing Interface
+ */
+extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+extern bool		dccp_qpolicy_full(struct sock *sk);
+extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
+extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+
+/*
+ * TX Packet Output and TX Timers
+ */
 extern void   dccp_write_xmit(struct sock *sk);
 extern void   dccp_write_space(struct sock *sk);
 extern void   dccp_flush_write_queue(struct sock *sk, long *time_budget);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index d96dd9d362ae..784d30210543 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)
 {
 	int err, len;
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+	struct sk_buff *skb = dccp_qpolicy_pop(sk);
 
 	if (unlikely(skb == NULL))
 		return;
@@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct sk_buff *skb;
 
-	while ((skb = skb_peek(&sk->sk_write_queue))) {
+	while ((skb = dccp_qpolicy_top(sk))) {
 		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
 		switch (ccid_packet_dequeue_eval(rc)) {
@@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)
 			dccp_xmit_packet(sk);
 			break;
 		case CCID_PACKET_ERR:
-			skb_dequeue(&sk->sk_write_queue);
-			kfree_skb(skb);
+			dccp_qpolicy_drop(sk, skb);
 			dccp_pr_debug("packet discarded due to err=%d\n", rc);
 		}
 	}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index ef343d53fcea..d6a224982bb5 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
 	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
 	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;
+	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;
 
 	dccp_init_xmit_timers(sk);
 
@@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		err = dccp_setsockopt_cscov(sk, val, true);
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		if (sk->sk_state != DCCP_CLOSED)
+			err = -EISCONN;
+		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
+			err = -EINVAL;
+		else
+			dp->dccps_qpolicy = val;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		if (val < 0)
+			err = -EINVAL;
+		else
+			dp->dccps_tx_qlen = val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
 		break;
+	case DCCP_SOCKOPT_QPOLICY_ID:
+		val = dp->dccps_qpolicy;
+		break;
+	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+		val = dp->dccps_tx_qlen;
+		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
 					     len, (u32 __user *)optval, optlen);
@@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
 #endif
 
+static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
+{
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+
+	/*
+	 * Assign an (opaque) qpolicy priority value to skb->priority.
+	 *
+	 * We are overloading this skb field for use with the qpolicy subystem.
+	 * The skb->priority is normally used for the SO_PRIORITY option, which
+	 * is initialised from sk_priority. Since the assignment of sk_priority
+	 * to skb->priority happens later (on layer 3), we overload this field
+	 * for use with queueing priorities as long as the skb is on layer 4.
+	 * The default priority value (if nothing is set) is 0.
+	 */
+	skb->priority = 0;
+
+	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		if (cmsg->cmsg_level != SOL_DCCP)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case DCCP_SCM_PRIORITY:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
+				return -EINVAL;
+			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		 size_t len)
 {
@@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	lock_sock(sk);
 
-	if (sysctl_dccp_tx_qlen &&
-	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+	if (dccp_qpolicy_full(sk)) {
 		rc = -EAGAIN;
 		goto out_release;
 	}
@@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (rc != 0)
 		goto out_discard;
 
-	skb_queue_tail(&sk->sk_write_queue, skb);
+	rc = dccp_msghdr_parse(msg, skb);
+	if (rc != 0)
+		goto out_discard;
+
+	dccp_qpolicy_push(sk, skb);
 	/*
 	 * The xmit_timer is set if the TX CCID is rate-based and will expire
 	 * when congestion control permits to release further packets into the
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
new file mode 100644
index 000000000000..4b0fd6b11f6d
--- /dev/null
+++ b/net/dccp/qpolicy.c
@@ -0,0 +1,126 @@
+/*
+ *  net/dccp/qpolicy.c
+ *
+ *  Policy-based packet dequeueing interface for DCCP.
+ *
+ *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License v2
+ *  as published by the Free Software Foundation.
+ */
+#include "dccp.h"
+
+/*
+ *	Simple Dequeueing Policy:
+ *	If tx_qlen is different from 0, enqueue up to tx_qlen elements.
+ */
+static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
+{
+	skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static bool qpolicy_simple_full(struct sock *sk)
+{
+	return dccp_sk(sk)->dccps_tx_qlen &&
+	       sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
+}
+
+static struct sk_buff *qpolicy_simple_top(struct sock *sk)
+{
+	return skb_peek(&sk->sk_write_queue);
+}
+
+/*
+ *	Priority-based Dequeueing Policy:
+ *	If tx_qlen is different from 0 and the queue has reached its upper bound
+ *	of tx_qlen elements, replace older packets lowest-priority-first.
+ */
+static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *best = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (best == NULL || skb->priority > best->priority)
+			best = skb;
+	return best;
+}
+
+static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
+{
+	struct sk_buff *skb, *worst = NULL;
+
+	skb_queue_walk(&sk->sk_write_queue, skb)
+		if (worst == NULL || skb->priority < worst->priority)
+			worst = skb;
+	return worst;
+}
+
+static bool qpolicy_prio_full(struct sock *sk)
+{
+	if (qpolicy_simple_full(sk))
+		dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
+	return false;
+}
+
+/**
+ * struct dccp_qpolicy_operations  -  TX Packet Dequeueing Interface
+ * @push: add a new @skb to the write queue
+ * @full: indicates that no more packets will be admitted
+ * @top:  peeks at whatever the queueing policy defines as its `top'
+ */
+static struct dccp_qpolicy_operations {
+	void		(*push)	(struct sock *sk, struct sk_buff *skb);
+	bool		(*full) (struct sock *sk);
+	struct sk_buff*	(*top)  (struct sock *sk);
+
+} qpol_table[DCCPQ_POLICY_MAX] = {
+	[DCCPQ_POLICY_SIMPLE] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_simple_full,
+		.top  = qpolicy_simple_top,
+	},
+	[DCCPQ_POLICY_PRIO] = {
+		.push = qpolicy_simple_push,
+		.full = qpolicy_prio_full,
+		.top  = qpolicy_prio_best_skb,
+	},
+};
+
+/*
+ *	Externally visible interface
+ */
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
+{
+	qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
+}
+
+bool dccp_qpolicy_full(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
+}
+
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
+{
+	if (skb != NULL) {
+		skb_unlink(skb, &sk->sk_write_queue);
+		kfree_skb(skb);
+	}
+}
+
+struct sk_buff *dccp_qpolicy_top(struct sock *sk)
+{
+	return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
+}
+
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
+{
+	struct sk_buff *skb = dccp_qpolicy_top(sk);
+
+	if (skb != NULL) {
+		/* Clear any skb fields that we used internally */
+		skb->priority = 0;
+		skb_unlink(skb, &sk->sk_write_queue);
+	}
+	return skb;
+}
-- 
cgit v1.2.3


From 04910265078f08a73208beab70ed2a3cce4a919f Mon Sep 17 00:00:00 2001
From: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Date: Sat, 4 Dec 2010 13:39:13 +0100
Subject: dccp qpolicy: Parameter checking of cmsg qpolicy parameters

Ensure that cmsg->cmsg_type value is valid for qpolicy
that is currently in use.

Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h    |  1 +
 net/dccp/proto.c   |  4 ++++
 net/dccp/qpolicy.c | 23 +++++++++++++++++------
 3 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index d008da91cec2..48ad5d9da7cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -251,6 +251,7 @@ extern bool		dccp_qpolicy_full(struct sock *sk);
 extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
 extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk);
 extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk);
+extern bool		dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
 
 /*
  * TX Packet Output and TX Timers
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index d6a224982bb5..152975d942d9 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -726,6 +726,10 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
 		if (cmsg->cmsg_level != SOL_DCCP)
 			continue;
 
+		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
+		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
+			return -EINVAL;
+
 		switch (cmsg->cmsg_type) {
 		case DCCP_SCM_PRIORITY:
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
index 4b0fd6b11f6d..63c30bfa4703 100644
--- a/net/dccp/qpolicy.c
+++ b/net/dccp/qpolicy.c
@@ -73,17 +73,20 @@ static struct dccp_qpolicy_operations {
 	void		(*push)	(struct sock *sk, struct sk_buff *skb);
 	bool		(*full) (struct sock *sk);
 	struct sk_buff*	(*top)  (struct sock *sk);
+	__be32		params;
 
 } qpol_table[DCCPQ_POLICY_MAX] = {
 	[DCCPQ_POLICY_SIMPLE] = {
-		.push = qpolicy_simple_push,
-		.full = qpolicy_simple_full,
-		.top  = qpolicy_simple_top,
+		.push   = qpolicy_simple_push,
+		.full   = qpolicy_simple_full,
+		.top    = qpolicy_simple_top,
+		.params = 0,
 	},
 	[DCCPQ_POLICY_PRIO] = {
-		.push = qpolicy_simple_push,
-		.full = qpolicy_prio_full,
-		.top  = qpolicy_prio_best_skb,
+		.push   = qpolicy_simple_push,
+		.full   = qpolicy_prio_full,
+		.top    = qpolicy_prio_best_skb,
+		.params = DCCP_SCM_PRIORITY,
 	},
 };
 
@@ -124,3 +127,11 @@ struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
 	}
 	return skb;
 }
+
+bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
+{
+	/* check if exactly one bit is set */
+	if (!param || (param & (param - 1)))
+		return false;
+	return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
+}
-- 
cgit v1.2.3


From ff2109f5f94ff75f45e0c8d2d1d56fefdf20733f Mon Sep 17 00:00:00 2001
From: Tracey Dent <tdent48227@gmail.com>
Date: Sun, 21 Nov 2010 15:03:12 +0000
Subject: Net: bluetooth: Makefile: Remove deprecated kbuild goal definitions

Changed Makefile to use <modules>-y instead of <modules>-objs
because -objs is deprecated and not mentioned in
Documentation/kbuild/makefiles.txt.

Signed-off-by: Tracey Dent <tdent48227@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index d1e433f7d673..7ca1f46a471a 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP)	+= bnep/
 obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 
-bluetooth-objs := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
-- 
cgit v1.2.3


From 541a45a142df281c974d74eac2066138fc107b23 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 2 Dec 2010 19:12:43 +0900
Subject: nl80211/mac80211: Report signal average

Extend nl80211 to report an exponential weighted moving average (EWMA) of the
signal value. Since the signal value usually fluctuates between different
packets, an average can be more useful than the value of the last packet.

This uses the recently added generic EWMA library function.

--
v2:	fix ABI breakage and change factor to be a power of 2.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 4 ++++
 net/mac80211/Kconfig    | 1 +
 net/mac80211/cfg.c      | 3 ++-
 net/mac80211/rx.c       | 1 +
 net/mac80211/sta_info.c | 2 ++
 net/mac80211/sta_info.h | 3 +++
 net/wireless/nl80211.c  | 3 +++
 8 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 410a06ea551b..8e28053ea423 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1191,6 +1191,7 @@ enum nl80211_rate_info {
  *	station)
  * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station)
  * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station)
+ * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  */
 enum nl80211_sta_info {
 	__NL80211_STA_INFO_INVALID,
@@ -1206,6 +1207,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TX_PACKETS,
 	NL80211_STA_INFO_TX_RETRIES,
 	NL80211_STA_INFO_TX_FAILED,
+	NL80211_STA_INFO_SIGNAL_AVG,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 788c3989a9e8..8764c9a5bab7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -420,6 +420,7 @@ struct station_parameters {
  * @STATION_INFO_TX_RETRIES: @tx_retries filled
  * @STATION_INFO_TX_FAILED: @tx_failed filled
  * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
+ * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -435,6 +436,7 @@ enum station_info_flags {
 	STATION_INFO_TX_RETRIES		= 1<<10,
 	STATION_INFO_TX_FAILED		= 1<<11,
 	STATION_INFO_RX_DROP_MISC	= 1<<12,
+	STATION_INFO_SIGNAL_AVG		= 1<<13,
 };
 
 /**
@@ -481,6 +483,7 @@ struct rate_info {
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
  * @signal: signal strength of last received packet in dBm
+ * @signal_avg: signal strength average in dBm
  * @txrate: current unicast bitrate to this station
  * @rx_packets: packets received from this station
  * @tx_packets: packets transmitted to this station
@@ -501,6 +504,7 @@ struct station_info {
 	u16 plid;
 	u8 plink_state;
 	s8 signal;
+	s8 signal_avg;
 	struct rate_info txrate;
 	u32 rx_packets;
 	u32 tx_packets;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 4d6f8653ec88..798d9b9462e2 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -6,6 +6,7 @@ config MAC80211
 	select CRYPTO_ARC4
 	select CRYPTO_AES
 	select CRC32
+	select AVERAGE
 	---help---
 	  This option enables the hardware independent IEEE 802.11
 	  networking stack.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 68329d713c02..af9620406321 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -342,8 +342,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
 		sinfo->signal = (s8)sta->last_signal;
+		sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
 	}
 
 	sinfo->txrate.flags = 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6289525c0998..2fe8f5f86499 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1163,6 +1163,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
+	ewma_add(&sta->avg_signal, -status->signal);
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index eff58571fd7e..c426504ed1cf 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	sta->local = local;
 	sta->sdata = sdata;
 
+	ewma_init(&sta->avg_signal, 1024, 8);
+
 	if (sta_prepare_rate_control(local, sta, gfp)) {
 		kfree(sta);
 		return NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 05f11302443b..fdca52cf88de 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 #include <linux/workqueue.h>
+#include <linux/average.h>
 #include "key.h"
 
 /**
@@ -223,6 +224,7 @@ enum plink_state {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
+ * @avg_signal: moving average of signal of received frames from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -291,6 +293,7 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
+	struct ewma avg_signal;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 56508d40c740..2cf03331d4a2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1896,6 +1896,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_SIGNAL)
 		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL,
 			   sinfo->signal);
+	if (sinfo->filled & STATION_INFO_SIGNAL_AVG)
+		NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG,
+			   sinfo->signal_avg);
 	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
 		txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE);
 		if (!txrate)
-- 
cgit v1.2.3


From 0ab82b04ac83a05bda3ef8499f415fc6fd6ee206 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Fri, 3 Dec 2010 02:16:23 +0200
Subject: mac80211: fix dynamic-ps/pm_qos magic numbers

mac80211 uses pm_qos (/dev/network_latency) in order to determine the
dynamic ps timeout (or disable the dynamic-ps at all in some cases).

commit ff616381 added a comparison for the current network_latency
against one high value (1900ms), and against the default value
(2000sec, rather than the commented 2sec).

however, the representation of 1900ms was incorrect:
1900ms = 1900000us ( != 1900000000 )

fix it by using USEC_TO_MSEC/SEC consts.

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3a1dde3c7956..59e2e06aa4e9 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -625,11 +625,12 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 			/*
 			 * Go to full PSM if the user configures a very low
 			 * latency requirement.
-			 * The 2 second value is there for compatibility until
-			 * the PM_QOS_NETWORK_LATENCY is configured with real
-			 * values.
+			 * The 2000 second value is there for compatibility
+			 * until the PM_QOS_NETWORK_LATENCY is configured
+			 * with real values.
 			 */
-			if (latency > 1900000000 && latency != 2000000000)
+			if (latency > (1900 * USEC_PER_MSEC) &&
+			    latency != (2000 * USEC_PER_SEC))
 				timeout = 0;
 			else
 				timeout = 100;
-- 
cgit v1.2.3


From c658e5db01117bf2a321a9a782754dd5b10e2f15 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 7 Dec 2010 04:40:18 +0100
Subject: mac80211: fix a compiler warning

net/mac80211/mlme.c: In function 'ieee80211_sta_work':
net/mac80211/mlme.c:1981: warning: too many arguments for format

Introduced by commit 04ac3c0ee2c773c321ec472d892635a20556f34d
("mac80211: speed up AP probing using nullfunc frames").

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 59e2e06aa4e9..45fbb9e33746 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1982,8 +1982,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
 				wiphy_debug(local->hw.wiphy,
 					    "%s: No ack for nullfunc frame to"
 					    " AP %pM, disconnecting.\n",
-					    sdata->name, bssid,
-					    ifmgd->probe_send_count);
+					    sdata->name, bssid);
 #endif
 				ieee80211_sta_connection_lost(sdata, bssid);
 			}
-- 
cgit v1.2.3


From 7659a193f94c0003dd06e9e874d19bade1a8c952 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Tue, 7 Dec 2010 10:41:47 -0800
Subject: mac80211: Fix compilation error when mesh is disabled

Wrap mesh sections inside CONFIG_MAC80211_MESH to fix compilation
problems reported by Stephen Rothwell, Larry Finger and Bruno Randolf.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2de69766c6aa..973fee9f7d69 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -245,10 +245,12 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
 				sdata->vif.bss_conf.enable_beacon =
 					!!sdata->u.ibss.presp;
 				break;
+#ifdef CONFIG_MAC80211_MESH
 			case NL80211_IFTYPE_MESH_POINT:
 				sdata->vif.bss_conf.enable_beacon =
 					!!sdata->u.mesh.mesh_id_len;
 				break;
+#endif
 			default:
 				/* not reached */
 				WARN_ON(1);
-- 
cgit v1.2.3


From 0381101fd6a73c7d6b545044dc1472d019fc64e3 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 8 Dec 2010 00:21:06 +0200
Subject: Bluetooth: Add initial Bluetooth Management interface callbacks

Add initial code for handling Bluetooth Management interface messages.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Acked-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/hci_core.h |  3 ++
 net/bluetooth/Makefile           |  2 +-
 net/bluetooth/hci_sock.c         | 39 +++++++++++++---
 net/bluetooth/mgmt.c             | 99 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+), 7 deletions(-)
 create mode 100644 net/bluetooth/mgmt.c

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3e3435945980..1992fac7e921 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -660,6 +660,9 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data);
 /* ----- HCI Sockets ----- */
 void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
 
+/* Management interface */
+int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len);
+
 /* HCI info for socket */
 #define hci_pi(sk) ((struct hci_pinfo *) sk)
 
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 7ca1f46a471a..250f954f0213 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP)	+= bnep/
 obj-$(CONFIG_BT_CMTP)	+= cmtp/
 obj-$(CONFIG_BT_HIDP)	+= hidp/
 
-bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o
+bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index b3753bad2a55..207be7abda9f 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -49,6 +49,8 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
+static int enable_mgmt;
+
 /* ----- HCI socket interface ----- */
 
 static inline int hci_test_bit(int nr, void *addr)
@@ -353,25 +355,35 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long a
 
 static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
-	struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr;
+	struct sockaddr_hci haddr;
 	struct sock *sk = sock->sk;
 	struct hci_dev *hdev = NULL;
-	int err = 0;
+	int len, err = 0;
 
 	BT_DBG("sock %p sk %p", sock, sk);
 
-	if (!haddr || haddr->hci_family != AF_BLUETOOTH)
+	if (!addr)
+		return -EINVAL;
+
+	memset(&haddr, 0, sizeof(haddr));
+	len = min_t(unsigned int, sizeof(haddr), addr_len);
+	memcpy(&haddr, addr, len);
+
+	if (haddr.hci_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	if (haddr.hci_channel != HCI_CHANNEL_RAW && !enable_mgmt)
 		return -EINVAL;
 
 	lock_sock(sk);
 
-	if (hci_pi(sk)->hdev) {
+	if (sk->sk_state == BT_BOUND || hci_pi(sk)->hdev) {
 		err = -EALREADY;
 		goto done;
 	}
 
-	if (haddr->hci_dev != HCI_DEV_NONE) {
-		hdev = hci_dev_get(haddr->hci_dev);
+	if (haddr.hci_dev != HCI_DEV_NONE) {
+		hdev = hci_dev_get(haddr.hci_dev);
 		if (!hdev) {
 			err = -ENODEV;
 			goto done;
@@ -380,6 +392,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
 		atomic_inc(&hdev->promisc);
 	}
 
+	hci_pi(sk)->channel = haddr.hci_channel;
 	hci_pi(sk)->hdev = hdev;
 	sk->sk_state = BT_BOUND;
 
@@ -502,6 +515,17 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	lock_sock(sk);
 
+	switch (hci_pi(sk)->channel) {
+	case HCI_CHANNEL_RAW:
+		break;
+	case HCI_CHANNEL_CONTROL:
+		err = mgmt_control(sk, msg, len);
+		goto done;
+	default:
+		err = -EINVAL;
+		goto done;
+	}
+
 	hdev = hci_pi(sk)->hdev;
 	if (!hdev) {
 		err = -EBADFD;
@@ -831,3 +855,6 @@ void __exit hci_sock_cleanup(void)
 
 	proto_unregister(&hci_sk_proto);
 }
+
+module_param(enable_mgmt, bool, 0644);
+MODULE_PARM_DESC(enable_mgmt, "Enable Management interface");
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
new file mode 100644
index 000000000000..d15bf676c350
--- /dev/null
+++ b/net/bluetooth/mgmt.c
@@ -0,0 +1,99 @@
+/*
+   BlueZ - Bluetooth protocol stack for Linux
+   Copyright (C) 2010  Nokia Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License version 2 as
+   published by the Free Software Foundation;
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
+   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
+   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
+   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
+   SOFTWARE IS DISCLAIMED.
+*/
+
+/* Bluetooth HCI Management interface */
+
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
+
+static void cmd_status(struct sock *sk, u16 cmd, u8 status)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+	struct mgmt_ev_cmd_status *ev;
+
+	BT_DBG("sock %p", sk);
+
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
+	hdr->len = cpu_to_le16(sizeof(*ev));
+
+	ev = (void *) skb_put(skb, sizeof(*ev));
+	ev->status = status;
+	put_unaligned_le16(cmd, &ev->opcode);
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+}
+
+int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
+{
+	unsigned char *buf;
+	struct mgmt_hdr *hdr;
+	u16 opcode, len;
+	int err;
+
+	BT_DBG("got %zu bytes", msglen);
+
+	if (msglen < sizeof(*hdr))
+		return -EINVAL;
+
+	buf = kmalloc(msglen, GFP_ATOMIC);
+	if (!buf)
+		return -ENOMEM;
+
+	if (memcpy_fromiovec(buf, msg->msg_iov, msglen)) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	hdr = (struct mgmt_hdr *) buf;
+	opcode = get_unaligned_le16(&hdr->opcode);
+	len = get_unaligned_le16(&hdr->len);
+
+	if (len != msglen - sizeof(*hdr)) {
+		err = -EINVAL;
+		goto done;
+	}
+
+	switch (opcode) {
+	default:
+		BT_DBG("Unknown op %u", opcode);
+		cmd_status(sk, opcode, 0x01);
+		break;
+	}
+
+	err = msglen;
+
+done:
+	kfree(buf);
+	return err;
+}
-- 
cgit v1.2.3


From a40c406cbdd28dcca3483065bc2ba794cf5aaab7 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Wed, 8 Dec 2010 00:21:07 +0200
Subject: Bluetooth: Make hci_send_to_sock usable for management control
 sockets

In order to send data to management control sockets the function should:

  - skip checks intended for raw HCI data and stack internal events
  - make sure RAW HCI data or stack internal events don't go to
    management control sockets

In order to accomplish this the patch adds a new member to the bluetooth
skb private data to flag skb's that are destined for management control
sockets.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/bluetooth.h |  1 +
 net/bluetooth/hci_sock.c          | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index d81ea7997701..0c5e72503b77 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -144,6 +144,7 @@ struct bt_skb_cb {
 	__u8 tx_seq;
 	__u8 retries;
 	__u8 sar;
+	unsigned short channel;
 };
 #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb))
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 207be7abda9f..f6c18abab797 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -104,6 +104,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 		if (skb->sk == sk)
 			continue;
 
+		if (bt_cb(skb)->channel != hci_pi(sk)->channel)
+			continue;
+
+		if (bt_cb(skb)->channel == HCI_CHANNEL_CONTROL)
+			goto clone;
+
 		/* Apply filter */
 		flt = &hci_pi(sk)->filter;
 
@@ -127,12 +133,14 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 				continue;
 		}
 
+clone:
 		nskb = skb_clone(skb, GFP_ATOMIC);
 		if (!nskb)
 			continue;
 
 		/* Put type byte before the data */
-		memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1);
+		if (bt_cb(skb)->channel == HCI_CHANNEL_RAW)
+			memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1);
 
 		if (sock_queue_rcv_skb(sk, nskb))
 			kfree_skb(nskb);
-- 
cgit v1.2.3


From aa9421041128abb4d269ee1dc502ff65fb3b7d69 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 4 Dec 2010 02:31:41 +0000
Subject: net: init ingress queue

The dev field of ingress queue is forgot to initialized, then NULL
pointer dereference happens in qdisc_alloc().

Move inits of tx queues to netif_alloc_netdev_queues().

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 35 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 55ff66fabce4..ee605c0867e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5112,11 +5112,21 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 }
 #endif
 
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue, void *_unused)
+{
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+	netdev_queue_numa_node_write(queue, -1);
+	queue->dev = dev;
+}
+
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
-	int i;
 
 	BUG_ON(count < 1);
 
@@ -5128,27 +5138,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;
 
-	for (i = 0; i < count; i++) {
-		netdev_queue_numa_node_write(&tx[i], -1);
-		tx[i].dev = dev;
-	}
-	return 0;
-}
-
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
-{
-	/* Initialize queue lock */
-	spin_lock_init(&queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
-	queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
 	spin_lock_init(&dev->tx_global_lock);
+
+	return 0;
 }
 
 /**
@@ -5187,8 +5180,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	netdev_init_queues(dev);
-
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
-- 
cgit v1.2.3


From 01b0c5cfb23f19837650aa53495ace6d0fd7d3f8 Mon Sep 17 00:00:00 2001
From: Thiago Farina <tfransosi@gmail.com>
Date: Sat, 4 Dec 2010 15:22:46 +0000
Subject: net/9p/protocol.c: Remove duplicated macros.

Use the macros already provided by kernel.h file.

Signed-off-by: Thiago Farina <tfransosi@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/protocol.c | 33 +++++++++------------------------
 1 file changed, 9 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 45c15f491401..798beac7f100 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -27,31 +27,16 @@
 
 #include <linux/module.h>
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
 
-#ifndef MIN
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef offset_of
-#define offset_of(type, memb) \
-	((unsigned long)(&((type *)0)->memb))
-#endif
-#ifndef container_of
-#define container_of(obj, type, memb) \
-	((type *)(((char *)obj) - offset_of(type, memb)))
-#endif
-
 static int
 p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
 
@@ -104,7 +89,7 @@ EXPORT_SYMBOL(p9stat_free);
 
 static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 {
-	size_t len = MIN(pdu->size - pdu->offset, size);
+	size_t len = min(pdu->size - pdu->offset, size);
 	memcpy(data, &pdu->sdata[pdu->offset], len);
 	pdu->offset += len;
 	return size - len;
@@ -112,7 +97,7 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 
 static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 {
-	size_t len = MIN(pdu->capacity - pdu->size, size);
+	size_t len = min(pdu->capacity - pdu->size, size);
 	memcpy(&pdu->sdata[pdu->size], data, len);
 	pdu->size += len;
 	return size - len;
@@ -121,7 +106,7 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 static size_t
 pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 {
-	size_t len = MIN(pdu->capacity - pdu->size, size);
+	size_t len = min(pdu->capacity - pdu->size, size);
 	if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
 		len = 0;
 
@@ -201,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				if (errcode)
 					break;
 
-				size = MAX(len, 0);
+				size = max_t(int16_t, len, 0);
 
 				*sptr = kmalloc(size + 1, GFP_KERNEL);
 				if (*sptr == NULL) {
@@ -256,8 +241,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				    p9pdu_readf(pdu, proto_version, "d", count);
 				if (!errcode) {
 					*count =
-					    MIN(*count,
-						pdu->size - pdu->offset);
+					    min_t(int32_t, *count,
+						  pdu->size - pdu->offset);
 					*data = &pdu->sdata[pdu->offset];
 				}
 			}
@@ -421,7 +406,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
 				if (sptr)
-					len = MIN(strlen(sptr), USHRT_MAX);
+					len = min_t(int16_t, strlen(sptr), USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
-- 
cgit v1.2.3


From 941666c2e3e0f9f6a1cb5808d02352d445bd702c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 01:23:53 +0000
Subject: net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le dimanche 05 décembre 2010 à 09:19 +0100, Eric Dumazet a écrit :

> Hmm..
>
> If somebody can explain why RTNL is held in arp_ioctl() (and therefore
> in arp_req_delete()), we might first remove RTNL use in arp_ioctl() so
> that your patch can be applied.
>
> Right now it is not good, because RTNL wont be necessarly held when you
> are going to call arp_invalidate() ?

While doing this analysis, I found a refcount bug in llc, I'll send a
patch for net-2.6

Meanwhile, here is the patch for net-next-2.6

Your patch then can be applied after mine.

Thanks

[PATCH] net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()

dev_getbyhwaddr() was called under RTNL.

Rename it to dev_getbyhwaddr_rcu() and change all its caller to now use
RCU locking instead of RTNL.

Change arp_ioctl() to use RCU instead of RTNL locking.

Note: this fix a dev refcount bug in llc

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |  3 ++-
 net/core/dev.c                 | 17 +++++++----------
 net/ieee802154/af_ieee802154.c |  6 +++---
 net/ipv4/arp.c                 | 17 +++++++++--------
 net/llc/af_llc.c               | 11 ++++++-----
 5 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9ac5dc26e3c..d31bc3c94717 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1360,7 +1360,8 @@ static inline struct net_device *first_net_device(struct net *net)
 
 extern int 			netdev_boot_setup_check(struct net_device *dev);
 extern unsigned long		netdev_boot_base(const char *prefix, int unit);
-extern struct net_device    *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr);
+extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+					      const char *hwaddr);
 extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
 extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
 extern void		dev_add_pack(struct packet_type *pt);
diff --git a/net/core/dev.c b/net/core/dev.c
index ee605c0867e7..822b15b8d11c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
- *	dev_getbyhwaddr - find a device by its hardware address
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
  *	@type: media type of device
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold the
- *	rtnl semaphore. The returned device has not had its ref count increased
+ *	is not found or a pointer to the device. The caller must hold RCU
+ *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
- *	BUGS:
- *	If the API was consistent this would be __dev_get_by_hwaddr
  */
 
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
 {
 	struct net_device *dev;
 
-	ASSERT_RTNL();
-
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
 
 	return NULL;
 }
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
 
 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c
index 93c91b633a56..6df6ecf49708 100644
--- a/net/ieee802154/af_ieee802154.c
+++ b/net/ieee802154/af_ieee802154.c
@@ -52,11 +52,11 @@ struct net_device *ieee802154_get_dev(struct net *net,
 
 	switch (addr->addr_type) {
 	case IEEE802154_ADDR_LONG:
-		rtnl_lock();
-		dev = dev_getbyhwaddr(net, ARPHRD_IEEE802154, addr->hwaddr);
+		rcu_read_lock();
+		dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, addr->hwaddr);
 		if (dev)
 			dev_hold(dev);
-		rtnl_unlock();
+		rcu_read_unlock();
 		break;
 	case IEEE802154_ADDR_SHORT:
 		if (addr->pan_id == 0xffff ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 10af759f2630..a2fc7b961dbc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1017,13 +1017,14 @@ static int arp_req_set_proxy(struct net *net, struct net_device *dev, int on)
 		IPV4_DEVCONF_ALL(net, PROXY_ARP) = on;
 		return 0;
 	}
-	if (__in_dev_get_rtnl(dev)) {
-		IN_DEV_CONF_SET(__in_dev_get_rtnl(dev), PROXY_ARP, on);
+	if (__in_dev_get_rcu(dev)) {
+		IN_DEV_CONF_SET(__in_dev_get_rcu(dev), PROXY_ARP, on);
 		return 0;
 	}
 	return -ENXIO;
 }
 
+/* must be called with rcu_read_lock() */
 static int arp_req_set_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1033,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
 	if (mask && mask != htonl(0xFFFFFFFF))
 		return -EINVAL;
 	if (!dev && (r->arp_flags & ATF_COM)) {
-		dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+		dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
 				      r->arp_ha.sa_data);
 		if (!dev)
 			return -ENODEV;
@@ -1225,10 +1226,10 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	if (!(r.arp_flags & ATF_NETMASK))
 		((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
 							   htonl(0xFFFFFFFFUL);
-	rtnl_lock();
+	rcu_read_lock();
 	if (r.arp_dev[0]) {
 		err = -ENODEV;
-		dev = __dev_get_by_name(net, r.arp_dev);
+		dev = dev_get_by_name_rcu(net, r.arp_dev);
 		if (dev == NULL)
 			goto out;
 
@@ -1252,12 +1253,12 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 		break;
 	case SIOCGARP:
 		err = arp_req_get(&r, dev);
-		if (!err && copy_to_user(arg, &r, sizeof(r)))
-			err = -EFAULT;
 		break;
 	}
 out:
-	rtnl_unlock();
+	rcu_read_unlock();
+	if (cmd == SIOCGARP && !err && copy_to_user(arg, &r, sizeof(r)))
+		err = -EFAULT;
 	return err;
 }
 
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 582612998211..dfd3a648a551 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -316,9 +316,9 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 	if (unlikely(addr->sllc_family != AF_LLC))
 		goto out;
 	rc = -ENODEV;
-	rtnl_lock();
+	rcu_read_lock();
 	if (sk->sk_bound_dev_if) {
-		llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+		llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
 		if (llc->dev) {
 			if (!addr->sllc_arphrd)
 				addr->sllc_arphrd = llc->dev->type;
@@ -329,14 +329,15 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
 			    !llc_mac_match(addr->sllc_mac,
 					   llc->dev->dev_addr)) {
 				rc = -EINVAL;
-				dev_put(llc->dev);
 				llc->dev = NULL;
 			}
 		}
 	} else
-		llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd,
+		llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
 					   addr->sllc_mac);
-	rtnl_unlock();
+	if (llc->dev)
+		dev_hold(llc->dev);
+	rcu_read_unlock();
 	if (!llc->dev)
 		goto out;
 	if (!addr->sllc_sap) {
-- 
cgit v1.2.3


From 62ab0812137ec4f9884dd7de346238841ac03283 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 6 Dec 2010 20:50:09 +0000
Subject: filter: constify sk_run_filter()

sk_run_filter() doesnt write on skb, change its prototype to reflect
this.

Fix two af_packet comments.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h  |  2 +-
 net/core/filter.c       |  7 ++++---
 net/core/timestamping.c |  2 +-
 net/packet/af_packet.c  | 31 ++++++++++++++++---------------
 4 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5334adaf4072..45266b75409a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -148,7 +148,7 @@ struct sk_buff;
 struct sock;
 
 extern int sk_filter(struct sock *sk, struct sk_buff *skb);
-extern unsigned int sk_run_filter(struct sk_buff *skb,
+extern unsigned int sk_run_filter(const struct sk_buff *skb,
 				  const struct sock_filter *filter);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_detach_filter(struct sock *sk);
diff --git a/net/core/filter.c b/net/core/filter.c
index ac4920a87be5..25500f16a18a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@ enum {
 };
 
 /* No hurry in this branch */
-static void *__load_pointer(struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k)
 {
 	u8 *ptr = NULL;
 
@@ -102,7 +102,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	return NULL;
 }
 
-static inline void *load_pointer(struct sk_buff *skb, int k,
+static inline void *load_pointer(const struct sk_buff *skb, int k,
 				 unsigned int size, void *buffer)
 {
 	if (k >= 0)
@@ -160,7 +160,8 @@ EXPORT_SYMBOL(sk_filter);
  * and last instruction guaranteed to be a RET, we dont need to check
  * flen. (We used to pass to this function the length of filter)
  */
-unsigned int sk_run_filter(struct sk_buff *skb, const struct sock_filter *fentry)
+unsigned int sk_run_filter(const struct sk_buff *skb,
+			   const struct sock_filter *fentry)
 {
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index dac7ed687f60..b124d28ff1c8 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -26,7 +26,7 @@ static struct sock_filter ptp_filter[] = {
 	PTP_FILTER
 };
 
-static unsigned int classify(struct sk_buff *skb)
+static unsigned int classify(const struct sk_buff *skb)
 {
 	if (likely(skb->dev &&
 		   skb->dev->phydev &&
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a11c731d2ee4..17eafe5b48c6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -517,7 +517,8 @@ out_free:
 	return err;
 }
 
-static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+static inline unsigned int run_filter(const struct sk_buff *skb,
+				      const struct sock *sk,
 				      unsigned int res)
 {
 	struct sk_filter *filter;
@@ -532,15 +533,15 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
 }
 
 /*
-   This function makes lazy skb cloning in hope that most of packets
-   are discarded by BPF.
-
-   Note tricky part: we DO mangle shared skb! skb->data, skb->len
-   and skb->cb are mangled. It works because (and until) packets
-   falling here are owned by current CPU. Output packets are cloned
-   by dev_queue_xmit_nit(), input packets are processed by net_bh
-   sequencially, so that if we return skb to original state on exit,
-   we will not harm anyone.
+ * This function makes lazy skb cloning in hope that most of packets
+ * are discarded by BPF.
+ *
+ * Note tricky part: we DO mangle shared skb! skb->data, skb->len
+ * and skb->cb are mangled. It works because (and until) packets
+ * falling here are owned by current CPU. Output packets are cloned
+ * by dev_queue_xmit_nit(), input packets are processed by net_bh
+ * sequencially, so that if we return skb to original state on exit,
+ * we will not harm anyone.
  */
 
 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -566,11 +567,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->header_ops) {
 		/* The device has an explicit notion of ll header,
-		   exported to higher levels.
-
-		   Otherwise, the device hides datails of it frame
-		   structure, so that corresponding packet head
-		   never delivered to user.
+		 * exported to higher levels.
+		 *
+		 * Otherwise, the device hides details of its frame
+		 * structure, so that corresponding packet head is
+		 * never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb_mac_header(skb));
-- 
cgit v1.2.3


From 15c2d75f49189e1769c5e8f5f099d03d055c4910 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 7 Dec 2010 00:30:37 +0000
Subject: net: call dev_queue_xmit_nit() after skb_dst_drop()

Avoid some atomic ops on dst refcount, calling dev_queue_xmit_nit()
after skb_dst_drop() in dev_hard_start_xmit().

When queueing a packet into af_packet socket, we drop dst anyway.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 822b15b8d11c..d28b3a023bb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2022,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		if (!list_empty(&ptype_all))
-			dev_queue_xmit_nit(skb, dev);
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2032,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		if (!list_empty(&ptype_all))
+			dev_queue_xmit_nit(skb, dev);
+
 		skb_orphan_try(skb);
 
 		if (vlan_tx_tag_present(skb) &&
-- 
cgit v1.2.3


From f6dafa95d1a48f73ab4a5b0f7dc0dcb72817e051 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 7 Dec 2010 04:26:16 +0000
Subject: af_packet: eliminate pgv_to_page on some arches

Some arches don't need flush_dcache_page(), and don't implement it, so
we can eliminate pgv_to_page() calls on those arches.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 17eafe5b48c6..9292ec93eb52 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -223,7 +223,7 @@ struct packet_skb_cb {
 
 #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
 
-static inline struct page *pgv_to_page(void *addr)
+static inline __pure struct page *pgv_to_page(void *addr)
 {
 	if (is_vmalloc_addr(addr))
 		return vmalloc_to_page(addr);
@@ -806,6 +806,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	__packet_set_status(po, h.raw, status);
 	smp_mb();
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 	{
 		u8 *start, *end;
 
@@ -813,6 +814,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		for (start = h.raw; start < end; start += PAGE_SIZE)
 			flush_dcache_page(pgv_to_page(start));
 	}
+#endif
 
 	sk->sk_data_ready(sk, 0);
 
-- 
cgit v1.2.3


From 920b8d913bd3d963d5c88bca160a272b71e0c95a Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 7 Dec 2010 05:05:18 +0000
Subject: af_packet: fix freeing pg_vec twice on error path

It is introduced in:
        commit 0e3125c755445664f00ad036e4fc2cd32fd52877
        Author: Neil Horman <nhorman@tuxdriver.com>
        Date:   Tue Nov 16 10:26:47 2010 -0800

        packet: Enhance AF_PACKET implementation to not require high order contiguous memory allocation (v4)

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9292ec93eb52..246a04a13234 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2409,7 +2409,6 @@ out:
 
 out_free_pgvec:
 	free_pg_vec(pg_vec, order, block_nr);
-	kfree(pg_vec);
 	pg_vec = NULL;
 	goto out;
 }
-- 
cgit v1.2.3


From 50b12f597be354a5a224f05c65c54c0667e57aec Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 19 Nov 2010 12:40:25 +0100
Subject: cfg80211: Add new BSS attribute ht_opmode

Add a new BSS attribute to allow hostapd to set the current HT opmode.
Otherwise drivers won't be able to set up protection for HT rates in
AP mode.

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 4 ++++
 include/net/cfg80211.h  | 3 +++
 net/wireless/nl80211.c  | 5 +++++
 3 files changed, 12 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8e28053ea423..380421253d16 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -849,6 +849,8 @@ enum nl80211_commands {
  *	flag isn't set, the frame will be rejected. This is also used as an
  *	nl80211 capability flag.
  *
+ * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16)
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1025,6 +1027,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_OFFCHANNEL_TX_OK,
 
+	NL80211_ATTR_BSS_HT_OPMODE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8764c9a5bab7..0d5979924be3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -605,6 +605,8 @@ struct mpath_info {
  *	(or NULL for no change)
  * @basic_rates_len: number of basic rates
  * @ap_isolate: do not forward packets between connected stations
+ * @ht_opmode: HT Operation mode
+ * 	(u16 = opmode, -1 = do not change)
  */
 struct bss_parameters {
 	int use_cts_prot;
@@ -613,6 +615,7 @@ struct bss_parameters {
 	u8 *basic_rates;
 	u8 basic_rates_len;
 	int ap_isolate;
+	int ht_opmode;
 };
 
 /*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2cf03331d4a2..c3f80e565365 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -121,6 +121,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 },
 	[NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY,
 					   .len = NL80211_MAX_SUPP_RATES },
+	[NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 },
 
 	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
 
@@ -2462,6 +2463,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	params.use_short_preamble = -1;
 	params.use_short_slot_time = -1;
 	params.ap_isolate = -1;
+	params.ht_opmode = -1;
 
 	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
 		params.use_cts_prot =
@@ -2480,6 +2482,9 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	}
 	if (info->attrs[NL80211_ATTR_AP_ISOLATE])
 		params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
+	if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE])
+		params.ht_opmode =
+			nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]);
 
 	if (!rdev->ops->change_bss)
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 80d7e403c97b712e302ec37e9beceff1dbdc9402 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Fri, 19 Nov 2010 12:40:26 +0100
Subject: mac80211: Apply ht_opmode changes in ieee80211_change_bss

Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index af9620406321..c30b8b72eedb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1134,6 +1134,12 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 			sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
 	}
 
+	if (params->ht_opmode >= 0) {
+		sdata->vif.bss_conf.ht_operation_mode =
+			(u16) params->ht_opmode;
+		changed |= BSS_CHANGED_HT;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	return 0;
-- 
cgit v1.2.3


From 2f886750118c1781d3b53268bf25519aef1d4d71 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Tue, 7 Dec 2010 11:27:01 -0800
Subject: mac80211: Show max number of probe tries in debug message.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/work.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 2b5c3f267198..de43753076d2 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -458,8 +458,9 @@ ieee80211_direct_probe(struct ieee80211_work *wk)
 		return WORK_ACT_TIMEOUT;
 	}
 
-	printk(KERN_DEBUG "%s: direct probe to %pM (try %d)\n",
-			sdata->name, wk->filter_ta, wk->probe_auth.tries);
+	printk(KERN_DEBUG "%s: direct probe to %pM (try %d/%i)\n",
+	       sdata->name, wk->filter_ta, wk->probe_auth.tries,
+	       IEEE80211_AUTH_MAX_TRIES);
 
 	/*
 	 * Direct probe is sent to broadcast address as some APs
-- 
cgit v1.2.3


From defb3519a64141608725e2dac5a5aa9a3c644bae Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 8 Dec 2010 21:16:57 -0800
Subject: net: Abstract away all dst_entry metrics accesses.

Use helper functions to hide all direct accesses, especially writes,
to dst_entry metrics values.

This will allow us to:

1) More easily change how the metrics are stored.

2) Implement COW for metrics.

In particular this will help us put metrics into the inetpeer
cache if that is what we end up doing.  We can make the _metrics
member a pointer instead of an array, initially have it point
at the read-only metrics in the FIB, and then on the first set
grab an inetpeer entry and point the _metrics member there.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/dst.h         | 26 ++++++++++++++++---
 net/bridge/br_device.c    |  2 +-
 net/bridge/br_netfilter.c |  2 +-
 net/decnet/dn_route.c     | 13 +++++-----
 net/ipv4/ip_gre.c         |  2 +-
 net/ipv4/route.c          | 55 +++++++++++++++++++++------------------
 net/ipv4/tcp_input.c      | 22 +++++++++-------
 net/ipv6/ndisc.c          |  5 ++--
 net/ipv6/route.c          | 66 ++++++++++++++++++++++++++---------------------
 net/xfrm/xfrm_policy.c    |  6 ++---
 10 files changed, 118 insertions(+), 81 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index a5bd72646d65..85dee3a57b9b 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -70,7 +70,7 @@ struct dst_entry {
 
 	struct  dst_ops	        *ops;
 
-	u32			metrics[RTAX_MAX];
+	u32			_metrics[RTAX_MAX];
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	__u32			tclassid;
@@ -106,7 +106,27 @@ struct dst_entry {
 static inline u32
 dst_metric(const struct dst_entry *dst, int metric)
 {
-	return dst->metrics[metric-1];
+	return dst->_metrics[metric-1];
+}
+
+static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
+{
+	dst->_metrics[metric-1] = val;
+}
+
+static inline void dst_import_metrics(struct dst_entry *dst, const u32 *src_metrics)
+{
+	memcpy(dst->_metrics, src_metrics, RTAX_MAX * sizeof(u32));
+}
+
+static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src)
+{
+	dst_import_metrics(dest, src->_metrics);
+}
+
+static inline u32 *dst_metrics_ptr(struct dst_entry *dst)
+{
+	return dst->_metrics;
 }
 
 static inline u32
@@ -134,7 +154,7 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr
 static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric,
 				      unsigned long rtt)
 {
-	dst->metrics[metric-1] = jiffies_to_msecs(rtt);
+	dst_metric_set(dst, metric, jiffies_to_msecs(rtt));
 }
 
 static inline u32
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 17cb0b633576..556443566e9c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -141,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	/* remember the MTU in the rtable for PMTU */
-	br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
+	dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu);
 #endif
 
 	return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6e1392093911..16f5c333596a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -124,7 +124,7 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 	atomic_set(&rt->dst.__refcnt, 1);
 	rt->dst.dev = br->dev;
 	rt->dst.path = &rt->dst;
-	rt->dst.metrics[RTAX_MTU - 1] = 1500;
+	dst_metric_set(&rt->dst, RTAX_MTU, 1500);
 	rt->dst.flags	= DST_NOXFRM;
 	rt->dst.ops = &fake_dst_ops;
 }
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 8280e43c8861..e2e926841fe6 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -240,13 +240,13 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 	if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
 		if (!(dst_metric_locked(dst, RTAX_MTU))) {
-			dst->metrics[RTAX_MTU-1] = mtu;
+			dst_metric_set(dst, RTAX_MTU, mtu);
 			dst_set_expires(dst, dn_rt_mtu_expires);
 		}
 		if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
 			u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
 			if (dst_metric(dst, RTAX_ADVMSS) > mss)
-				dst->metrics[RTAX_ADVMSS-1] = mss;
+				dst_metric_set(dst, RTAX_ADVMSS, mss);
 		}
 	}
 }
@@ -806,8 +806,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		memcpy(rt->dst.metrics, fi->fib_metrics,
-		       sizeof(rt->dst.metrics));
+		dst_import_metrics(&rt->dst, fi->fib_metrics);
 	}
 	rt->rt_type = res->type;
 
@@ -820,11 +819,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 
 	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
 	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
-		rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
 	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
 	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
 	    dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
-		rt->dst.metrics[RTAX_ADVMSS-1] = mss;
+		dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
 	return 0;
 }
 
@@ -1502,7 +1501,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
 	if (rt->rt_daddr != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto rtattr_failure;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 258c98d5fa79..ff4e7a4e33ed 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -818,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
-				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
+				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
 			}
 		}
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3843c2dfde82..26ac396eaa5e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1686,11 +1686,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 					if (mtu < dst_mtu(&rth->dst)) {
 						dst_confirm(&rth->dst);
 						if (mtu < ip_rt_min_pmtu) {
+							u32 lock = dst_metric(&rth->dst,
+									      RTAX_LOCK);
 							mtu = ip_rt_min_pmtu;
-							rth->dst.metrics[RTAX_LOCK-1] |=
-								(1 << RTAX_MTU);
+							lock |= (1 << RTAX_MTU);
+							dst_metric_set(&rth->dst, RTAX_LOCK,
+								       lock);
 						}
-						rth->dst.metrics[RTAX_MTU-1] = mtu;
+						dst_metric_set(&rth->dst, RTAX_MTU, mtu);
 						dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 					}
@@ -1708,10 +1711,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	if (dst_mtu(dst) > mtu && mtu >= 68 &&
 	    !(dst_metric_locked(dst, RTAX_MTU))) {
 		if (mtu < ip_rt_min_pmtu) {
+			u32 lock = dst_metric(dst, RTAX_LOCK);
 			mtu = ip_rt_min_pmtu;
-			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+			dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
 		}
-		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_metric_set(dst, RTAX_MTU, mtu);
 		dst_set_expires(dst, ip_rt_mtu_expires);
 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
@@ -1796,36 +1800,37 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
+	struct dst_entry *dst = &rt->dst;
 	struct fib_info *fi = res->fi;
 
 	if (fi) {
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->dst.metrics, fi->fib_metrics,
-		       sizeof(rt->dst.metrics));
+		dst_import_metrics(dst, fi->fib_metrics);
 		if (fi->fib_mtu == 0) {
-			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
-			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
+			dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+			if (dst_metric_locked(dst, RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->dst.dev->mtu > 576)
-				rt->dst.metrics[RTAX_MTU-1] = 576;
+			    dst->dev->mtu > 576)
+				dst_metric_set(dst, RTAX_MTU, 576);
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
-
-	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
-		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
-		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
-		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
-				       ip_rt_min_advmss);
-	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+		dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+
+	if (dst_metric(dst, RTAX_HOPLIMIT) == 0)
+		dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl);
+	if (dst_mtu(dst) > IP_MAX_MTU)
+		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
+	if (dst_metric(dst, RTAX_ADVMSS) == 0)
+		dst_metric_set(dst, RTAX_ADVMSS,
+			       max_t(unsigned int, dst->dev->mtu - 40,
+				     ip_rt_min_advmss));
+	if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40)
+		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -2720,7 +2725,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		dst_copy_metrics(new, &ort->dst);
 
 		new->dev = ort->dst.dev;
 		if (new->dev)
@@ -2827,7 +2832,7 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
 	if (rt->fl.mark)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6d8ab1c4efc3..824e8c8a17ad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -734,7 +734,7 @@ void tcp_update_metrics(struct sock *sk)
 			 * Reset our results.
 			 */
 			if (!(dst_metric_locked(dst, RTAX_RTT)))
-				dst->metrics[RTAX_RTT - 1] = 0;
+				dst_metric_set(dst, RTAX_RTT, 0);
 			return;
 		}
 
@@ -776,34 +776,38 @@ void tcp_update_metrics(struct sock *sk)
 			if (dst_metric(dst, RTAX_SSTHRESH) &&
 			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
 			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
+				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1);
 			if (!dst_metric_locked(dst, RTAX_CWND) &&
 			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
+				dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd);
 		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
 			   icsk->icsk_ca_state == TCP_CA_Open) {
 			/* Cong. avoidance phase, cwnd is reliable. */
 			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] =
-					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+				dst_metric_set(dst, RTAX_SSTHRESH,
+					       max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
 			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+				dst_metric_set(dst, RTAX_CWND,
+					       (dst_metric(dst, RTAX_CWND) +
+						tp->snd_cwnd) >> 1);
 		} else {
 			/* Else slow start did not finish, cwnd is non-sense,
 			   ssthresh may be also invalid.
 			 */
 			if (!dst_metric_locked(dst, RTAX_CWND))
-				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+				dst_metric_set(dst, RTAX_CWND,
+					       (dst_metric(dst, RTAX_CWND) +
+						tp->snd_ssthresh) >> 1);
 			if (dst_metric(dst, RTAX_SSTHRESH) &&
 			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
 			    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
-				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+				dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh);
 		}
 
 		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
 			if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
 			    tp->reordering != sysctl_tcp_reordering)
-				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+				dst_metric_set(dst, RTAX_REORDERING, tp->reordering);
 		}
 	}
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e18f84130203..2342545a5ee9 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (ra_msg->icmph.icmp6_hop_limit) {
 		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
 		if (rt)
-			rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+			dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+				       ra_msg->icmph.icmp6_hop_limit);
 	}
 
 skip_defrtr:
@@ -1377,7 +1378,7 @@ skip_linkparms:
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->dst.metrics[RTAX_MTU-1] = mtu;
+				dst_metric_set(&rt->dst, RTAX_MTU, mtu);
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 026caef0326c..4aed0812b512 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -129,7 +129,6 @@ static struct rt6_info ip6_null_entry_template = {
 		.__use		= 1,
 		.obsolete	= -1,
 		.error		= -ENETUNREACH,
-		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
 		.input		= ip6_pkt_discard,
 		.output		= ip6_pkt_discard_out,
 	},
@@ -150,7 +149,6 @@ static struct rt6_info ip6_prohibit_entry_template = {
 		.__use		= 1,
 		.obsolete	= -1,
 		.error		= -EACCES,
-		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
 		.input		= ip6_pkt_prohibit,
 		.output		= ip6_pkt_prohibit_out,
 	},
@@ -166,7 +164,6 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 		.__use		= 1,
 		.obsolete	= -1,
 		.error		= -EINVAL,
-		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
 		.input		= dst_discard,
 		.output		= dst_discard,
 	},
@@ -844,7 +841,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 		new->input = dst_discard;
 		new->output = dst_discard;
 
-		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		dst_copy_metrics(new, &ort->dst);
 		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
@@ -928,10 +925,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
 		rt6->rt6i_flags |= RTF_MODIFIED;
 		if (mtu < IPV6_MIN_MTU) {
+			u32 features = dst_metric(dst, RTAX_FEATURES);
 			mtu = IPV6_MIN_MTU;
-			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			features |= RTAX_FEATURE_ALLFRAG;
+			dst_metric_set(dst, RTAX_FEATURES, features);
 		}
-		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_metric_set(dst, RTAX_MTU, mtu);
 		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 	}
 }
@@ -989,9 +988,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_idev     = idev;
 	rt->rt6i_nexthop  = neigh;
 	atomic_set(&rt->dst.__refcnt, 1);
-	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
+	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
+	dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
 	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
@@ -1305,17 +1304,17 @@ install_route:
 					goto out;
 				}
 
-				rt->dst.metrics[type - 1] = nla_get_u32(nla);
+				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
 			}
 		}
 	}
 
 	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
-		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+		dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	if (!dst_mtu(&rt->dst))
-		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+		dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
 	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
-		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
 	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
@@ -1541,9 +1540,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-							dst_mtu(&nrt->dst));
+	dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
+	dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
+							   dst_mtu(&nrt->dst)));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
@@ -1602,9 +1601,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->dst.metrics[RTAX_MTU-1] = pmtu;
-		if (allfrag)
-			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
+		if (allfrag) {
+			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
+			features |= RTAX_FEATURE_ALLFRAG;
+			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
+		}
 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
@@ -1621,9 +1623,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt = rt6_alloc_clone(rt, daddr);
 
 	if (nrt) {
-		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
-		if (allfrag)
-			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
+		if (allfrag) {
+			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
+			features |= RTAX_FEATURE_ALLFRAG;
+			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
+		}
 
 		/* According to RFC 1981, detecting PMTU increase shouldn't be
 		 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1674,7 +1679,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 		rt->dst.input = ort->dst.input;
 		rt->dst.output = ort->dst.output;
 
-		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		dst_copy_metrics(&rt->dst, &ort->dst);
 		rt->dst.error = ort->dst.error;
 		rt->dst.dev = ort->dst.dev;
 		if (rt->dst.dev)
@@ -1966,9 +1971,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
-	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
+	dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
+	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -2068,8 +2073,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	    (dst_mtu(&rt->dst) >= arg->mtu ||
 	     (dst_mtu(&rt->dst) < arg->mtu &&
 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
-		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
-		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
 	}
 	return 0;
 }
@@ -2295,7 +2300,7 @@ static int rt6_fill_node(struct net *net,
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
-	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
 		goto nla_put_failure;
 
 	if (rt->dst.neighbour)
@@ -2686,6 +2691,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2696,6 +2702,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2705,6 +2712,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 044e77898512..6e50ccd8c532 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1433,7 +1433,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		}
 
 		xdst->route = dst;
-		memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
+		dst_copy_metrics(dst1, dst);
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
 			family = xfrm[i]->props.family;
@@ -2271,7 +2271,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
 		if (pmtu > route_mtu_cached)
 			pmtu = route_mtu_cached;
 
-		dst->metrics[RTAX_MTU-1] = pmtu;
+		dst_metric_set(dst, RTAX_MTU, pmtu);
 	} while ((dst = dst->next));
 }
 
@@ -2349,7 +2349,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 		mtu = xfrm_state_mtu(dst->xfrm, mtu);
 		if (mtu > last->route_mtu_cached)
 			mtu = last->route_mtu_cached;
-		dst->metrics[RTAX_MTU-1] = mtu;
+		dst_metric_set(dst, RTAX_MTU, mtu);
 
 		if (last == first)
 			break;
-- 
cgit v1.2.3


From 68835aba4d9b74e2f94106d13b6a4bddc447c4c8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 30 Nov 2010 19:04:07 +0000
Subject: net: optimize INET input path further

Followup of commit b178bb3dfc30 (net: reorder struct sock fields)

Optimize INET input path a bit further, by :

1) moving sk_refcnt close to sk_lock.

This reduces number of dirtied cache lines by one on 64bit arches (and
64 bytes cache line size).

2) moving inet_daddr & inet_rcv_saddr at the beginning of sk

(same cache line than hash / family / bound_dev_if / nulls_node)

This reduces number of accessed cache lines in lookups by one, and dont
increase size of inet and timewait socks.
inet and tw sockets now share same place-holder for these fields.

Before patch :

offsetof(struct sock, sk_refcnt) = 0x10
offsetof(struct sock, sk_lock) = 0x40
offsetof(struct sock, sk_receive_queue) = 0x60
offsetof(struct inet_sock, inet_daddr) = 0x270
offsetof(struct inet_sock, inet_rcv_saddr) = 0x274

After patch :

offsetof(struct sock, sk_refcnt) = 0x44
offsetof(struct sock, sk_lock) = 0x48
offsetof(struct sock, sk_receive_queue) = 0x68
offsetof(struct inet_sock, inet_daddr) = 0x0
offsetof(struct inet_sock, inet_rcv_saddr) = 0x4

compute_score() (udp or tcp) now use a single cache line per ignored
item, instead of two.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h          |  5 +++--
 include/net/inet_timewait_sock.h | 20 +++++++-------------
 include/net/sock.h               | 37 ++++++++++++++++++++++++-------------
 net/core/sock.c                  | 11 ++++++-----
 net/ipv4/inet_connection_sock.c  |  7 +++----
 net/ipv6/udp.c                   |  4 ++--
 6 files changed, 45 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 8945f9fb192a..8181498fa96c 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -116,8 +116,9 @@ struct inet_sock {
 	struct ipv6_pinfo	*pinet6;
 #endif
 	/* Socket demultiplex comparisons on incoming packets. */
-	__be32			inet_daddr;
-	__be32			inet_rcv_saddr;
+#define inet_daddr		sk.__sk_common.skc_daddr
+#define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
+
 	__be16			inet_dport;
 	__u16			inet_num;
 	__be32			inet_saddr;
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index a066fdd50da6..17404b5388a7 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -88,12 +88,6 @@ extern void inet_twdr_hangman(unsigned long data);
 extern void inet_twdr_twkill_work(struct work_struct *work);
 extern void inet_twdr_twcal_tick(unsigned long data);
 
-#if (BITS_PER_LONG == 64)
-#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
-#else
-#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4
-#endif
-
 struct inet_bind_bucket;
 
 /*
@@ -117,15 +111,15 @@ struct inet_timewait_sock {
 #define tw_hash			__tw_common.skc_hash
 #define tw_prot			__tw_common.skc_prot
 #define tw_net			__tw_common.skc_net
+#define tw_daddr        	__tw_common.skc_daddr
+#define tw_rcv_saddr    	__tw_common.skc_rcv_saddr
 	int			tw_timeout;
 	volatile unsigned char	tw_substate;
-	/* 3 bits hole, try to pack */
 	unsigned char		tw_rcv_wscale;
+
 	/* Socket demultiplex comparisons on incoming packets. */
-	/* these five are in inet_sock */
+	/* these three are in inet_sock */
 	__be16			tw_sport;
-	__be32			tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES)));
-	__be32			tw_rcv_saddr;
 	__be16			tw_dport;
 	__u16			tw_num;
 	kmemcheck_bitfield_begin(flags);
@@ -191,10 +185,10 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 	return (struct inet_timewait_sock *)sk;
 }
 
-static inline __be32 inet_rcv_saddr(const struct sock *sk)
+static inline __be32 sk_rcv_saddr(const struct sock *sk)
 {
-	return likely(sk->sk_state != TCP_TIME_WAIT) ?
-		inet_sk(sk)->inet_rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
+/* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */
+	return sk->__sk_common.skc_rcv_saddr;
 }
 
 extern void inet_twsk_put(struct inet_timewait_sock *tw);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3482004e5c29..82e86034702f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -105,10 +105,8 @@ struct net;
 
 /**
  *	struct sock_common - minimal network layer representation of sockets
- *	@skc_node: main hash linkage for various protocol lookup tables
- *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
- *	@skc_refcnt: reference count
- *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_daddr: Foreign IPv4 addr
+ *	@skc_rcv_saddr: Bound local IPv4 addr
  *	@skc_hash: hash value used with various protocol lookup tables
  *	@skc_u16hashes: two u16 hash values used by UDP lookup tables
  *	@skc_family: network address family
@@ -119,20 +117,20 @@ struct net;
  *	@skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
  *	@skc_prot: protocol handlers inside a network family
  *	@skc_net: reference to the network namespace of this socket
+ *	@skc_node: main hash linkage for various protocol lookup tables
+ *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
+ *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_refcnt: reference count
  *
  *	This is the minimal network layer representation of sockets, the header
  *	for struct sock and struct inet_timewait_sock.
  */
 struct sock_common {
-	/*
-	 * first fields are not copied in sock_copy()
+	/* skc_daddr and skc_rcv_saddr must be grouped :
+	 * cf INET_MATCH() and INET_TW_MATCH()
 	 */
-	union {
-		struct hlist_node	skc_node;
-		struct hlist_nulls_node skc_nulls_node;
-	};
-	atomic_t		skc_refcnt;
-	int			skc_tx_queue_mapping;
+	__be32			skc_daddr;
+	__be32			skc_rcv_saddr;
 
 	union  {
 		unsigned int	skc_hash;
@@ -150,6 +148,18 @@ struct sock_common {
 #ifdef CONFIG_NET_NS
 	struct net	 	*skc_net;
 #endif
+	/*
+	 * fields between dontcopy_begin/dontcopy_end
+	 * are not copied in sock_copy()
+	 */
+	int			skc_dontcopy_begin[0];
+	union {
+		struct hlist_node	skc_node;
+		struct hlist_nulls_node skc_nulls_node;
+	};
+	int			skc_tx_queue_mapping;
+	atomic_t		skc_refcnt;
+	int                     skc_dontcopy_end[0];
 };
 
 /**
@@ -232,7 +242,8 @@ struct sock {
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
 
-#define sk_copy_start		__sk_common.skc_hash
+#define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
+#define sk_dontcopy_end		__sk_common.skc_dontcopy_end
 #define sk_hash			__sk_common.skc_hash
 #define sk_family		__sk_common.skc_family
 #define sk_state		__sk_common.skc_state
diff --git a/net/core/sock.c b/net/core/sock.c
index fb6080111461..bcdb6ff6e621 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk)
 /*
  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
  * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
  */
 static void sock_copy(struct sock *nsk, const struct sock *osk)
 {
 #ifdef CONFIG_SECURITY_NETWORK
 	void *sptr = nsk->sk_security;
 #endif
-	BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) !=
-		     sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) +
-		     sizeof(osk->sk_tx_queue_mapping));
-	memcpy(&nsk->sk_copy_start, &osk->sk_copy_start,
-	       osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start));
+	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
+
+	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
+	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
 #ifdef CONFIG_SECURITY_NETWORK
 	nsk->sk_security = sptr;
 	security_sk_clone(osk, nsk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 06f5f8f482f0..25e318153f14 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range);
 int inet_csk_bind_conflict(const struct sock *sk,
 			   const struct inet_bind_bucket *tb)
 {
-	const __be32 sk_rcv_saddr = inet_rcv_saddr(sk);
 	struct sock *sk2;
 	struct hlist_node *node;
 	int reuse = sk->sk_reuse;
@@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
 			if (!reuse || !sk2->sk_reuse ||
 			    sk2->sk_state == TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
-				if (!sk2_rcv_saddr || !sk_rcv_saddr ||
-				    sk2_rcv_saddr == sk_rcv_saddr)
+				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
+				    sk2_rcv_saddr == sk_rcv_saddr(sk))
 					break;
 			}
 		}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b541a4e009fb..7aad12770867 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
 	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-	__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
-	__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
+	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
 	int sk2_ipv6only = inet_v6_ipv6only(sk2);
 	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
-- 
cgit v1.2.3


From 4bc65dd8d88671712d71592a83374cfb0b5fce7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 7 Dec 2010 22:26:15 +0000
Subject: filter: use size of fetched data in __load_pointer()

__load_pointer() checks data we fetch from skb is included in head
portion, but assumes we fetch one byte, instead of up to four.

This wont crash because we have extra bytes (struct skb_shared_info)
after head, but this can read uninitialized bytes.

Fix this using size of the data (1, 2, 4 bytes) in the test.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index e193e29d4671..e8a6ac411ffb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -88,7 +88,7 @@ enum {
 };
 
 /* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k)
+static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
 {
 	u8 *ptr = NULL;
 
@@ -97,7 +97,7 @@ static void *__load_pointer(const struct sk_buff *skb, int k)
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
-	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
+	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
 		return ptr;
 	return NULL;
 }
@@ -110,7 +110,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
 	else {
 		if (k >= SKF_AD_OFF)
 			return NULL;
-		return __load_pointer(skb, k);
+		return __load_pointer(skb, k, size);
 	}
 }
 
-- 
cgit v1.2.3


From b7ec19af63b467e30189984fb24e6157603608e3 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Fri, 10 Dec 2010 12:49:23 +0100
Subject: dccp: remove unused macros

Remove macros which have been unused since the initial implementation
(commit 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c, [DCCP]: Initial
 implementation from Tue Aug 9 20:14:34 2005 -0700).

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 48ad5d9da7cb..45087052d894 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -93,9 +93,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 #define DCCP_FALLBACK_RTT	(USEC_PER_SEC / 5)
 #define DCCP_SANE_RTT_MAX	(3 * USEC_PER_SEC)
 
-/* Maximal interval between probes for local resources.  */
-#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
-
 /* sysctl variables for DCCP */
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
@@ -203,12 +200,7 @@ struct dccp_mib {
 DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
 #define DCCP_INC_STATS(field)	    SNMP_INC_STATS(dccp_statistics, field)
 #define DCCP_INC_STATS_BH(field)    SNMP_INC_STATS_BH(dccp_statistics, field)
-#define DCCP_INC_STATS_USER(field)  SNMP_INC_STATS_USER(dccp_statistics, field)
 #define DCCP_DEC_STATS(field)	    SNMP_DEC_STATS(dccp_statistics, field)
-#define DCCP_ADD_STATS_BH(field, val) \
-			SNMP_ADD_STATS_BH(dccp_statistics, field, val)
-#define DCCP_ADD_STATS_USER(field, val)	\
-			SNMP_ADD_STATS_USER(dccp_statistics, field, val)
 
 /*
  * 	Checksumming routines
-- 
cgit v1.2.3


From 4c0833bcd4d302fe783b9f8286a00ca2999d6200 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 10 Dec 2010 03:18:04 +0000
Subject: bridge: Fix return values of
 br_multicast_add_group/br_multicast_new_group

If br_multicast_new_group returns NULL, we would return 0 (no error) to
the caller of br_multicast_add_group, which is not what we want. Instead
br_multicast_new_group should return ERR_PTR(-ENOMEM) in this case.
Also propagate the error number returned by br_mdb_rehash properly.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 326e599f83fb..85a0398b221e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -654,11 +654,13 @@ static struct net_bridge_mdb_entry *br_multicast_new_group(
 	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
 	int hash;
+	int err;
 
 	mdb = rcu_dereference_protected(br->mdb, 1);
 	if (!mdb) {
-		if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
-			return NULL;
+		err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0);
+		if (err)
+			return ERR_PTR(err);
 		goto rehash;
 	}
 
@@ -680,7 +682,7 @@ rehash:
 
 	mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
 	if (unlikely(!mp))
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	mp->br = br;
 	mp->addr = *group;
@@ -713,7 +715,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 
 	mp = br_multicast_new_group(br, port, group);
 	err = PTR_ERR(mp);
-	if (unlikely(IS_ERR(mp) || !mp))
+	if (IS_ERR(mp))
 		goto err;
 
 	if (!port) {
-- 
cgit v1.2.3


From 457de4383ec6144df7d5a82cdfb110c825305a51 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 10 Dec 2010 13:16:09 -0800
Subject: ipv6: Fix 'release_it' logic in tcp_v6_get_peer()

We accidently set it to "true" for the case where we
are using a route bound peer.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 319458558df9..fee076891646 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1877,7 +1877,7 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 		if (!rt->rt6i_peer)
 			rt6_bind_peer(rt, 1);
 		peer = rt->rt6i_peer;
-		*release_it = true;
+		*release_it = false;
 	}
 
 	return peer;
-- 
cgit v1.2.3


From c07224005dd3fe746246acadc9be652a588a4d7f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 9 Dec 2010 03:40:30 +0000
Subject: net/ipv6/udp.c: fix typo in flush_stack()

skb1 should be passed as parameter to sk_rcvqueues_full() here.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7aad12770867..26a8da3f2044 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -602,7 +602,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
 
 		sk = stack[i];
 		if (skb1) {
-			if (sk_rcvqueues_full(sk, skb)) {
+			if (sk_rcvqueues_full(sk, skb1)) {
 				kfree_skb(skb1);
 				goto drop;
 			}
-- 
cgit v1.2.3


From 35d2856b4693e8de5d616307b56cef296b839157 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 8 Dec 2010 04:37:49 +0000
Subject: xfrm: Add Traffic Flow Confidentiality padding XFRM attribute

The XFRMA_TFCPAD attribute for XFRM state installation configures
Traffic Flow Confidentiality by padding ESP packets to a specified
length.

Signed-off-by: Martin Willi <martin@strongswan.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h |  1 +
 include/net/xfrm.h   |  1 +
 net/xfrm/xfrm_user.c | 19 +++++++++++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index b971e3848493..930fdd2de79c 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -283,6 +283,7 @@ enum xfrm_attr_type_t {
 	XFRMA_KMADDRESS,        /* struct xfrm_user_kmaddress */
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	XFRMA_MARK,		/* struct xfrm_mark */
+	XFRMA_TFCPAD,		/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7fa5b005893e..b9f385da758e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -143,6 +143,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			tfcpad;
 
 	u32			genid;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8bae6b22c846..8eb889510916 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -148,7 +148,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		     !attrs[XFRMA_ALG_AUTH_TRUNC]) ||
 		    attrs[XFRMA_ALG_AEAD]	||
 		    attrs[XFRMA_ALG_CRYPT]	||
-		    attrs[XFRMA_ALG_COMP])
+		    attrs[XFRMA_ALG_COMP]	||
+		    attrs[XFRMA_TFCPAD])
 			goto out;
 		break;
 
@@ -165,6 +166,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		     attrs[XFRMA_ALG_CRYPT]) &&
 		    attrs[XFRMA_ALG_AEAD])
 			goto out;
+		if (attrs[XFRMA_TFCPAD] &&
+		    p->mode != XFRM_MODE_TUNNEL)
+			goto out;
 		break;
 
 	case IPPROTO_COMP:
@@ -172,7 +176,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		    attrs[XFRMA_ALG_AEAD]	||
 		    attrs[XFRMA_ALG_AUTH]	||
 		    attrs[XFRMA_ALG_AUTH_TRUNC]	||
-		    attrs[XFRMA_ALG_CRYPT])
+		    attrs[XFRMA_ALG_CRYPT]	||
+		    attrs[XFRMA_TFCPAD])
 			goto out;
 		break;
 
@@ -186,6 +191,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		    attrs[XFRMA_ALG_CRYPT]	||
 		    attrs[XFRMA_ENCAP]		||
 		    attrs[XFRMA_SEC_CTX]	||
+		    attrs[XFRMA_TFCPAD]		||
 		    !attrs[XFRMA_COADDR])
 			goto out;
 		break;
@@ -439,6 +445,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 			goto error;
 	}
 
+	if (attrs[XFRMA_TFCPAD])
+		x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
+
 	if (attrs[XFRMA_COADDR]) {
 		x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
 				    sizeof(*x->coaddr), GFP_KERNEL);
@@ -688,6 +697,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	if (x->encap)
 		NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
 
+	if (x->tfcpad)
+		NLA_PUT_U32(skb, XFRMA_TFCPAD, x->tfcpad);
+
 	if (xfrm_mark_put(skb, &x->mark))
 		goto nla_put_failure;
 
@@ -2122,6 +2134,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MIGRATE]		= { .len = sizeof(struct xfrm_user_migrate) },
 	[XFRMA_KMADDRESS]	= { .len = sizeof(struct xfrm_user_kmaddress) },
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
+	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 };
 
 static struct xfrm_link {
@@ -2301,6 +2314,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(*x->calg));
 	if (x->encap)
 		l += nla_total_size(sizeof(*x->encap));
+	if (x->tfcpad)
+		l += nla_total_size(sizeof(x->tfcpad));
 	if (x->security)
 		l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) +
 				    x->security->ctx_len);
-- 
cgit v1.2.3


From d979e20f2b9f8a50c8d5f889e0b5d78580440d1f Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 8 Dec 2010 04:37:50 +0000
Subject: xfrm: Traffic Flow Confidentiality for IPv4 ESP

Add TFC padding to all packets smaller than the boundary configured
on the xfrm state. If the boundary is larger than the PMTU, limit
padding to the PMTU.

Signed-off-by: Martin Willi <martin@strongswan.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 14ca1f1c3fb0..e42a905180f0 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -23,6 +23,8 @@ struct esp_skb_cb {
 
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
+
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
@@ -117,25 +119,35 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int blksize;
 	int clen;
 	int alen;
+	int plen;
+	int tfclen;
 	int nfrags;
 
 	/* skb is pure payload to encrypt */
 
 	err = -ENOMEM;
 
-	/* Round to block size */
-	clen = skb->len;
-
 	esp = x->data;
 	aead = esp->aead;
 	alen = crypto_aead_authsize(aead);
 
+	tfclen = 0;
+	if (x->tfcpad) {
+		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+		u32 padto;
+
+		padto = min(x->tfcpad, esp4_get_mtu(x, dst->child_mtu_cached));
+		if (skb->len < padto)
+			tfclen = padto - skb->len;
+	}
 	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-	clen = ALIGN(clen + 2, blksize);
+	clen = ALIGN(skb->len + 2 + tfclen, blksize);
 	if (esp->padlen)
 		clen = ALIGN(clen, esp->padlen);
+	plen = clen - skb->len - tfclen;
 
-	if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+	err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+	if (err < 0)
 		goto error;
 	nfrags = err;
 
@@ -150,13 +162,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
+	if (tfclen) {
+		memset(tail, 0, tfclen);
+		tail += tfclen;
+	}
 	do {
 		int i;
-		for (i=0; i<clen-skb->len - 2; i++)
+		for (i = 0; i < plen - 2; i++)
 			tail[i] = i + 1;
 	} while (0);
-	tail[clen - skb->len - 2] = (clen - skb->len) - 2;
-	tail[clen - skb->len - 1] = *skb_mac_header(skb);
+	tail[plen - 2] = plen - 2;
+	tail[plen - 1] = *skb_mac_header(skb);
 	pskb_put(skb, trailer, clen - skb->len + alen);
 
 	skb_push(skb, -skb_network_offset(skb));
-- 
cgit v1.2.3


From 040253c931e336360453c8d81f76d1b010b2b5e7 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 8 Dec 2010 04:37:51 +0000
Subject: xfrm: Traffic Flow Confidentiality for IPv6 ESP

Add TFC padding to all packets smaller than the boundary configured
on the xfrm state. If the boundary is larger than the PMTU, limit
padding to the PMTU.

Signed-off-by: Martin Willi <martin@strongswan.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/esp6.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ee9b93bdd6a2..1b5c9825743b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -49,6 +49,8 @@ struct esp_skb_cb {
 
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
+
 /*
  * Allocate an AEAD request structure with extra space for SG and IV.
  *
@@ -140,6 +142,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	int blksize;
 	int clen;
 	int alen;
+	int plen;
+	int tfclen;
 	int nfrags;
 	u8 *iv;
 	u8 *tail;
@@ -148,18 +152,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	/* skb is pure payload to encrypt */
 	err = -ENOMEM;
 
-	/* Round to block size */
-	clen = skb->len;
-
 	aead = esp->aead;
 	alen = crypto_aead_authsize(aead);
 
+	tfclen = 0;
+	if (x->tfcpad) {
+		struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+		u32 padto;
+
+		padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+		if (skb->len < padto)
+			tfclen = padto - skb->len;
+	}
 	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
-	clen = ALIGN(clen + 2, blksize);
+	clen = ALIGN(skb->len + 2 + tfclen, blksize);
 	if (esp->padlen)
 		clen = ALIGN(clen, esp->padlen);
+	plen = clen - skb->len - tfclen;
 
-	if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+	err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+	if (err < 0)
 		goto error;
 	nfrags = err;
 
@@ -174,13 +186,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* Fill padding... */
 	tail = skb_tail_pointer(trailer);
+	if (tfclen) {
+		memset(tail, 0, tfclen);
+		tail += tfclen;
+	}
 	do {
 		int i;
-		for (i=0; i<clen-skb->len - 2; i++)
+		for (i = 0; i < plen - 2; i++)
 			tail[i] = i + 1;
 	} while (0);
-	tail[clen-skb->len - 2] = (clen - skb->len) - 2;
-	tail[clen - skb->len - 1] = *skb_mac_header(skb);
+	tail[plen - 2] = plen - 2;
+	tail[plen - 1] = *skb_mac_header(skb);
 	pskb_put(skb, trailer, clen - skb->len + alen);
 
 	skb_push(skb, -skb_network_offset(skb));
-- 
cgit v1.2.3


From 376d940ee91318cc6becefbb9454bb4454d7473f Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Thu, 9 Dec 2010 04:37:48 +0000
Subject: inet6: Remove redundant unlikely()

IS_ERR() already implies unlikely(), so it can be omitted here.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 54e8e42f7a88..059a3de647db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -810,7 +810,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	}
 	rcu_read_unlock();
 
-	if (unlikely(IS_ERR(segs)))
+	if (IS_ERR(segs))
 		goto out;
 
 	for (skb = segs; skb; skb = skb->next) {
-- 
cgit v1.2.3


From a8d764b9832d3cc86019f71916665dd2d337d7c2 Mon Sep 17 00:00:00 2001
From: Junchang Wang <junchangwang@gmail.com>
Date: Wed, 8 Dec 2010 16:55:16 +0000
Subject: pktgen: adding prefetchw() call

We know for sure pktgen is going to write skb->data right after
*_alloc_skb, causing unnecessary cache misses.

Idea is to add a prefetchw() call to prefetch the first cache line
indicated by skb->data. On systems with Adjacent Cache Line Prefetch,
it's probably two cache lines are prefetched.

With this patch, pktgen on Intel SR1625 server with two E5530
quad-core processors and a single ixgbe-based NIC went from 8.63Mpps
to 9.03Mpps, with 4.6% improvement.

Signed-off-by: Junchang Wang <junchangwang@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2953b2abc971..18fe20dacc60 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2660,6 +2660,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
+	prefetchw(skb->data);
 
 	skb_reserve(skb, datalen);
 
@@ -3007,6 +3008,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
+	prefetchw(skb->data);
 
 	skb_reserve(skb, 16);
 
-- 
cgit v1.2.3


From e596e6e4d578f2639416e620d367a3af34814a40 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 9 Dec 2010 12:08:35 +0000
Subject: ethtool: Report link-down while interface is down

While an interface is down, many implementations of
ethtool_ops::get_link, including the default, ethtool_op_get_link(),
will report the last link state seen while the interface was up.  In
general the current physical link state is not available if the
interface is down.

Define ETHTOOL_GLINK to reflect whether the interface *and* any
physical port have a working link, and consistently return 0 when the
interface is down.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  4 +++-
 net/core/ethtool.c      | 17 +++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 6628a507fd3b..1908929204a9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -691,7 +691,9 @@ struct ethtool_ops {
 #define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
 #define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level. */
 #define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation. */
-#define ETHTOOL_GLINK		0x0000000a /* Get link status (ethtool_value) */
+/* Get link status for host, i.e. whether the interface *and* the
+ * physical port (if there is one) are up (ethtool_value). */
+#define ETHTOOL_GLINK		0x0000000a
 #define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
 #define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data. */
 #define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d5bc28818883..17741782a345 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -891,6 +891,20 @@ static int ethtool_nway_reset(struct net_device *dev)
 	return dev->ethtool_ops->nway_reset(dev);
 }
 
+static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+
+	if (!dev->ethtool_ops->get_link)
+		return -EOPNOTSUPP;
+
+	edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_eeprom eeprom;
@@ -1530,8 +1544,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_nway_reset(dev);
 		break;
 	case ETHTOOL_GLINK:
-		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       dev->ethtool_ops->get_link);
+		rc = ethtool_get_link(dev, useraddr);
 		break;
 	case ETHTOOL_GEEPROM:
 		rc = ethtool_get_eeprom(dev, useraddr);
-- 
cgit v1.2.3


From c053fd96d0d3d18c721f880b8fdd0b925894d9c4 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 10 Dec 2010 16:02:20 -0800
Subject: af_packet: use swap() instead of the open coded macro XC()

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 246a04a13234..e79efaf06389 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2501,22 +2501,20 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 	mutex_lock(&po->pg_vec_lock);
 	if (closing || atomic_read(&po->mapped) == 0) {
 		err = 0;
-#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
 		spin_lock_bh(&rb_queue->lock);
-		pg_vec = XC(rb->pg_vec, pg_vec);
+		swap(rb->pg_vec, pg_vec);
 		rb->frame_max = (req->tp_frame_nr - 1);
 		rb->head = 0;
 		rb->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&rb_queue->lock);
 
-		order = XC(rb->pg_vec_order, order);
-		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
+		swap(rb->pg_vec_order, order);
+		swap(rb->pg_vec_len, req->tp_block_nr);
 
 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
 		po->prot_hook.func = (po->rx_ring.pg_vec) ?
 						tpacket_rcv : packet_rcv;
 		skb_queue_purge(rb_queue);
-#undef XC
 		if (atomic_read(&po->mapped))
 			pr_err("packet_mmap: vma is busy: %d\n",
 			       atomic_read(&po->mapped));
-- 
cgit v1.2.3


From deef4b522b814593407cfd56216840c2b75e9f15 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Dec 2010 17:38:11 +0000
Subject: bridge: Use consistent NF_DROP returns in nf_pre_routing

The nf_pre_routing functions in bridging have collected two
distinct ways of returning NF_DROP over the years, inline and
via goto.  There is no reason for preferring either one.

So this patch arbitrarily picks the inline variant and converts
the all the gotos.

Also removes a redundant comment.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 16f5c333596a..4b5b66d07bba 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -562,26 +562,26 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	u32 pkt_len;
 
 	if (skb->len < sizeof(struct ipv6hdr))
-		goto inhdr_error;
+		return NF_DROP;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		goto inhdr_error;
+		return NF_DROP;
 
 	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
-		goto inhdr_error;
+		return NF_DROP;
 
 	pkt_len = ntohs(hdr->payload_len);
 
 	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
 		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-			goto inhdr_error;
+			return NF_DROP;
 		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
-			goto inhdr_error;
+			return NF_DROP;
 	}
 	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
-		goto inhdr_error;
+		return NF_DROP;
 
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
@@ -594,9 +594,6 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 		br_nf_pre_routing_finish_ipv6);
 
 	return NF_STOLEN;
-
-inhdr_error:
-	return NF_DROP;
 }
 
 /* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
@@ -615,11 +612,11 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 	__u32 len = nf_bridge_encap_header_len(skb);
 
 	if (unlikely(!pskb_may_pull(skb, len)))
-		goto out;
+		return NF_DROP;
 
 	p = br_port_get_rcu(in);
 	if (p == NULL)
-		goto out;
+		return NF_DROP;
 	br = p->br;
 
 	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
@@ -641,8 +638,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 	nf_bridge_pull_encap_header_rcsum(skb);
 
 	if (br_parse_ip_options(skb))
-		/* Drop invalid packet */
-		goto out;
+		return NF_DROP;
 
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
@@ -656,9 +652,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 		br_nf_pre_routing_finish);
 
 	return NF_STOLEN;
-
-out:
-	return NF_DROP;
 }
 
 
-- 
cgit v1.2.3


From abbf46ae0e4954584eac599bec73502c1c805e9e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 12 Dec 2010 21:14:46 -0800
Subject: ipv6: Use ip6_dst_hoplimit() instead of direct dst_metric() calls.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 2 +-
 net/ipv6/route.c                 | 1 +
 net/ipv6/xfrm6_mode_tunnel.c     | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 2933396e0281..bf998feac14e 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -124,7 +124,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	skb_reset_network_header(nskb);
 	ip6h = ipv6_hdr(nskb);
 	ip6h->version = 6;
-	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
+	ip6h->hop_limit = ip6_dst_hoplimit(dst);
 	ip6h->nexthdr = IPPROTO_TCP;
 	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
 	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4aed0812b512..9b2d7bc7beda 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1119,6 +1119,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
 	}
 	return hoplimit;
 }
+EXPORT_SYMBOL(ip6_dst_hoplimit);
 
 /*
  *
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index b809812c8d30..645cb968d450 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -14,6 +14,7 @@
 #include <net/dsfield.h>
 #include <net/dst.h>
 #include <net/inet_ecn.h>
+#include <net/ip6_route.h>
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
@@ -53,7 +54,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
+	top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
 	return 0;
-- 
cgit v1.2.3


From 5170ae824ddf1988a63fb12cbedcff817634c444 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 12 Dec 2010 21:35:57 -0800
Subject: net: Abstract RTAX_HOPLIMIT metric accesses behind helper.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pptp.c              |  2 +-
 include/net/dst.h               | 15 ++++++++++++++-
 net/ipv4/ip_gre.c               |  2 +-
 net/ipv4/ip_output.c            |  2 +-
 net/ipv4/netfilter/ipt_REJECT.c |  2 +-
 net/ipv4/route.c                |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c    |  2 +-
 net/ipv6/route.c                |  4 ++--
 8 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c
index 7556a9224f72..c83e168eef21 100644
--- a/drivers/net/pptp.c
+++ b/drivers/net/pptp.c
@@ -277,7 +277,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	iph->tos      = 0;
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
-	iph->ttl      = dst_metric(&rt->dst, RTAX_HOPLIMIT);
+	iph->ttl      = dst_metric_hoplimit(&rt->dst);
 	iph->tot_len  = htons(skb->len);
 
 	skb_dst_drop(skb);
diff --git a/include/net/dst.h b/include/net/dst.h
index 85dee3a57b9b..9208b500aaaf 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -104,11 +104,24 @@ struct dst_entry {
 #ifdef __KERNEL__
 
 static inline u32
-dst_metric(const struct dst_entry *dst, int metric)
+dst_metric_raw(const struct dst_entry *dst, const int metric)
 {
 	return dst->_metrics[metric-1];
 }
 
+static inline u32
+dst_metric(const struct dst_entry *dst, const int metric)
+{
+	WARN_ON_ONCE(metric == RTAX_HOPLIMIT);
+	return dst_metric_raw(dst, metric);
+}
+
+static inline u32
+dst_metric_hoplimit(const struct dst_entry *dst)
+{
+	return dst_metric_raw(dst, RTAX_HOPLIMIT);
+}
+
 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
 {
 	dst->_metrics[metric-1] = val;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ff4e7a4e33ed..46eb3dc37ec6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -890,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
+			iph->ttl = dst_metric_hoplimit(&rt->dst);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5090c7ff525e..ea28fa5f1992 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -130,7 +130,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
 	int ttl = inet->uc_ttl;
 
 	if (ttl < 0)
-		ttl = dst_metric(dst, RTAX_HOPLIMIT);
+		ttl = dst_metric_hoplimit(dst);
 	return ttl;
 }
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 43eec80c0e7c..f1309072c541 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
-	niph->ttl	= dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
+	niph->ttl	= dst_metric_hoplimit(skb_dst(nskb));
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 26ac396eaa5e..90b5a37555ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1821,7 +1821,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	} else
 		dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
 
-	if (dst_metric(dst, RTAX_HOPLIMIT) == 0)
+	if (dst_metric_raw(dst, RTAX_HOPLIMIT) == 0)
 		dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl);
 	if (dst_mtu(dst) > IP_MAX_MTU)
 		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 6f368413eb0e..63b854e74d99 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
 	ip_select_ident(top_iph, dst->child, NULL);
 
-	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
+	top_iph->ttl = dst_metric_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9b2d7bc7beda..d9405d1863b8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1104,7 +1104,7 @@ static int ipv6_get_mtu(struct net_device *dev)
 
 int ip6_dst_hoplimit(struct dst_entry *dst)
 {
-	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
+	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
 	if (hoplimit < 0) {
 		struct net_device *dev = dst->dev;
 		struct inet6_dev *idev;
@@ -1310,7 +1310,7 @@ install_route:
 		}
 	}
 
-	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+	if (dst_metric_raw(&rt->dst, RTAX_HOPLIMIT) == 0)
 		dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	if (!dst_mtu(&rt->dst))
 		dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
-- 
cgit v1.2.3


From a02e4b7dae455151c423e2f69ef222c502a321fd Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 12 Dec 2010 21:39:02 -0800
Subject: ipv6: Demark default hoplimit as zero.

This is for consistency with ipv4.  Using "-1" makes
no sense.

It was made this way a long time ago merely to be consistent
with how the ipv6 socket hoplimit "default" is stored.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9405d1863b8..98796b0dc2b7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1105,7 +1105,7 @@ static int ipv6_get_mtu(struct net_device *dev)
 int ip6_dst_hoplimit(struct dst_entry *dst)
 {
 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
-	if (hoplimit < 0) {
+	if (hoplimit == 0) {
 		struct net_device *dev = dst->dev;
 		struct inet6_dev *idev;
 
@@ -1310,8 +1310,6 @@ install_route:
 		}
 	}
 
-	if (dst_metric_raw(&rt->dst, RTAX_HOPLIMIT) == 0)
-		dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	if (!dst_mtu(&rt->dst))
 		dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
 	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
-- 
cgit v1.2.3


From 323e126f0c5995f779d7df7fd035f6e8fed8764d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 12 Dec 2010 21:55:08 -0800
Subject: ipv4: Don't pre-seed hoplimit metric.

Always go through a new ip4_dst_hoplimit() helper, just like ipv6.

This allowed several simplifications:

1) The interim dst_metric_hoplimit() can go as it's no longer
   userd.

2) The sysctl_ip_default_ttl entry no longer needs to use
   ipv4_doint_and_flush, since the sysctl is not cached in
   routing cache metrics any longer.

3) ipv4_doint_and_flush no longer needs to be exported and
   therefore can be marked static.

When ipv4_doint_and_flush_strategy was removed some time ago,
the external declaration in ip.h was mistakenly left around
so kill that off too.

We have to move the sysctl_ip_default_ttl declaration into
ipv4's route cache definition header net/route.h, because
currently net/ip.h (where the declaration lives now) has
a back dependency on net/route.h

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pptp.c              |  2 +-
 include/net/dst.h               |  6 ------
 include/net/ip.h                | 10 ----------
 include/net/route.h             | 11 +++++++++++
 net/ipv4/devinet.c              |  6 +++---
 net/ipv4/ip_gre.c               |  2 +-
 net/ipv4/ip_output.c            |  3 ++-
 net/ipv4/netfilter/ipt_REJECT.c |  2 +-
 net/ipv4/route.c                |  2 --
 net/ipv4/sysctl_net_ipv4.c      |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c    |  2 +-
 11 files changed, 21 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c
index c83e168eef21..164cfad6ce79 100644
--- a/drivers/net/pptp.c
+++ b/drivers/net/pptp.c
@@ -277,7 +277,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	iph->tos      = 0;
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
-	iph->ttl      = dst_metric_hoplimit(&rt->dst);
+	iph->ttl      = ip4_dst_hoplimit(&rt->dst);
 	iph->tot_len  = htons(skb->len);
 
 	skb_dst_drop(skb);
diff --git a/include/net/dst.h b/include/net/dst.h
index 9208b500aaaf..755ac6c1aa03 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -116,12 +116,6 @@ dst_metric(const struct dst_entry *dst, const int metric)
 	return dst_metric_raw(dst, metric);
 }
 
-static inline u32
-dst_metric_hoplimit(const struct dst_entry *dst)
-{
-	return dst_metric_raw(dst, RTAX_HOPLIMIT);
-}
-
 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
 {
 	dst->_metrics[metric-1] = val;
diff --git a/include/net/ip.h b/include/net/ip.h
index 86e2b182a0c0..67fac78a186b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -201,7 +201,6 @@ static inline int inet_is_reserved_local_port(int port)
 	return test_bit(port, sysctl_local_reserved_ports);
 }
 
-extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
 
 extern struct ctl_path net_core_path[];
@@ -428,15 +427,6 @@ extern void	ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 extern void	ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
 			       u32 info);
 
-/* sysctl helpers - any sysctl which holds a value that ends up being
- * fed into the routing cache should use these handlers.
- */
-int ipv4_doint_and_flush(ctl_table *ctl, int write,
-			 void __user *buffer,
-			 size_t *lenp, loff_t *ppos);
-int ipv4_doint_and_flush_strategy(ctl_table *table,
-				  void __user *oldval, size_t __user *oldlenp,
-				  void __user *newval, size_t newlen);
 #ifdef CONFIG_PROC_FS
 extern int ip_misc_proc_init(void);
 #endif
diff --git a/include/net/route.h b/include/net/route.h
index b8c1f7703fc6..27002362944a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -231,4 +231,15 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return skb_rtable(skb)->rt_iif;
 }
 
+extern int sysctl_ip_default_ttl;
+
+static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
+{
+	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+
+	if (hoplimit == 0)
+		hoplimit = sysctl_ip_default_ttl;
+	return hoplimit;
+}
+
 #endif	/* _ROUTE_H */
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3b067704ab38..748cb5b337bd 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1430,9 +1430,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 	return ret;
 }
 
-int ipv4_doint_and_flush(ctl_table *ctl, int write,
-			 void __user *buffer,
-			 size_t *lenp, loff_t *ppos)
+static int ipv4_doint_and_flush(ctl_table *ctl, int write,
+				void __user *buffer,
+				size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 46eb3dc37ec6..eb68a0e34e49 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -890,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric_hoplimit(&rt->dst);
+			iph->ttl = ip4_dst_hoplimit(&rt->dst);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ea28fa5f1992..04c7b3ba6b39 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -82,6 +82,7 @@
 #include <linux/tcp.h>
 
 int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
+EXPORT_SYMBOL(sysctl_ip_default_ttl);
 
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
@@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
 	int ttl = inet->uc_ttl;
 
 	if (ttl < 0)
-		ttl = dst_metric_hoplimit(dst);
+		ttl = ip4_dst_hoplimit(dst);
 	return ttl;
 }
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f1309072c541..1ff79e557f96 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
-	niph->ttl	= dst_metric_hoplimit(skb_dst(nskb));
+	niph->ttl	= ip4_dst_hoplimit(skb_dst(nskb));
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 90b5a37555ab..770f70427f0b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1821,8 +1821,6 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	} else
 		dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
 
-	if (dst_metric_raw(dst, RTAX_HOPLIMIT) == 0)
-		dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl);
 	if (dst_mtu(dst) > IP_MAX_MTU)
 		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
 	if (dst_metric(dst, RTAX_ADVMSS) == 0)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1b4ec21497a4..e85ff5930607 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -155,7 +155,7 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_ip_default_ttl,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= ipv4_doint_and_flush,
+		.proc_handler	= proc_dointvec,
 		.extra2		= &init_net,
 	},
 	{
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 63b854e74d99..534972e114ac 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
 	ip_select_ident(top_iph, dst->child, NULL);
 
-	top_iph->ttl = dst_metric_hoplimit(dst->child);
+	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
-- 
cgit v1.2.3


From 249fab773dd5f689318c969ed649c4db077cdfc3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 13 Dec 2010 12:16:14 -0800
Subject: net: add limits to ip_default_ttl

ip_default_ttl should be between 1 and 255

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e85ff5930607..1a456652086b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,6 +28,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 static int tcp_adv_win_scale_min = -31;
 static int tcp_adv_win_scale_max = 31;
+static int ip_ttl_min = 1;
+static int ip_ttl_max = 255;
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -155,8 +157,9 @@ static struct ctl_table ipv4_table[] = {
 		.data		= &sysctl_ip_default_ttl,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra2		= &init_net,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &ip_ttl_min,
+		.extra2		= &ip_ttl_max,
 	},
 	{
 		.procname	= "ip_no_pmtu_disc",
-- 
cgit v1.2.3


From a7ffac9591a2a0ee74c431396ae475a8d0caa51e Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 8 Dec 2010 13:59:24 +0900
Subject: cfg80211: Add antenna availability information

Add a field to wiphy for the hardware to report the availble antennas for
configuration. Only if this is set to something bigger than zero, will the
anntenna configuration ops be executed.

Allthough this could be a simple number of antennas, I defined it as a bitmap
of antennas which are available for configuration, since it's more consistent
with the rest of the antenna API and there could be cases where the
hardware allows only configuration of certain antennas. As it does not make
much of a difference in size or normal usage, I think it's better to be able to
support this, in case the need arises.

The antenna configuration is now also checked against the availabe antennas and
rejected if it does not match.

Signed-off-by: Bruno Randolf <br1@einfach.org>

--
v3:	always apply available antenna mask (for "all" antennas case).

v2:	reject antenna configurations which don't match the available antennas
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h |  5 +++++
 net/wireless/nl80211.c | 15 +++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0d5979924be3..4d5acb013636 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1468,6 +1468,9 @@ struct ieee80211_txrx_stypes {
  * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or
  *	transmitted through nl80211, points to an array indexed by interface
  *	type
+ *
+ * @available_antennas: bitmap of antennas which are available to configure.
+ *	antenna configuration commands will be rejected unless this is set.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1507,6 +1510,8 @@ struct wiphy {
 
 	u8 max_num_pmkids;
 
+	u32 available_antennas;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c3f80e565365..73a7f6d354c9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -548,7 +548,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
-	if (dev->ops->get_antenna) {
+	if (dev->wiphy.available_antennas && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
 		int res;
 		res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant);
@@ -1046,7 +1046,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
 	    info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
 		u32 tx_ant, rx_ant;
-		if (!rdev->ops->set_antenna) {
+		if (!rdev->wiphy.available_antennas || !rdev->ops->set_antenna) {
 			result = -EOPNOTSUPP;
 			goto bad_res;
 		}
@@ -1054,6 +1054,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]);
 		rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
 
+		/* reject antenna configurations which don't match the
+		 * available antenna mask, except for the "all" mask */
+		if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas)) ||
+		    (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas))) {
+			result = -EINVAL;
+			goto bad_res;
+		}
+
+		tx_ant = tx_ant & rdev->wiphy.available_antennas;
+		rx_ant = rx_ant & rdev->wiphy.available_antennas;
+
 		result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
 		if (result)
 			goto bad_res;
-- 
cgit v1.2.3


From 01123e233193a544c85b622e1690f44532052c5b Mon Sep 17 00:00:00 2001
From: Sven Neumann <s.neumann@raumfeld.com>
Date: Thu, 9 Dec 2010 15:05:24 +0100
Subject: cfg80211: update information elements in cached BSS struct

When a cached BSS struct is updated because a new beacon was received,
the code replaces the cached information elements by the IEs from the
new beacon. However it did not update the pub.information_elements
and pub.len_information_elements fields leaving them either pointing
to the old beacon IEs or in an inconsistent state where the data is
replaced by the new beacon IEs but len_information_elements still has
its value from the first beacon.

Fix this by updating the information elements fields if they are
pointing to beacon IEs.

Signed-off-by: Sven Neumann <s.neumann@raumfeld.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/scan.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 503ebb86ba18..ea427f418f64 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -464,6 +464,9 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
 		if (res->pub.beacon_ies) {
 			size_t used = dev->wiphy.bss_priv_size + sizeof(*res);
 			size_t ielen = res->pub.len_beacon_ies;
+			bool information_elements_is_beacon_ies =
+				(found->pub.information_elements ==
+				 found->pub.beacon_ies);
 
 			if (found->pub.beacon_ies &&
 			    !found->beacon_ies_allocated &&
@@ -487,6 +490,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev,
 					found->pub.len_beacon_ies = ielen;
 				}
 			}
+
+			/* Override IEs if they were from a beacon before */
+			if (information_elements_is_beacon_ies) {
+				found->pub.information_elements =
+					found->pub.beacon_ies;
+				found->pub.len_information_elements =
+					found->pub.len_beacon_ies;
+			}
 		}
 
 		kref_put(&res->ref, bss_release);
-- 
cgit v1.2.3


From 897bed8b4320774e56f282cdc1cceb4d77442797 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 9 Dec 2010 19:49:00 +0100
Subject: mac80211: clean up RX key checks

Using the default key for "any key set" isn't
quite what we should do. It works, but with the
upcoming changes it makes life unnecessarily
complex, so do something better here and really
check for "any key".

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2fe8f5f86499..052789ef4745 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -955,12 +955,31 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		 * have been expected.
 		 */
 		struct ieee80211_key *key = NULL;
+		struct ieee80211_sub_if_data *sdata = rx->sdata;
+		int i;
+
 		if (ieee80211_is_mgmt(fc) &&
 		    is_multicast_ether_addr(hdr->addr1) &&
 		    (key = rcu_dereference(rx->sdata->default_mgmt_key)))
 			rx->key = key;
-		else if ((key = rcu_dereference(rx->sdata->default_key)))
-			rx->key = key;
+		else {
+			if (rx->sta) {
+				for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+					key = rcu_dereference(rx->sta->gtk[i]);
+					if (key)
+						break;
+				}
+			}
+			if (!key) {
+				for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+					key = rcu_dereference(sdata->keys[i]);
+					if (key)
+						break;
+				}
+			}
+			if (key)
+				rx->key = key;
+		}
 		return RX_CONTINUE;
 	} else {
 		u8 keyid;
-- 
cgit v1.2.3


From dbd2fd656f2060abfd3a16257f8b51ec60f6d2ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 9 Dec 2010 19:58:59 +0100
Subject: cfg80211/nl80211: separate unicast/multicast default TX keys

Allow userspace to specify that a given key
is default only for unicast and/or multicast
transmissions. Only WEP keys are for both,
WPA/RSN keys set here are GTKs for multicast
only. For more future flexibility, allow to
specify all combiations.

Wireless extensions can only set both so use
nl80211; WEP keys (connect keys) must be set
as default for both (but 802.1X WEP is still
possible).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/iwmc3200wifi/cfg80211.c |   3 +-
 drivers/net/wireless/libertas/cfg.c          |   3 +-
 drivers/net/wireless/rndis_wlan.c            |   4 +-
 include/linux/nl80211.h                      |  27 ++++++
 include/net/cfg80211.h                       |   5 +-
 net/mac80211/cfg.c                           |   3 +-
 net/wireless/nl80211.c                       | 125 +++++++++++++++++++++++----
 net/wireless/util.c                          |   3 +-
 net/wireless/wext-compat.c                   |   8 +-
 9 files changed, 152 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
index c6c0eff9b5ed..5a4982271e96 100644
--- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c
+++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c
@@ -225,7 +225,8 @@ static int iwm_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev,
 
 static int iwm_cfg80211_set_default_key(struct wiphy *wiphy,
 					struct net_device *ndev,
-					u8 key_index)
+					u8 key_index, bool unicast,
+					bool multicast)
 {
 	struct iwm_priv *iwm = ndev_to_iwm(ndev);
 
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index dee32d3681a5..300be1931826 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1422,7 +1422,8 @@ static int lbs_cfg_disconnect(struct wiphy *wiphy, struct net_device *dev,
 
 static int lbs_cfg_set_default_key(struct wiphy *wiphy,
 				   struct net_device *netdev,
-				   u8 key_index)
+				   u8 key_index, bool unicast,
+				   bool multicast)
 {
 	struct lbs_private *priv = wiphy_priv(wiphy);
 
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 19f3d568f700..4a4f00591447 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -554,7 +554,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev,
 			 u8 key_index, bool pairwise, const u8 *mac_addr);
 
 static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
-								u8 key_index);
+				 u8 key_index, bool unicast, bool multicast);
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
 					u8 *mac, struct station_info *sinfo);
@@ -2381,7 +2381,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev,
 }
 
 static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
-								u8 key_index)
+				 u8 key_index, bool unicast, bool multicast)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 380421253d16..b8fa25d741ba 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -851,6 +851,10 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16)
  *
+ * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ *	attributes, specifying what a key should be set as default as.
+ *	See &enum nl80211_key_default_types.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1029,6 +1033,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_BSS_HT_OPMODE,
 
+	NL80211_ATTR_KEY_DEFAULT_TYPES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1774,6 +1780,23 @@ enum nl80211_wpa_versions {
 	NL80211_WPA_VERSION_2 = 1 << 1,
 };
 
+/**
+ * enum nl80211_key_default_types - key default types
+ * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid
+ * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default
+ *	unicast key
+ * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default
+ *	multicast key
+ * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types
+ */
+enum nl80211_key_default_types {
+	__NL80211_KEY_DEFAULT_TYPE_INVALID,
+	NL80211_KEY_DEFAULT_TYPE_UNICAST,
+	NL80211_KEY_DEFAULT_TYPE_MULTICAST,
+
+	NUM_NL80211_KEY_DEFAULT_TYPES
+};
+
 /**
  * enum nl80211_key_attributes - key attributes
  * @__NL80211_KEY_INVALID: invalid
@@ -1790,6 +1813,9 @@ enum nl80211_wpa_versions {
  * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not
  *	specified the default depends on whether a MAC address was
  *	given with the command using the key or not (u32)
+ * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags
+ *	attributes, specifying what a key should be set as default as.
+ *	See &enum nl80211_key_default_types.
  * @__NL80211_KEY_AFTER_LAST: internal
  * @NL80211_KEY_MAX: highest key attribute
  */
@@ -1802,6 +1828,7 @@ enum nl80211_key_attributes {
 	NL80211_KEY_DEFAULT,
 	NL80211_KEY_DEFAULT_MGMT,
 	NL80211_KEY_TYPE,
+	NL80211_KEY_DEFAULT_TYPES,
 
 	/* keep last */
 	__NL80211_KEY_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4d5acb013636..22be7c625b70 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1211,7 +1211,7 @@ struct cfg80211_ops {
 			   u8 key_index, bool pairwise, const u8 *mac_addr);
 	int	(*set_default_key)(struct wiphy *wiphy,
 				   struct net_device *netdev,
-				   u8 key_index);
+				   u8 key_index, bool unicast, bool multicast);
 	int	(*set_default_mgmt_key)(struct wiphy *wiphy,
 					struct net_device *netdev,
 					u8 key_index);
@@ -1393,6 +1393,8 @@ struct cfg80211_ops {
  *	control port protocol ethertype. The device also honours the
  *	control_port_no_encrypt flag.
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
+ * @WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS: The device supports separate
+ *	unicast and multicast TX keys.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1404,6 +1406,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_4ADDR_STATION		= BIT(6),
 	WIPHY_FLAG_CONTROL_PORT_PROTOCOL	= BIT(7),
 	WIPHY_FLAG_IBSS_RSN			= BIT(8),
+	WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS= BIT(9),
 };
 
 struct mac_address {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c30b8b72eedb..12f7dc048d34 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -295,7 +295,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
 
 static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
-					u8 key_idx)
+					u8 key_idx, bool uni,
+					bool multi)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 73a7f6d354c9..53f044370cde 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -171,6 +171,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 },
 	[NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG },
+	[NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
 };
 
 /* policy for the key attributes */
@@ -182,6 +183,14 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
 	[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
 	[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
 	[NL80211_KEY_TYPE] = { .type = NLA_U32 },
+	[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
+};
+
+/* policy for the key default flags */
+static const struct nla_policy
+nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
+	[NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG },
+	[NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
 };
 
 /* ifidx get helper */
@@ -314,6 +323,7 @@ struct key_parse {
 	int idx;
 	int type;
 	bool def, defmgmt;
+	bool def_uni, def_multi;
 };
 
 static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
@@ -327,6 +337,13 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
 	k->def = !!tb[NL80211_KEY_DEFAULT];
 	k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT];
 
+	if (k->def) {
+		k->def_uni = true;
+		k->def_multi = true;
+	}
+	if (k->defmgmt)
+		k->def_multi = true;
+
 	if (tb[NL80211_KEY_IDX])
 		k->idx = nla_get_u8(tb[NL80211_KEY_IDX]);
 
@@ -349,6 +366,19 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
 			return -EINVAL;
 	}
 
+	if (tb[NL80211_KEY_DEFAULT_TYPES]) {
+		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
+		int err = nla_parse_nested(kdt,
+					   NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+					   tb[NL80211_KEY_DEFAULT_TYPES],
+					   nl80211_key_default_policy);
+		if (err)
+			return err;
+
+		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
+		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
+	}
+
 	return 0;
 }
 
@@ -373,12 +403,32 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
 	k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT];
 	k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT];
 
+	if (k->def) {
+		k->def_uni = true;
+		k->def_multi = true;
+	}
+	if (k->defmgmt)
+		k->def_multi = true;
+
 	if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
 		k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
 		if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
 			return -EINVAL;
 	}
 
+	if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
+		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
+		int err = nla_parse_nested(
+				kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1,
+				info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
+				nl80211_key_default_policy);
+		if (err)
+			return err;
+
+		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
+		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
+	}
+
 	return 0;
 }
 
@@ -401,6 +451,11 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
 	if (k->def && k->defmgmt)
 		return -EINVAL;
 
+	if (k->defmgmt) {
+		if (k->def_uni || !k->def_multi)
+			return -EINVAL;
+	}
+
 	if (k->idx != -1) {
 		if (k->defmgmt) {
 			if (k->idx < 4 || k->idx > 5)
@@ -450,6 +505,8 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
 				goto error;
 			def = 1;
 			result->def = parse.idx;
+			if (!parse.def_uni || !parse.def_multi)
+				goto error;
 		} else if (parse.defmgmt)
 			goto error;
 		err = cfg80211_validate_key_settings(rdev, &parse.p,
@@ -1586,8 +1643,6 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	struct key_parse key;
 	int err;
 	struct net_device *dev = info->user_ptr[1];
-	int (*func)(struct wiphy *wiphy, struct net_device *netdev,
-		    u8 key_index);
 
 	err = nl80211_parse_key(info, &key);
 	if (err)
@@ -1600,27 +1655,61 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
 	if (!key.def && !key.defmgmt)
 		return -EINVAL;
 
-	if (key.def)
-		func = rdev->ops->set_default_key;
-	else
-		func = rdev->ops->set_default_mgmt_key;
+	wdev_lock(dev->ieee80211_ptr);
 
-	if (!func)
-		return -EOPNOTSUPP;
+	if (key.def) {
+		if (!rdev->ops->set_default_key) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
 
-	wdev_lock(dev->ieee80211_ptr);
-	err = nl80211_key_allowed(dev->ieee80211_ptr);
-	if (!err)
-		err = func(&rdev->wiphy, dev, key.idx);
+		err = nl80211_key_allowed(dev->ieee80211_ptr);
+		if (err)
+			goto out;
+
+		if (!(rdev->wiphy.flags &
+				WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS)) {
+			if (!key.def_uni || !key.def_multi) {
+				err = -EOPNOTSUPP;
+				goto out;
+			}
+		}
+
+		err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx,
+						 key.def_uni, key.def_multi);
+
+		if (err)
+			goto out;
 
 #ifdef CONFIG_CFG80211_WEXT
-	if (!err) {
-		if (func == rdev->ops->set_default_key)
-			dev->ieee80211_ptr->wext.default_key = key.idx;
-		else
-			dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
-	}
+		dev->ieee80211_ptr->wext.default_key = key.idx;
+#endif
+	} else {
+		if (key.def_uni || !key.def_multi) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!rdev->ops->set_default_mgmt_key) {
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = nl80211_key_allowed(dev->ieee80211_ptr);
+		if (err)
+			goto out;
+
+		err = rdev->ops->set_default_mgmt_key(&rdev->wiphy,
+						      dev, key.idx);
+		if (err)
+			goto out;
+
+#ifdef CONFIG_CFG80211_WEXT
+		dev->ieee80211_ptr->wext.default_mgmt_key = key.idx;
 #endif
+	}
+
+ out:
 	wdev_unlock(dev->ieee80211_ptr);
 
 	return err;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4de624ca4c63..7620ae2fcf18 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -689,7 +689,8 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 			continue;
 		}
 		if (wdev->connect_keys->def == i)
-			if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) {
+			if (rdev->ops->set_default_key(wdev->wiphy, dev,
+						       i, true, true)) {
 				netdev_err(dev, "failed to set defkey %d\n", i);
 				continue;
 			}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 12222ee6ebf2..3e5dbd4e4cd5 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -548,8 +548,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 				__cfg80211_leave_ibss(rdev, wdev->netdev, true);
 				rejoin = true;
 			}
-			err = rdev->ops->set_default_key(&rdev->wiphy,
-							 dev, idx);
+			err = rdev->ops->set_default_key(&rdev->wiphy, dev,
+							 idx, true, true);
 		}
 		if (!err) {
 			wdev->wext.default_key = idx;
@@ -627,8 +627,8 @@ int cfg80211_wext_siwencode(struct net_device *dev,
 		err = 0;
 		wdev_lock(wdev);
 		if (wdev->current_bss)
-			err = rdev->ops->set_default_key(&rdev->wiphy,
-							 dev, idx);
+			err = rdev->ops->set_default_key(&rdev->wiphy, dev,
+							 idx, true, true);
 		if (!err)
 			wdev->wext.default_key = idx;
 		wdev_unlock(wdev);
-- 
cgit v1.2.3


From f7e0104c1a4e77cc4f23d5969b0677bdc4f62c63 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 9 Dec 2010 19:49:02 +0100
Subject: mac80211: support separate default keys

Add support for split default keys (unicast
and multicast) in mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         |  2 +-
 net/mac80211/debugfs_key.c | 37 +++++++++++++++++++++----------------
 net/mac80211/debugfs_key.h |  8 ++------
 net/mac80211/ieee80211_i.h |  5 +++--
 net/mac80211/key.c         | 45 ++++++++++++++++++++++++++-------------------
 net/mac80211/key.h         |  3 ++-
 net/mac80211/main.c        |  3 ++-
 net/mac80211/tx.c          |  6 +++++-
 8 files changed, 62 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 12f7dc048d34..ea06f92801e9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -300,7 +300,7 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	ieee80211_set_default_key(sdata, key_idx);
+	ieee80211_set_default_key(sdata, key_idx, uni, multi);
 
 	return 0;
 }
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 5822a6ce7671..f7ef3477c24a 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -274,7 +274,8 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
 	debugfs_remove_recursive(key->debugfs.dir);
 	key->debugfs.dir = NULL;
 }
-void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
+
+void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
 {
 	char buf[50];
 	struct ieee80211_key *key;
@@ -282,25 +283,29 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata)
 	if (!sdata->debugfs.dir)
 		return;
 
-	/* this is running under the key lock */
+	lockdep_assert_held(&sdata->local->key_mtx);
 
-	key = sdata->default_key;
-	if (key) {
+	if (sdata->default_unicast_key) {
+		key = sdata->default_unicast_key;
 		sprintf(buf, "../keys/%d", key->debugfs.cnt);
-		sdata->debugfs.default_key =
-			debugfs_create_symlink("default_key",
+		sdata->debugfs.default_unicast_key =
+			debugfs_create_symlink("default_unicast_key",
 					       sdata->debugfs.dir, buf);
-	} else
-		ieee80211_debugfs_key_remove_default(sdata);
-}
-
-void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
-{
-	if (!sdata)
-		return;
+	} else {
+		debugfs_remove(sdata->debugfs.default_unicast_key);
+		sdata->debugfs.default_unicast_key = NULL;
+	}
 
-	debugfs_remove(sdata->debugfs.default_key);
-	sdata->debugfs.default_key = NULL;
+	if (sdata->default_multicast_key) {
+		key = sdata->default_multicast_key;
+		sprintf(buf, "../keys/%d", key->debugfs.cnt);
+		sdata->debugfs.default_multicast_key =
+			debugfs_create_symlink("default_multicast_key",
+					       sdata->debugfs.dir, buf);
+	} else {
+		debugfs_remove(sdata->debugfs.default_multicast_key);
+		sdata->debugfs.default_multicast_key = NULL;
+	}
 }
 
 void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index 54717b4e1371..32adc77e9c77 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -4,8 +4,7 @@
 #ifdef CONFIG_MAC80211_DEBUGFS
 void ieee80211_debugfs_key_add(struct ieee80211_key *key);
 void ieee80211_debugfs_key_remove(struct ieee80211_key *key);
-void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata);
-void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata);
+void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_key_add_mgmt_default(
 	struct ieee80211_sub_if_data *sdata);
 void ieee80211_debugfs_key_remove_mgmt_default(
@@ -17,10 +16,7 @@ static inline void ieee80211_debugfs_key_add(struct ieee80211_key *key)
 {}
 static inline void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
 {}
-static inline void ieee80211_debugfs_key_add_default(
-	struct ieee80211_sub_if_data *sdata)
-{}
-static inline void ieee80211_debugfs_key_remove_default(
+static inline void ieee80211_debugfs_key_update_default(
 	struct ieee80211_sub_if_data *sdata)
 {}
 static inline void ieee80211_debugfs_key_add_mgmt_default(
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 72499fe5fc36..ce58b2a676e2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -557,7 +557,7 @@ struct ieee80211_sub_if_data {
 	unsigned int fragment_next;
 
 	struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
-	struct ieee80211_key *default_key;
+	struct ieee80211_key *default_unicast_key, *default_multicast_key;
 	struct ieee80211_key *default_mgmt_key;
 
 	u16 sequence_number;
@@ -595,7 +595,8 @@ struct ieee80211_sub_if_data {
 	struct {
 		struct dentry *dir;
 		struct dentry *subdir_stations;
-		struct dentry *default_key;
+		struct dentry *default_unicast_key;
+		struct dentry *default_multicast_key;
 		struct dentry *default_mgmt_key;
 	} debugfs;
 #endif
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 72df1ca7299b..84cf9196820f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -178,7 +178,7 @@ void ieee80211_key_removed(struct ieee80211_key_conf *key_conf)
 EXPORT_SYMBOL_GPL(ieee80211_key_removed);
 
 static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
-					int idx)
+					int idx, bool uni, bool multi)
 {
 	struct ieee80211_key *key = NULL;
 
@@ -187,18 +187,19 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
 	if (idx >= 0 && idx < NUM_DEFAULT_KEYS)
 		key = sdata->keys[idx];
 
-	rcu_assign_pointer(sdata->default_key, key);
+	if (uni)
+		rcu_assign_pointer(sdata->default_unicast_key, key);
+	if (multi)
+		rcu_assign_pointer(sdata->default_multicast_key, key);
 
-	if (key) {
-		ieee80211_debugfs_key_remove_default(key->sdata);
-		ieee80211_debugfs_key_add_default(key->sdata);
-	}
+	ieee80211_debugfs_key_update_default(sdata);
 }
 
-void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx)
+void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
+			       bool uni, bool multi)
 {
 	mutex_lock(&sdata->local->key_mtx);
-	__ieee80211_set_default_key(sdata, idx);
+	__ieee80211_set_default_key(sdata, idx, uni, multi);
 	mutex_unlock(&sdata->local->key_mtx);
 }
 
@@ -215,10 +216,7 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
 
 	rcu_assign_pointer(sdata->default_mgmt_key, key);
 
-	if (key) {
-		ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
-		ieee80211_debugfs_key_add_mgmt_default(key->sdata);
-	}
+	ieee80211_debugfs_key_update_default(sdata);
 }
 
 void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
@@ -236,7 +234,8 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_key *old,
 				    struct ieee80211_key *new)
 {
-	int idx, defkey, defmgmtkey;
+	int idx;
+	bool defunikey, defmultikey, defmgmtkey;
 
 	if (new)
 		list_add(&new->list, &sdata->key_list);
@@ -257,17 +256,24 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 		else
 			idx = new->conf.keyidx;
 
-		defkey = old && sdata->default_key == old;
+		defunikey = old && sdata->default_unicast_key == old;
+		defmultikey = old && sdata->default_multicast_key == old;
 		defmgmtkey = old && sdata->default_mgmt_key == old;
 
-		if (defkey && !new)
-			__ieee80211_set_default_key(sdata, -1);
+		if (defunikey && !new)
+			__ieee80211_set_default_key(sdata, -1, true, false);
+		if (defmultikey && !new)
+			__ieee80211_set_default_key(sdata, -1, false, true);
 		if (defmgmtkey && !new)
 			__ieee80211_set_default_mgmt_key(sdata, -1);
 
 		rcu_assign_pointer(sdata->keys[idx], new);
-		if (defkey && new)
-			__ieee80211_set_default_key(sdata, new->conf.keyidx);
+		if (defunikey && new)
+			__ieee80211_set_default_key(sdata, new->conf.keyidx,
+						    true, false);
+		if (defmultikey && new)
+			__ieee80211_set_default_key(sdata, new->conf.keyidx,
+						    false, true);
 		if (defmgmtkey && new)
 			__ieee80211_set_default_mgmt_key(sdata,
 							 new->conf.keyidx);
@@ -509,11 +515,12 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
 
 	mutex_lock(&sdata->local->key_mtx);
 
-	ieee80211_debugfs_key_remove_default(sdata);
 	ieee80211_debugfs_key_remove_mgmt_default(sdata);
 
 	list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
 		__ieee80211_key_free(key);
 
+	ieee80211_debugfs_key_update_default(sdata);
+
 	mutex_unlock(&sdata->local->key_mtx);
 }
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 0db1c0f5f697..8106aa1b7466 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -138,7 +138,8 @@ int __must_check ieee80211_key_link(struct ieee80211_key *key,
 				    struct sta_info *sta);
 void ieee80211_key_free(struct ieee80211_local *local,
 			struct ieee80211_key *key);
-void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
+void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
+			       bool uni, bool multi);
 void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
 				    int idx);
 void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 973fee9f7d69..ae656b6e3bc2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -519,7 +519,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	wiphy->flags |= WIPHY_FLAG_NETNS_OK |
 			WIPHY_FLAG_4ADDR_AP |
-			WIPHY_FLAG_4ADDR_STATION;
+			WIPHY_FLAG_4ADDR_STATION |
+			WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS;
 	wiphy->privid = mac80211_wiphy_privid;
 
 	wiphy->bss_priv_size = sizeof(struct ieee80211_bss);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0ee56bb0ea7e..157bde993ef5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -539,7 +539,11 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 		 ieee80211_is_robust_mgmt_frame(hdr) &&
 		 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
 		tx->key = key;
-	else if ((key = rcu_dereference(tx->sdata->default_key)))
+	else if (is_multicast_ether_addr(hdr->addr1) &&
+		 (key = rcu_dereference(tx->sdata->default_multicast_key)))
+		tx->key = key;
+	else if (!is_multicast_ether_addr(hdr->addr1) &&
+		 (key = rcu_dereference(tx->sdata->default_unicast_key)))
 		tx->key = key;
 	else if (tx->sdata->drop_unencrypted &&
 		 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
-- 
cgit v1.2.3


From 91f44b02992f632ac6c070f985cd58d5acee4199 Mon Sep 17 00:00:00 2001
From: Tim Harvey <harvey.tim@gmail.com>
Date: Thu, 9 Dec 2010 13:15:45 -0800
Subject: mac80211 default tx_last_beacon false (congestion)

The 802.11 spec states that the STA that generated the last Beacon frame shall
be the STA that response to a probe request.  This is important for congestion
reduction when a probe request is received - only 1 node in an adhoc BSS
will transmit a response.  While mac80211 drivers should provide the
tx_last_beacon function to report if they transmitted the last beacon many
do not.  As an attempt to reduce probe response congestion default this
to 0 such that a node not implementing this capability does not contribute
to unnecessary congestion.

In a modern medium sized office environment I see upwards of 100 probe
requests per second received at a given node from various hardware/OS/drivers
doing zeroconf 'active probing' as opposed to passively listening for beacons.
With a modest 10-node adhoc network consisting of drivers that do not implement
this tx_last_beacon feature, I have seen this result in the simultaneous xmit
of probe responses accumulating to 500 probe responses per second because of
collisions which brings the adhoc network to its knees as well as causes
needless congestion.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/driver-ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 4244554d218a..af0c4398cceb 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -367,7 +367,7 @@ static inline void drv_reset_tsf(struct ieee80211_local *local)
 
 static inline int drv_tx_last_beacon(struct ieee80211_local *local)
 {
-	int ret = 1;
+	int ret = 0; /* default unsuported op for less congestion */
 
 	might_sleep();
 
-- 
cgit v1.2.3


From 207aba6018a7b1757b5248ced2b280d20790c498 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 10 Dec 2010 17:10:44 +0100
Subject: mac80211: support IBSS RSN with SW crypto

When software crypto is used, mac80211 will
support IBSS RSN, it doesn't depend on the
driver in that case.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ae656b6e3bc2..f7bdb7c78879 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -517,11 +517,15 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
 
+	wiphy->privid = mac80211_wiphy_privid;
+
 	wiphy->flags |= WIPHY_FLAG_NETNS_OK |
 			WIPHY_FLAG_4ADDR_AP |
 			WIPHY_FLAG_4ADDR_STATION |
 			WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS;
-	wiphy->privid = mac80211_wiphy_privid;
+
+	if (!ops->set_key)
+		wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
 
 	wiphy->bss_priv_size = sizeof(struct ieee80211_bss);
 
-- 
cgit v1.2.3


From 0dbaee3b37e118a96bb7b8eb0d9bbaeeb46264be Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 13 Dec 2010 12:52:14 -0800
Subject: net: Abstract default ADVMSS behind an accessor.

Make all RTAX_ADVMSS metric accesses go through a new helper function,
dst_metric_advmss().

Leave the actual default metric as "zero" in the real metric slot,
and compute the actual default value dynamically via a new dst_ops
AF specific callback.

For stacked IPSEC routes, we use the advmss of the path which
preserves existing behavior.

Unlike ipv4/ipv6, DecNET ties the advmss to the mtu and thus updates
advmss on pmtu updates.  This inconsistency in advmss handling
results in more raw metric accesses than I wish we ended up with.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/cxgbi/libcxgbi.c |  2 +-
 include/net/dst.h             | 14 +++++++++++++-
 include/net/dst_ops.h         |  1 +
 net/decnet/af_decnet.c        |  4 ++--
 net/decnet/dn_route.c         | 22 ++++++++++++++++------
 net/ipv4/route.c              | 24 +++++++++++++++++-------
 net/ipv4/tcp_ipv4.c           |  2 +-
 net/ipv4/tcp_output.c         | 14 +++++++++-----
 net/ipv6/route.c              | 16 +++++++---------
 net/ipv6/tcp_ipv6.c           |  2 +-
 net/xfrm/xfrm_policy.c        |  7 +++++++
 11 files changed, 75 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index be5661707dfa..d2ad3d676724 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -825,7 +825,7 @@ unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu)
 	unsigned int idx;
 	struct dst_entry *dst = csk->dst;
 
-	csk->advmss = dst_metric(dst, RTAX_ADVMSS);
+	csk->advmss = dst_metric_advmss(dst);
 
 	if (csk->advmss > pmtu - 40)
 		csk->advmss = pmtu - 40;
diff --git a/include/net/dst.h b/include/net/dst.h
index 755ac6c1aa03..03a1c3d52d80 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -112,10 +112,22 @@ dst_metric_raw(const struct dst_entry *dst, const int metric)
 static inline u32
 dst_metric(const struct dst_entry *dst, const int metric)
 {
-	WARN_ON_ONCE(metric == RTAX_HOPLIMIT);
+	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
+		     metric == RTAX_ADVMSS);
 	return dst_metric_raw(dst, metric);
 }
 
+static inline u32
+dst_metric_advmss(const struct dst_entry *dst)
+{
+	u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+
+	if (!advmss)
+		advmss = dst->ops->default_advmss(dst);
+
+	return advmss;
+}
+
 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
 {
 	dst->_metrics[metric-1] = val;
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 51665b3461b8..15fb7af08c42 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -16,6 +16,7 @@ struct dst_ops {
 
 	int			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
+	unsigned int		(*default_advmss)(const struct dst_entry *);
 	void			(*destroy)(struct dst_entry *);
 	void			(*ifdown)(struct dst_entry *,
 					  struct net_device *dev, int how);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 0065e7e14af4..2af15b15d1fa 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -829,7 +829,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
 		return -EINVAL;
 
 	scp->state = DN_CC;
-	scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
+	scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
 	dn_send_conn_conf(sk, allocation);
 
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -958,7 +958,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen,
 	sk->sk_route_caps = sk->sk_dst_cache->dev->features;
 	sock->state = SS_CONNECTING;
 	scp->state = DN_CI;
-	scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS);
+	scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache);
 
 	dn_nsp_send_conninit(sk, NSP_CI);
 	err = -EINPROGRESS;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index e2e926841fe6..b8a5c0515be8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -110,6 +110,7 @@ static unsigned long dn_rt_deadline;
 
 static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
+static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -129,6 +130,7 @@ static struct dst_ops dn_dst_ops = {
 	.gc_thresh =		128,
 	.gc =			dn_dst_gc,
 	.check =		dn_dst_check,
+	.default_advmss =	dn_dst_default_advmss,
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
@@ -245,7 +247,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
 		}
 		if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
 			u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
-			if (dst_metric(dst, RTAX_ADVMSS) > mss)
+			u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
+			if (!existing_mss || existing_mss > mss)
 				dst_metric_set(dst, RTAX_ADVMSS, mss);
 		}
 	}
@@ -795,12 +798,17 @@ static int dn_rt_bug(struct sk_buff *skb)
 	return NET_RX_DROP;
 }
 
+static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
+{
+	return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
+}
+
 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
 	struct net_device *dev = rt->dst.dev;
 	struct neighbour *n;
-	unsigned mss;
+	unsigned int metric;
 
 	if (fi) {
 		if (DN_FIB_RES_GW(*res) &&
@@ -820,10 +828,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
 	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
 		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
-	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
-	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
-	    dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
-		dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
+	metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
+	if (metric) {
+		unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
+		if (metric > mss)
+			dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
+	}
 	return 0;
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 770f70427f0b..80997333db0c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -139,6 +139,7 @@ static unsigned long expires_ljiffies;
  */
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
@@ -155,6 +156,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.protocol =		cpu_to_be16(ETH_P_IP),
 	.gc =			rt_garbage_collect,
 	.check =		ipv4_dst_check,
+	.default_advmss =	ipv4_default_advmss,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -383,8 +385,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			(__force u32)r->rt_gateway,
 			r->rt_flags, atomic_read(&r->dst.__refcnt),
 			r->dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->dst, RTAX_ADVMSS) ?
-			     (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric_advmss(&r->dst) + 40,
 			dst_metric(&r->dst, RTAX_WINDOW),
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
@@ -1798,6 +1799,19 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 }
 #endif
 
+static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
+{
+	unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+
+	if (advmss == 0) {
+		advmss = max_t(unsigned int, dst->dev->mtu - 40,
+			       ip_rt_min_advmss);
+		if (advmss > 65535 - 40)
+			advmss = 65535 - 40;
+	}
+	return advmss;
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
 	struct dst_entry *dst = &rt->dst;
@@ -1823,11 +1837,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 
 	if (dst_mtu(dst) > IP_MAX_MTU)
 		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
-	if (dst_metric(dst, RTAX_ADVMSS) == 0)
-		dst_metric_set(dst, RTAX_ADVMSS,
-			       max_t(unsigned int, dst->dev->mtu - 40,
-				     ip_rt_min_advmss));
-	if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40)
+	if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
 		dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
 #ifdef CONFIG_NET_CLS_ROUTE
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4fc3387aa994..f4011027543d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1436,7 +1436,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
-	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	newtp->advmss = dst_metric_advmss(dst);
 	if (tcp_sk(sk)->rx_opt.user_mss &&
 	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 97041f24cd27..2d390669d406 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
-	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
-		mss = dst_metric(dst, RTAX_ADVMSS);
-		tp->advmss = mss;
+	if (dst) {
+		unsigned int metric = dst_metric_advmss(dst);
+
+		if (metric < mss) {
+			mss = metric;
+			tp->advmss = mss;
+		}
 	}
 
 	return (__u16)mss;
@@ -2422,7 +2426,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb_dst_set(skb, dst_clone(dst));
 
-	mss = dst_metric(dst, RTAX_ADVMSS);
+	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
@@ -2556,7 +2560,7 @@ static void tcp_connect_init(struct sock *sk)
 
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tp->advmss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
 		tp->advmss = tp->rx_opt.user_mss;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 98796b0dc2b7..d9cb832be529 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -76,6 +76,7 @@
 
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
@@ -103,6 +104,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.gc			=	ip6_dst_gc,
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
+	.default_advmss		=	ip6_default_advmss,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -937,8 +939,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
 static int ipv6_get_mtu(struct net_device *dev);
 
-static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
+	struct net_device *dev = dst->dev;
+	unsigned int mtu = dst_mtu(dst);
+	struct net *net = dev_net(dev);
+
 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
@@ -990,7 +996,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	atomic_set(&rt->dst.__refcnt, 1);
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
 	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
-	dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
 	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
@@ -1312,8 +1317,6 @@ install_route:
 
 	if (!dst_mtu(&rt->dst))
 		dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
-	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
-		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
 	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
@@ -1540,8 +1543,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
 	dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
-	dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
-							   dst_mtu(&nrt->dst)));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
@@ -1971,7 +1972,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
 	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
-	dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	rt->dst.obsolete = -1;
 
@@ -2041,7 +2041,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
-	struct net *net = dev_net(arg->dev);
 
 	/* In IPv6 pmtu discovery is not optional,
 	   so that RTAX_MTU lock cannot disable it.
@@ -2073,7 +2072,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	     (dst_mtu(&rt->dst) < arg->mtu &&
 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
-		dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
 	}
 	return 0;
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fee076891646..20aa95e37359 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1521,7 +1521,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
-	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	newtp->advmss = dst_metric_advmss(dst);
 	tcp_initialize_rcv_mss(newsk);
 
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6e50ccd8c532..36936c8ae961 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2361,6 +2361,11 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 	return 1;
 }
 
+static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
+{
+	return dst_metric_advmss(dst->path);
+}
+
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	struct net *net;
@@ -2378,6 +2383,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->kmem_cachep = xfrm_dst_cache;
 		if (likely(dst_ops->check == NULL))
 			dst_ops->check = xfrm_dst_check;
+		if (likely(dst_ops->default_advmss == NULL))
+			dst_ops->default_advmss = xfrm_default_advmss;
 		if (likely(dst_ops->negative_advice == NULL))
 			dst_ops->negative_advice = xfrm_negative_advice;
 		if (likely(dst_ops->link_failure == NULL))
-- 
cgit v1.2.3


From d33e455337ea2c71d09d7f4367d6ad6dd32b6965 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 14 Dec 2010 13:01:14 -0800
Subject: net: Abstract default MTU metric calculation behind an accessor.

Like RTAX_ADVMSS, make the default calculation go through a dst_ops
method rather than caching the computation in the routing cache
entries.

Now dst metrics are pretty much left as-is when new entries are
created, thus optimizing metric sharing becomes a real possibility.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h      | 15 ++++++++-------
 include/net/dst_ops.h  |  1 +
 net/decnet/dn_route.c  | 10 ++++++++--
 net/ipv4/route.c       | 29 ++++++++++++++++++++---------
 net/ipv6/route.c       | 37 ++++++++++++++++---------------------
 net/xfrm/xfrm_policy.c |  7 +++++++
 6 files changed, 60 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 03a1c3d52d80..93b0310317be 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -113,7 +113,8 @@ static inline u32
 dst_metric(const struct dst_entry *dst, const int metric)
 {
 	WARN_ON_ONCE(metric == RTAX_HOPLIMIT ||
-		     metric == RTAX_ADVMSS);
+		     metric == RTAX_ADVMSS ||
+		     metric == RTAX_MTU);
 	return dst_metric_raw(dst, metric);
 }
 
@@ -156,11 +157,11 @@ dst_feature(const struct dst_entry *dst, u32 feature)
 
 static inline u32 dst_mtu(const struct dst_entry *dst)
 {
-	u32 mtu = dst_metric(dst, RTAX_MTU);
-	/*
-	 * Alexey put it here, so ask him about it :)
-	 */
-	barrier();
+	u32 mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (!mtu)
+		mtu = dst->ops->default_mtu(dst);
+
 	return mtu;
 }
 
@@ -186,7 +187,7 @@ dst_allfrag(const struct dst_entry *dst)
 }
 
 static inline int
-dst_metric_locked(struct dst_entry *dst, int metric)
+dst_metric_locked(const struct dst_entry *dst, int metric)
 {
 	return dst_metric(dst, RTAX_LOCK) & (1<<metric);
 }
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 15fb7af08c42..21a320b8708e 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -17,6 +17,7 @@ struct dst_ops {
 	int			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
 	unsigned int		(*default_advmss)(const struct dst_entry *);
+	unsigned int		(*default_mtu)(const struct dst_entry *);
 	void			(*destroy)(struct dst_entry *);
 	void			(*ifdown)(struct dst_entry *,
 					  struct net_device *dev, int how);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b8a5c0515be8..5e636365d33c 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -111,6 +111,7 @@ static unsigned long dn_rt_deadline;
 static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
+static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
@@ -131,6 +132,7 @@ static struct dst_ops dn_dst_ops = {
 	.gc =			dn_dst_gc,
 	.check =		dn_dst_check,
 	.default_advmss =	dn_dst_default_advmss,
+	.default_mtu =		dn_dst_default_mtu,
 	.negative_advice =	dn_dst_negative_advice,
 	.link_failure =		dn_dst_link_failure,
 	.update_pmtu =		dn_dst_update_pmtu,
@@ -803,6 +805,11 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
 	return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
 }
 
+static unsigned int dn_dst_default_mtu(const struct dst_entry *dst)
+{
+	return dst->dev->mtu;
+}
+
 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
@@ -825,8 +832,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		rt->dst.neighbour = n;
 	}
 
-	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
-	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
+	if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
 		dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
 	metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
 	if (metric) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 80997333db0c..ae520963540f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -140,6 +140,7 @@ static unsigned long expires_ljiffies;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
+static unsigned int	 ipv4_default_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
@@ -157,6 +158,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.gc =			rt_garbage_collect,
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
+	.default_mtu =		ipv4_default_mtu,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
 	.negative_advice =	ipv4_negative_advice,
@@ -1812,6 +1814,23 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 	return advmss;
 }
 
+static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+{
+	unsigned int mtu = dst->dev->mtu;
+
+	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+		const struct rtable *rt = (const struct rtable *) dst;
+
+		if (rt->rt_gateway != rt->rt_dst && mtu > 576)
+			mtu = 576;
+	}
+
+	if (mtu > IP_MAX_MTU)
+		mtu = IP_MAX_MTU;
+
+	return mtu;
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
 	struct dst_entry *dst = &rt->dst;
@@ -1822,18 +1841,10 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
 		dst_import_metrics(dst, fi->fib_metrics);
-		if (fi->fib_mtu == 0) {
-			dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
-			if (dst_metric_locked(dst, RTAX_MTU) &&
-			    rt->rt_gateway != rt->rt_dst &&
-			    dst->dev->mtu > 576)
-				dst_metric_set(dst, RTAX_MTU, 576);
-		}
 #ifdef CONFIG_NET_CLS_ROUTE
 		dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
-	} else
-		dst_metric_set(dst, RTAX_MTU, dst->dev->mtu);
+	}
 
 	if (dst_mtu(dst) > IP_MAX_MTU)
 		dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d9cb832be529..e7efb269a6e9 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -77,6 +77,7 @@
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
@@ -105,6 +106,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
+	.default_mtu		=	ip6_default_mtu,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
 	.negative_advice	=	ip6_negative_advice,
@@ -937,8 +939,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 	}
 }
 
-static int ipv6_get_mtu(struct net_device *dev);
-
 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
 	struct net_device *dev = dst->dev;
@@ -961,6 +961,20 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 	return mtu;
 }
 
+static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+{
+	unsigned int mtu = IPV6_MIN_MTU;
+	struct inet6_dev *idev;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dst->dev);
+	if (idev)
+		mtu = idev->cnf.mtu6;
+	rcu_read_unlock();
+
+	return mtu;
+}
+
 static struct dst_entry *icmp6_dst_gc_list;
 static DEFINE_SPINLOCK(icmp6_dst_lock);
 
@@ -995,7 +1009,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_nexthop  = neigh;
 	atomic_set(&rt->dst.__refcnt, 1);
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
-	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
 	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
@@ -1094,19 +1107,6 @@ out:
    Remove it only when all the things will work!
  */
 
-static int ipv6_get_mtu(struct net_device *dev)
-{
-	int mtu = IPV6_MIN_MTU;
-	struct inet6_dev *idev;
-
-	rcu_read_lock();
-	idev = __in6_dev_get(dev);
-	if (idev)
-		mtu = idev->cnf.mtu6;
-	rcu_read_unlock();
-	return mtu;
-}
-
 int ip6_dst_hoplimit(struct dst_entry *dst)
 {
 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
@@ -1315,8 +1315,6 @@ install_route:
 		}
 	}
 
-	if (!dst_mtu(&rt->dst))
-		dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
 	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
@@ -1541,8 +1539,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
-	/* Reset pmtu, it may be better */
-	dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
@@ -1971,7 +1967,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
 	rt->dst.obsolete = -1;
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 36936c8ae961..8b3ef404c794 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2366,6 +2366,11 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 	return dst_metric_advmss(dst->path);
 }
 
+static unsigned int xfrm_default_mtu(const struct dst_entry *dst)
+{
+	return dst_mtu(dst->path);
+}
+
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	struct net *net;
@@ -2385,6 +2390,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->check = xfrm_dst_check;
 		if (likely(dst_ops->default_advmss == NULL))
 			dst_ops->default_advmss = xfrm_default_advmss;
+		if (likely(dst_ops->default_mtu == NULL))
+			dst_ops->default_mtu = xfrm_default_mtu;
 		if (likely(dst_ops->negative_advice == NULL))
 			dst_ops->negative_advice = xfrm_negative_advice;
 		if (likely(dst_ops->link_failure == NULL))
-- 
cgit v1.2.3


From afe2c511fb2d75f1515081ff1be15bd79cfe722d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 14 Dec 2010 16:21:17 +0100
Subject: workqueue: convert cancel_rearming_delayed_work[queue]() users to
 cancel_delayed_work_sync()

cancel_rearming_delayed_work[queue]() has been superceded by
cancel_delayed_work_sync() quite some time ago.  Convert all the
in-kernel users.  The conversions are completely equivalent and
trivial.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: "David S. Miller" <davem@davemloft.net>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Cc: Jeff Garzik <jgarzik@pobox.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: netdev@vger.kernel.org
Cc: Anton Vorontsov <cbou@mail.ru>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Alex Elder <aelder@sgi.com>
Cc: xfs-masters@oss.sgi.com
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: netfilter-devel@vger.kernel.org
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: linux-nfs@vger.kernel.org
---
 drivers/ata/libata-core.c                  | 2 +-
 drivers/ata/libata-sff.c                   | 2 +-
 drivers/macintosh/rack-meter.c             | 4 ++--
 drivers/media/dvb/dvb-usb/dvb-usb-remote.c | 2 +-
 drivers/media/video/em28xx/em28xx-input.c  | 2 +-
 drivers/net/chelsio/my3126.c               | 2 +-
 drivers/net/ibm_newemac/core.c             | 4 ++--
 drivers/net/wireless/zd1211rw/zd_mac.c     | 3 +--
 drivers/power/ds2760_battery.c             | 6 ++----
 drivers/power/intel_mid_battery.c          | 6 ++----
 drivers/staging/pohmelfs/inode.c           | 4 ++--
 drivers/usb/atm/cxacru.c                   | 2 +-
 drivers/video/fb_defio.c                   | 2 +-
 drivers/video/omap/lcd_mipid.c             | 2 +-
 fs/nfsd/nfs4state.c                        | 2 +-
 fs/xfs/xfs_mru_cache.c                     | 2 +-
 mm/slab.c                                  | 2 +-
 mm/vmstat.c                                | 2 +-
 net/atm/lec.c                              | 2 +-
 net/core/netpoll.c                         | 2 +-
 net/netfilter/ipvs/ip_vs_ctl.c             | 2 +-
 net/sunrpc/xprtsock.c                      | 2 +-
 22 files changed, 27 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 7f77c67d267c..6669b44044fb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6122,7 +6122,7 @@ static void ata_port_detach(struct ata_port *ap)
 	/* it better be dead now */
 	WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
 
-	cancel_rearming_delayed_work(&ap->hotplug_task);
+	cancel_delayed_work_sync(&ap->hotplug_task);
 
  skip_eh:
 	if (ap->pmp_link) {
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index d05387d1e14b..8660a70f164a 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1320,7 +1320,7 @@ void ata_sff_flush_pio_task(struct ata_port *ap)
 {
 	DPRINTK("ENTER\n");
 
-	cancel_rearming_delayed_work(&ap->sff_pio_task);
+	cancel_delayed_work_sync(&ap->sff_pio_task);
 	ap->hsm_task_state = HSM_ST_IDLE;
 
 	if (ata_msg_ctl(ap))
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 53cce3a5da23..39f660b2a60d 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -285,8 +285,8 @@ static void __devinit rackmeter_init_cpu_sniffer(struct rackmeter *rm)
 
 static void __devexit rackmeter_stop_cpu_sniffer(struct rackmeter *rm)
 {
-	cancel_rearming_delayed_work(&rm->cpu[0].sniffer);
-	cancel_rearming_delayed_work(&rm->cpu[1].sniffer);
+	cancel_delayed_work_sync(&rm->cpu[0].sniffer);
+	cancel_delayed_work_sync(&rm->cpu[1].sniffer);
 }
 
 static int __devinit rackmeter_setup(struct rackmeter *rm)
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index b579fed3ab3f..0831469af69f 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -298,7 +298,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d)
 int dvb_usb_remote_exit(struct dvb_usb_device *d)
 {
 	if (d->state & DVB_USB_STATE_REMOTE) {
-		cancel_rearming_delayed_work(&d->rc_query_work);
+		cancel_delayed_work_sync(&d->rc_query_work);
 		flush_scheduled_work();
 		if (d->props.rc.mode == DVB_RC_LEGACY)
 			input_unregister_device(d->rc_input_dev);
diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c
index 6759cd5570dd..99403c720e3a 100644
--- a/drivers/media/video/em28xx/em28xx-input.c
+++ b/drivers/media/video/em28xx/em28xx-input.c
@@ -557,7 +557,7 @@ void em28xx_deregister_snapshot_button(struct em28xx *dev)
 {
 	if (dev->sbutton_input_dev != NULL) {
 		em28xx_info("Deregistering snapshot button\n");
-		cancel_rearming_delayed_work(&dev->sbutton_query_work);
+		cancel_delayed_work_sync(&dev->sbutton_query_work);
 		input_unregister_device(dev->sbutton_input_dev);
 		dev->sbutton_input_dev = NULL;
 	}
diff --git a/drivers/net/chelsio/my3126.c b/drivers/net/chelsio/my3126.c
index 4c6028512d10..a683fd3bb624 100644
--- a/drivers/net/chelsio/my3126.c
+++ b/drivers/net/chelsio/my3126.c
@@ -22,7 +22,7 @@ static int my3126_interrupt_enable(struct cphy *cphy)
 
 static int my3126_interrupt_disable(struct cphy *cphy)
 {
-	cancel_rearming_delayed_work(&cphy->phy_update);
+	cancel_delayed_work_sync(&cphy->phy_update);
 	return 0;
 }
 
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index 06bb9b799458..e209efaa01b9 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -1279,7 +1279,7 @@ static void emac_force_link_update(struct emac_instance *dev)
 	netif_carrier_off(dev->ndev);
 	smp_rmb();
 	if (dev->link_polling) {
-		cancel_rearming_delayed_work(&dev->link_work);
+		cancel_delayed_work_sync(&dev->link_work);
 		if (dev->link_polling)
 			schedule_delayed_work(&dev->link_work,  PHY_POLL_LINK_OFF);
 	}
@@ -1294,7 +1294,7 @@ static int emac_close(struct net_device *ndev)
 
 	if (dev->phy.address >= 0) {
 		dev->link_polling = 0;
-		cancel_rearming_delayed_work(&dev->link_work);
+		cancel_delayed_work_sync(&dev->link_work);
 	}
 	mutex_lock(&dev->link_lock);
 	emac_netif_stop(dev);
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 43307bd42a69..6107304cb94c 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -1207,7 +1207,6 @@ static void housekeeping_enable(struct zd_mac *mac)
 static void housekeeping_disable(struct zd_mac *mac)
 {
 	dev_dbg_f(zd_mac_dev(mac), "\n");
-	cancel_rearming_delayed_workqueue(zd_workqueue,
-		&mac->housekeeping.link_led_work);
+	cancel_delayed_work_sync(&mac->housekeeping.link_led_work);
 	zd_chip_control_leds(&mac->chip, ZD_LED_OFF);
 }
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index b3c01c16a164..e7f89785beef 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -580,10 +580,8 @@ static int ds2760_battery_remove(struct platform_device *pdev)
 {
 	struct ds2760_device_info *di = platform_get_drvdata(pdev);
 
-	cancel_rearming_delayed_workqueue(di->monitor_wqueue,
-					  &di->monitor_work);
-	cancel_rearming_delayed_workqueue(di->monitor_wqueue,
-					  &di->set_charged_work);
+	cancel_delayed_work_sync(&di->monitor_work);
+	cancel_delayed_work_sync(&di->set_charged_work);
 	destroy_workqueue(di->monitor_wqueue);
 	power_supply_unregister(&di->bat);
 	kfree(di);
diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c
index 2a10cd361181..36cf402c0677 100644
--- a/drivers/power/intel_mid_battery.c
+++ b/drivers/power/intel_mid_battery.c
@@ -730,8 +730,7 @@ static __devinit int probe(int irq, struct device *dev)
 power_reg_failed_1:
 	power_supply_unregister(&pbi->batt);
 power_reg_failed:
-	cancel_rearming_delayed_workqueue(pbi->monitor_wqueue,
-						&pbi->monitor_battery);
+	cancel_delayed_work_sync(&pbi->monitor_battery);
 requestirq_failed:
 	destroy_workqueue(pbi->monitor_wqueue);
 wqueue_failed:
@@ -760,8 +759,7 @@ static int __devexit platform_pmic_battery_remove(struct platform_device *pdev)
 	struct pmic_power_module_info *pbi = dev_get_drvdata(&pdev->dev);
 
 	free_irq(pbi->irq, pbi);
-	cancel_rearming_delayed_workqueue(pbi->monitor_wqueue,
-					&pbi->monitor_battery);
+	cancel_delayed_work_sync(&pbi->monitor_battery);
 	destroy_workqueue(pbi->monitor_wqueue);
 
 	power_supply_unregister(&pbi->usb);
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 61685ccceda8..d4a1f204b9d5 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -1318,8 +1318,8 @@ static void pohmelfs_put_super(struct super_block *sb)
 	}
 
 	psb->trans_scan_timeout = psb->drop_scan_timeout = 0;
-	cancel_rearming_delayed_work(&psb->dwork);
-	cancel_rearming_delayed_work(&psb->drop_dwork);
+	cancel_delayed_work_sync(&psb->dwork);
+	cancel_delayed_work_sync(&psb->drop_dwork);
 	flush_scheduled_work();
 
 	dprintk("%s: stopped workqueues.\n", __func__);
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index f383cb42b1d7..a845f8b8382f 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -1247,7 +1247,7 @@ static void cxacru_unbind(struct usbatm_data *usbatm_instance,
 	mutex_unlock(&instance->poll_state_serialize);
 
 	if (is_polling)
-		cancel_rearming_delayed_work(&instance->poll_work);
+		cancel_delayed_work_sync(&instance->poll_work);
 
 	usb_kill_urb(instance->snd_urb);
 	usb_kill_urb(instance->rcv_urb);
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 6b93ef93cb12..804000183c5e 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -75,7 +75,7 @@ int fb_deferred_io_fsync(struct file *file, int datasync)
 		return 0;
 
 	/* Kill off the delayed work */
-	cancel_rearming_delayed_work(&info->deferred_work);
+	cancel_delayed_work_sync(&info->deferred_work);
 
 	/* Run it immediately */
 	return schedule_delayed_work(&info->deferred_work, 0);
diff --git a/drivers/video/omap/lcd_mipid.c b/drivers/video/omap/lcd_mipid.c
index 64dcc7439c99..90e3bdd1b7ab 100644
--- a/drivers/video/omap/lcd_mipid.c
+++ b/drivers/video/omap/lcd_mipid.c
@@ -396,7 +396,7 @@ static void mipid_esd_start_check(struct mipid_device *md)
 static void mipid_esd_stop_check(struct mipid_device *md)
 {
 	if (md->esd_check != NULL)
-		cancel_rearming_delayed_workqueue(md->esd_wq, &md->esd_work);
+		cancel_delayed_work_sync(&md->esd_work);
 }
 
 static void mipid_esd_work(struct work_struct *work)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 116cab970e0f..fbd18c3074bb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4336,7 +4336,7 @@ __nfs4_state_shutdown(void)
 void
 nfs4_state_shutdown(void)
 {
-	cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
+	cancel_delayed_work_sync(&laundromat_work);
 	destroy_workqueue(laundry_wq);
 	locks_end_grace(&nfsd4_manager);
 	nfs4_lock_state();
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 45ce15dc5b2b..edfa178bafb6 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -408,7 +408,7 @@ xfs_mru_cache_flush(
 	spin_lock(&mru->lock);
 	if (mru->queued) {
 		spin_unlock(&mru->lock);
-		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		cancel_delayed_work_sync(&mru->work);
 		spin_lock(&mru->lock);
 	}
 
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab3..dc983867682b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1293,7 +1293,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 		 * anything expensive but will only modify reap_work
 		 * and reschedule the timer.
 		*/
-		cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
+		cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
 		/* Now the cache_reaper is guaranteed to be not running. */
 		per_cpu(slab_reap_work, cpu).work.func = NULL;
   		break;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 42eac4d33216..d1f3cb63a8a9 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1033,7 +1033,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
-		cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu));
+		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
 		per_cpu(vmstat_work, cpu).work.func = NULL;
 		break;
 	case CPU_DOWN_FAILED:
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 181d70c73d70..96a4a4bd2304 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1608,7 +1608,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
 	struct lec_arp_table *entry;
 	int i;
 
-	cancel_rearming_delayed_work(&priv->lec_arp_work);
+	cancel_delayed_work_sync(&priv->lec_arp_work);
 
 	/*
 	 * Remove all entries
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4e98ffac3af0..d2910947a3ac 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -925,7 +925,7 @@ void __netpoll_cleanup(struct netpoll *np)
 
 		skb_queue_purge(&npinfo->arp_tx);
 		skb_queue_purge(&npinfo->txq);
-		cancel_rearming_delayed_work(&npinfo->tx_work);
+		cancel_delayed_work_sync(&npinfo->tx_work);
 
 		/* clean after last, unfinished work */
 		__skb_queue_purge(&npinfo->txq);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 5f5daa30b0af..96334e0fd04e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3432,7 +3432,7 @@ void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
 	ip_vs_trash_cleanup();
-	cancel_rearming_delayed_work(&defense_work);
+	cancel_delayed_work_sync(&defense_work);
 	cancel_work_sync(&defense_work.work);
 	ip_vs_kill_estimator(&ip_vs_stats);
 	unregister_sysctl_table(sysctl_header);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dfcab5ac65af..96549df836ee 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -770,7 +770,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	dprintk("RPC:       xs_destroy xprt %p\n", xprt);
 
-	cancel_rearming_delayed_work(&transport->connect_worker);
+	cancel_delayed_work_sync(&transport->connect_worker);
 
 	xs_close(xprt);
 	xs_free_peer_addresses(xprt);
-- 
cgit v1.2.3


From a293911d4fd5e8593dbf478399a77f990d466269 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 14 Dec 2010 17:54:28 +0100
Subject: nl80211: advertise maximum remain-on-channel duration

With the upcoming hardware offload implementation,
some devices will have a different maximum duration
for the remain-on-channel command. Advertise the
maximum duration in mac80211, and make mac80211 set
it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 5 +++++
 include/net/cfg80211.h  | 5 +++++
 net/mac80211/main.c     | 2 ++
 net/wireless/nl80211.c  | 7 ++++++-
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index b8fa25d741ba..1cee56b3a79a 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -773,6 +773,9 @@ enum nl80211_commands {
  *	cache, a wiphy attribute.
  *
  * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32.
+ * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that
+ *	specifies the maximum duration that can be requested with the
+ *	remain-on-channel operation, in milliseconds, u32.
  *
  * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects.
  *
@@ -1035,6 +1038,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_KEY_DEFAULT_TYPES,
 
+	NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 22be7c625b70..f45e15f12446 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1474,6 +1474,9 @@ struct ieee80211_txrx_stypes {
  *
  * @available_antennas: bitmap of antennas which are available to configure.
  *	antenna configuration commands will be rejected unless this is set.
+ *
+ * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation
+ *	may request, if implemented.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1511,6 +1514,8 @@ struct wiphy {
 	char fw_version[ETHTOOL_BUSINFO_LEN];
 	u32 hw_version;
 
+	u16 max_remain_on_channel_duration;
+
 	u8 max_num_pmkids;
 
 	u32 available_antennas;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f7bdb7c78879..d87eb005690f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -745,6 +745,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
+	local->hw.wiphy->max_remain_on_channel_duration = 5000;
+
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
 		goto fail_wiphy_register;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 53f044370cde..594a6ac8b9d2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -755,6 +755,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	nla_nest_end(msg, nl_cmds);
 
+	if (dev->ops->remain_on_channel)
+		NLA_PUT_U32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+			    dev->wiphy.max_remain_on_channel_duration);
+
 	/* for now at least assume all drivers have it */
 	if (dev->ops->mgmt_tx)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK);
@@ -4228,7 +4232,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 	 * We should be on that channel for at least one jiffie,
 	 * and more than 5 seconds seems excessive.
 	 */
-	if (!duration || !msecs_to_jiffies(duration) || duration > 5000)
+	if (!duration || !msecs_to_jiffies(duration) ||
+	    duration > rdev->wiphy.max_remain_on_channel_duration)
 		return -EINVAL;
 
 	if (!rdev->ops->remain_on_channel)
-- 
cgit v1.2.3


From bd2ce6e43f65127bc723e7fcc044758cf8113260 Mon Sep 17 00:00:00 2001
From: Sujith Manoharan <Sujith.Manoharan@atheros.com>
Date: Wed, 15 Dec 2010 07:47:10 +0530
Subject: mac80211: Add timeout to BA session start API

Allow drivers or rate control algorithms to specify BlockAck session
timeout when initiating an ADDBA transaction. This is useful in cases
where maintaining persistent BA sessions does not incur any overhead.

The current timeout value of 5000 TUs is retained for all non ath9k/ath9k_htc
drivers.

Signed-off-by: Sujith Manoharan <Sujith.Manoharan@atheros.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +-
 drivers/net/wireless/ath/ath9k/rc.c           | 2 +-
 drivers/net/wireless/iwlwifi/iwl-agn-rs.c     | 2 +-
 drivers/net/wireless/rtlwifi/rc.c             | 2 +-
 include/net/mac80211.h                        | 4 +++-
 net/mac80211/agg-tx.c                         | 7 +++++--
 net/mac80211/debugfs_sta.c                    | 2 +-
 net/mac80211/rc80211_minstrel_ht.c            | 2 +-
 net/mac80211/sta_info.h                       | 2 ++
 9 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 31fad82239b3..33f36029fa4f 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -251,7 +251,7 @@ void ath9k_tx_tasklet(unsigned long data)
 				ista = (struct ath9k_htc_sta *)sta->drv_priv;
 
 				if (ath9k_htc_check_tx_aggr(priv, ista, tid)) {
-					ieee80211_start_tx_ba_session(sta, tid);
+					ieee80211_start_tx_ba_session(sta, tid, 0);
 					spin_lock_bh(&priv->tx_lock);
 					ista->tid_state[tid] = AGGR_PROGRESS;
 					spin_unlock_bh(&priv->tx_lock);
diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c
index 2061a755a026..896d12986b1e 100644
--- a/drivers/net/wireless/ath/ath9k/rc.c
+++ b/drivers/net/wireless/ath/ath9k/rc.c
@@ -1373,7 +1373,7 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband,
 			an = (struct ath_node *)sta->drv_priv;
 
 			if(ath_tx_aggr_check(sc, an, tid))
-				ieee80211_start_tx_ba_session(sta, tid);
+				ieee80211_start_tx_ba_session(sta, tid, 0);
 		}
 	}
 
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
index ee123482e1d5..5083dba122ca 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c
@@ -387,7 +387,7 @@ static int rs_tl_turn_on_agg_for_tid(struct iwl_priv *priv,
 	if (load > IWL_AGG_LOAD_THRESHOLD) {
 		IWL_DEBUG_HT(priv, "Starting Tx agg: STA: %pM tid: %d\n",
 				sta->addr, tid);
-		ret = ieee80211_start_tx_ba_session(sta, tid);
+		ret = ieee80211_start_tx_ba_session(sta, tid, 5000);
 		if (ret == -EAGAIN) {
 			/*
 			 * driver and mac80211 is out of sync
diff --git a/drivers/net/wireless/rtlwifi/rc.c b/drivers/net/wireless/rtlwifi/rc.c
index 904b8fd01f6d..91634107434a 100644
--- a/drivers/net/wireless/rtlwifi/rc.c
+++ b/drivers/net/wireless/rtlwifi/rc.c
@@ -169,7 +169,7 @@ static void rtl_tx_status(void *ppriv,
 			tid = qc[0] & 0xf;
 
 			if (_rtl_tx_aggr_check(rtlpriv, tid))
-				ieee80211_start_tx_ba_session(sta, tid);
+				ieee80211_start_tx_ba_session(sta, tid, 5000);
 		}
 	}
 }
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e411cf87fb41..69ded1ee49ce 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2435,6 +2435,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
  * ieee80211_start_tx_ba_session - Start a tx Block Ack session.
  * @sta: the station for which to start a BA session
  * @tid: the TID to BA on.
+ * @timeout: session timeout value (in TUs)
  *
  * Return: success if addBA request was sent, failure otherwise
  *
@@ -2442,7 +2443,8 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
  * the need to start aggregation on a certain RA/TID, the session level
  * will be managed by the mac80211.
  */
-int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid);
+int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid,
+				  u16 timeout);
 
 /**
  * ieee80211_start_tx_ba_cb_irqsafe - low level driver ready to aggregate.
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index d4679b265ba8..9cc472c6a6a5 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -342,10 +342,11 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, start_seq_num,
-				     0x40, 5000);
+				     0x40, tid_tx->timeout);
 }
 
-int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
+int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
+				  u16 timeout)
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -420,6 +421,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 	skb_queue_head_init(&tid_tx->pending);
 	__set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
 
+	tid_tx->timeout = timeout;
+
 	/* Tx timer */
 	tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
 	tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 8bb5af85f469..c04a1396cf8d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -189,7 +189,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
 
 	if (tx) {
 		if (start)
-			ret = ieee80211_start_tx_ba_session(&sta->sta, tid);
+			ret = ieee80211_start_tx_ba_session(&sta->sta, tid, 5000);
 		else
 			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
 	} else {
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4ad7a362fcc1..165a4518bb48 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -374,7 +374,7 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru
 	if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
 		return;
 
-	ieee80211_start_tx_ba_session(pubsta, tid);
+	ieee80211_start_tx_ba_session(pubsta, tid, 5000);
 }
 
 static void
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index fdca52cf88de..bbdd2a86a94b 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -78,6 +78,7 @@ enum ieee80211_sta_info_flags {
  * @addba_resp_timer: timer for peer's response to addba request
  * @pending: pending frames queue -- use sta's spinlock to protect
  * @dialog_token: dialog token for aggregation session
+ * @timeout: session timeout value to be filled in ADDBA requests
  * @state: session state (see above)
  * @stop_initiator: initiator of a session stop
  * @tx_stop: TX DelBA frame when stopping
@@ -96,6 +97,7 @@ struct tid_ampdu_tx {
 	struct timer_list addba_resp_timer;
 	struct sk_buff_head pending;
 	unsigned long state;
+	u16 timeout;
 	u8 dialog_token;
 	u8 stop_initiator;
 	bool tx_stop;
-- 
cgit v1.2.3


From 38cd6b4f52a75926fd81fc85f53f5067dcd809f7 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 15 Dec 2010 14:27:28 +0800
Subject: wireless:mac80211: kill unuse macro MESH_CFG_CMP_LEN in mesh.h

Commit 00d3f14c has removed the references of this macro,
but left it only. So remove this definition.

commit 00d3f14cf9f12c21428121026a5e1d5f65926447
Author: Johannes Berg <johannes@sipsolutions.net>
Date:   Tue Feb 10 21:26:00 2009 +0100

    mac80211: use cfg80211s BSS infrastructure

    Remove all the code from mac80211 to keep track of BSSes
    and use the cfg80211-provided code completely.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mesh.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 039d7fa0af74..5b828fa8c541 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -164,17 +164,6 @@ struct mesh_rmc {
 };
 
 
-/*
- * MESH_CFG_COMP_LEN Includes:
- * 	- Active path selection protocol ID.
- * 	- Active path selection metric ID.
- * 	- Congestion control mode identifier.
- * 	- Channel precedence.
- * Does not include mesh capabilities, which may vary across nodes in the same
- * mesh
- */
-#define MESH_CFG_CMP_LEN 	(IEEE80211_MESH_CONFIG_LEN - 2)
-
 #define MESH_DEFAULT_BEACON_INTERVAL		1000 	/* in 1024 us units */
 
 #define MESH_PATH_EXPIRE (600 * HZ)
-- 
cgit v1.2.3


From 4129ccf303593a1922a934697f99e682ff491504 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:58:59 +0000
Subject: SUNRPC: Avoid return code checking in rpcbind XDR encoder functions

Clean up.

The trend in the other XDR encoder functions is to BUG() when encoding
problems occur, since a problem here is always due to a local coding
error.  Then, instead of a status, zero is unconditionally returned.

Update the rpcbind XDR encoders to behave this way.

To finish the update, use the new-style be32_to_cpup() and
cpu_to_be32() macros, and compute the buffer sizes using raw integers
instead of sizeof().  This matches the conventions used in other XDR
functions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 60 ++++++++++++++++++--------------------------------
 1 file changed, 21 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index fa6d7ca2c851..d2a2ea090f95 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -705,14 +705,11 @@ static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p,
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 
-	p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz);
-	if (unlikely(p == NULL))
-		return -EIO;
-
-	*p++ = htonl(rpcb->r_prog);
-	*p++ = htonl(rpcb->r_vers);
-	*p++ = htonl(rpcb->r_prot);
-	*p   = htonl(rpcb->r_port);
+	p = xdr_reserve_space(&xdr, RPCB_mappingargs_sz << 2);
+	*p++ = cpu_to_be32(rpcb->r_prog);
+	*p++ = cpu_to_be32(rpcb->r_vers);
+	*p++ = cpu_to_be32(rpcb->r_prot);
+	*p   = cpu_to_be32(rpcb->r_port);
 
 	return 0;
 }
@@ -728,11 +725,11 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 
 	rpcb->r_port = 0;
 
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(&xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
-	port = ntohl(*p);
+	port = be32_to_cpup(p);
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 			task->tk_msg.rpc_proc->p_name, port);
 	if (unlikely(port > USHRT_MAX))
@@ -750,7 +747,7 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
 
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(&xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
@@ -764,24 +761,16 @@ static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
 	return 0;
 }
 
-static int encode_rpcb_string(struct xdr_stream *xdr, const char *string,
-				const u32 maxstrlen)
+static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
+			       const u32 maxstrlen)
 {
-	u32 len;
 	__be32 *p;
+	u32 len;
 
-	if (unlikely(string == NULL))
-		return -EIO;
 	len = strlen(string);
-	if (unlikely(len > maxstrlen))
-		return -EIO;
-
-	p = xdr_reserve_space(xdr, sizeof(__be32) + len);
-	if (unlikely(p == NULL))
-		return -EIO;
+	BUG_ON(len > maxstrlen);
+	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, string, len);
-
-	return 0;
 }
 
 static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
@@ -797,20 +786,13 @@ static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 
-	p = xdr_reserve_space(&xdr,
-			sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz));
-	if (unlikely(p == NULL))
-		return -EIO;
-	*p++ = htonl(rpcb->r_prog);
-	*p = htonl(rpcb->r_vers);
-
-	if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN))
-		return -EIO;
-	if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN))
-		return -EIO;
-	if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN))
-		return -EIO;
+	p = xdr_reserve_space(&xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
+	*p++ = cpu_to_be32(rpcb->r_prog);
+	*p = cpu_to_be32(rpcb->r_vers);
 
+	encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
+	encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
+	encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 	return 0;
 }
 
@@ -827,10 +809,10 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
 
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 
-	p = xdr_inline_decode(&xdr, sizeof(__be32));
+	p = xdr_inline_decode(&xdr, 4);
 	if (unlikely(p == NULL))
 		goto out_fail;
-	len = ntohl(*p);
+	len = be32_to_cpup(p);
 
 	/*
 	 * If the returned universal address is a null string,
-- 
cgit v1.2.3


From 1ac7c23e4af5e83525137661595000099f1ce94f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:59:09 +0000
Subject: SUNRPC: Determine value of "nrprocs" automatically

Clean up.

Just fixed a panic where the nrprocs field in a different upper layer
client was set by hand incorrectly.  Use the compiler-generated method
used by the other upper layer protocols.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d2a2ea090f95..43838c72b778 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -57,10 +57,6 @@ enum {
 	RPCBPROC_GETSTAT,
 };
 
-#define RPCB_HIGHPROC_2		RPCBPROC_CALLIT
-#define RPCB_HIGHPROC_3		RPCBPROC_TADDR2UADDR
-#define RPCB_HIGHPROC_4		RPCBPROC_GETSTAT
-
 /*
  * r_owner
  *
@@ -975,19 +971,19 @@ static struct rpcb_info rpcb_next_version6[] = {
 
 static struct rpc_version rpcb_version2 = {
 	.number		= RPCBVERS_2,
-	.nrprocs	= RPCB_HIGHPROC_2,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures2),
 	.procs		= rpcb_procedures2
 };
 
 static struct rpc_version rpcb_version3 = {
 	.number		= RPCBVERS_3,
-	.nrprocs	= RPCB_HIGHPROC_3,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures3),
 	.procs		= rpcb_procedures3
 };
 
 static struct rpc_version rpcb_version4 = {
 	.number		= RPCBVERS_4,
-	.nrprocs	= RPCB_HIGHPROC_4,
+	.nrprocs	= ARRAY_SIZE(rpcb_procedures4),
 	.procs		= rpcb_procedures4
 };
 
-- 
cgit v1.2.3


From 9f06c719f474be7003763284a990bed6377bb0d4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:59:18 +0000
Subject: SUNRPC: New xdr_streams XDR encoder API

Now that all client-side XDR encoder routines use xdr_streams, there
should be no need to support the legacy calling sequence [rpc_rqst *,
__be32 *, RPC arg *] anywhere.  We can construct an xdr_stream in the
generic RPC code, instead of in each encoder function.

Also, all the client-side encoder functions return 0 now, making a
return value superfluous.  Take this opportunity to convert them to
return void instead.

This is a refactoring change.  It should not cause different behavior.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clnt4xdr.c            |  92 +++---
 fs/lockd/clntxdr.c             |  92 +++---
 fs/lockd/mon.c                 |  26 +-
 fs/nfs/mount_clnt.c            |  18 +-
 fs/nfs/nfs2xdr.c               | 147 ++++-----
 fs/nfs/nfs3xdr.c               | 241 ++++++---------
 fs/nfs/nfs4xdr.c               | 663 ++++++++++++++++++-----------------------
 fs/nfsd/nfs4callback.c         |  22 +-
 include/linux/sunrpc/auth.h    |   4 +-
 include/linux/sunrpc/clnt.h    |   2 +-
 include/linux/sunrpc/xdr.h     |   9 +-
 net/sunrpc/auth.c              |  14 +-
 net/sunrpc/auth_gss/auth_gss.c |  31 +-
 net/sunrpc/clnt.c              |   5 +-
 net/sunrpc/rpcb_clnt.c         |  47 ++-
 15 files changed, 606 insertions(+), 807 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 1a1c3e21ed2c..974f1d9cd323 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -385,17 +385,15 @@ static void encode_nlm4_lock(struct xdr_stream *xdr,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_testargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -408,20 +406,18 @@ static int nlm4_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
  *		int state;
  *	};
  */
-static int nlm4_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_lockargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	encode_bool(&xdr, args->reclaim);
-	encode_int32(&xdr, args->state);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
 }
 
 /*
@@ -432,18 +428,16 @@ static int nlm4_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nlm_args *args)
+static void nlm4_xdr_enc_cancargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -452,16 +446,14 @@ static int nlm4_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm4_lock alock;
  *	};
  */
-static int nlm4_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nlm_args *args)
+static void nlm4_xdr_enc_unlockargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_nlm4_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm4_lock(xdr, lock);
 }
 
 /*
@@ -470,15 +462,12 @@ static int nlm4_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
  *		nlm4_stat stat;
  *	};
  */
-static int nlm4_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
-			    const struct nlm_res *result)
+static void nlm4_xdr_enc_res(struct rpc_rqst *req,
+			     struct xdr_stream *xdr,
+			     const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm4_stat(&xdr, result->status);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
 }
 
 /*
@@ -494,17 +483,14 @@ static int nlm4_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
  *		nlm4_testrply test_stat;
  *	};
  */
-static int nlm4_xdr_enc_testres(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_res *result)
+static void nlm4_xdr_enc_testres(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm4_stat(&xdr, result->status);
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm4_stat(xdr, result->status);
 	if (result->status == nlm_lck_denied)
-		encode_nlm4_holder(&xdr, result);
-	return 0;
+		encode_nlm4_holder(xdr, result);
 }
 
 
@@ -588,7 +574,7 @@ out:
 #define PROC(proc, argtype, restype)					\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nlm4_xdr_enc_##argtype,		\
+	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 0472f2aff509..c6fda8fb1c5b 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -378,17 +378,15 @@ static void encode_nlm_lock(struct xdr_stream *xdr,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_testargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -401,20 +399,18 @@ static int nlm_xdr_enc_testargs(struct rpc_rqst *req, __be32 *p,
  *		int state;
  *	};
  */
-static int nlm_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_lockargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	encode_bool(&xdr, args->reclaim);
-	encode_int32(&xdr, args->state);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
+	encode_bool(xdr, args->reclaim);
+	encode_int32(xdr, args->state);
 }
 
 /*
@@ -425,18 +421,16 @@ static int nlm_xdr_enc_lockargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
-				const struct nlm_args *args)
+static void nlm_xdr_enc_cancargs(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_bool(&xdr, args->block);
-	encode_bool(&xdr, lock->fl.fl_type == F_WRLCK);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_bool(xdr, args->block);
+	encode_bool(xdr, lock->fl.fl_type == F_WRLCK);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -445,16 +439,14 @@ static int nlm_xdr_enc_cancargs(struct rpc_rqst *req, __be32 *p,
  *		struct nlm_lock alock;
  *	};
  */
-static int nlm_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nlm_args *args)
+static void nlm_xdr_enc_unlockargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nlm_args *args)
 {
 	const struct nlm_lock *lock = &args->lock;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &args->cookie);
-	encode_nlm_lock(&xdr, lock);
-	return 0;
+	encode_cookie(xdr, &args->cookie);
+	encode_nlm_lock(xdr, lock);
 }
 
 /*
@@ -463,15 +455,12 @@ static int nlm_xdr_enc_unlockargs(struct rpc_rqst *req, __be32 *p,
  *		nlm_stat stat;
  *	};
  */
-static int nlm_xdr_enc_res(struct rpc_rqst *req, __be32 *p,
-			   const struct nlm_res *result)
+static void nlm_xdr_enc_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
+			    const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm_stat(&xdr, result->status);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm_stat(xdr, result->status);
 }
 
 /*
@@ -494,16 +483,13 @@ static void encode_nlm_testrply(struct xdr_stream *xdr,
 		encode_nlm_holder(xdr, result);
 }
 
-static int nlm_xdr_enc_testres(struct rpc_rqst *req, __be32 *p,
-			       const struct nlm_res *result)
+static void nlm_xdr_enc_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				const struct nlm_res *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cookie(&xdr, &result->cookie);
-	encode_nlm_stat(&xdr, result->status);
-	encode_nlm_testrply(&xdr, result);
-	return 0;
+	encode_cookie(xdr, &result->cookie);
+	encode_nlm_stat(xdr, result->status);
+	encode_nlm_testrply(xdr, result);
 }
 
 
@@ -586,7 +572,7 @@ out:
 #define PROC(proc, argtype, restype)	\
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nlm_xdr_enc_##argtype,		\
+	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
 	.p_decode    = (kxdrproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index d812818d0258..baa77bc9d825 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -459,25 +459,17 @@ static void encode_priv(struct xdr_stream *xdr, const struct nsm_args *argp)
 	xdr_encode_opaque_fixed(p, argp->priv->data, SM_PRIV_SIZE);
 }
 
-static int xdr_enc_mon(struct rpc_rqst *req, __be32 *p,
-		       const struct nsm_args *argp)
+static void nsm_xdr_enc_mon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			    const struct nsm_args *argp)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mon_id(&xdr, argp);
-	encode_priv(&xdr, argp);
-	return 0;
+	encode_mon_id(xdr, argp);
+	encode_priv(xdr, argp);
 }
 
-static int xdr_enc_unmon(struct rpc_rqst *req, __be32 *p,
-			 const struct nsm_args *argp)
+static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      const struct nsm_args *argp)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mon_id(&xdr, argp);
-	return 0;
+	encode_mon_id(xdr, argp);
 }
 
 static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
@@ -524,7 +516,7 @@ static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
 static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
-		.p_encode	= (kxdrproc_t)xdr_enc_mon,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
 		.p_decode	= (kxdrproc_t)xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
@@ -533,7 +525,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 	},
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
-		.p_encode	= (kxdrproc_t)xdr_enc_unmon,
+		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
 		.p_decode	= (kxdrproc_t)xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 97c3ec793305..979ebd7af3cb 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -288,14 +288,10 @@ static void encode_mntdirpath(struct xdr_stream *xdr, const char *pathname)
 	xdr_encode_opaque(p, pathname, pathname_len);
 }
 
-static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p,
-			   const char *dirpath)
+static void mnt_xdr_enc_dirpath(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const char *dirpath)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_mntdirpath(&xdr, dirpath);
-	return 0;
+	encode_mntdirpath(xdr, dirpath);
 }
 
 /*
@@ -460,7 +456,7 @@ static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
@@ -469,7 +465,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 	},
 	[MOUNTPROC_UMNT] = {
 		.p_proc		= MOUNTPROC_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC_UMNT,
 		.p_name		= "UMOUNT",
@@ -479,7 +475,7 @@ static struct rpc_procinfo mnt_procedures[] = {
 static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
@@ -488,7 +484,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	},
 	[MOUNTPROC3_UMNT] = {
 		.p_proc		= MOUNTPROC3_UMNT,
-		.p_encode	= (kxdrproc_t)mnt_enc_dirpath,
+		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_statidx	= MOUNTPROC3_UMNT,
 		.p_name		= "UMOUNT",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index a9b848edbd2e..8f3acbec761f 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -558,14 +558,11 @@ out_default:
  * "NFS: Network File System Protocol Specification".
  */
 
-static int nfs2_xdr_enc_fhandle(struct rpc_rqst *req, __be32 *p,
-				const struct nfs_fh *fh)
+static void nfs2_xdr_enc_fhandle(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 const struct nfs_fh *fh)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, fh);
-	return 0;
+	encode_fhandle(xdr, fh);
 }
 
 /*
@@ -576,37 +573,28 @@ static int nfs2_xdr_enc_fhandle(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_sattrargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_sattrargs *args)
+static void nfs2_xdr_enc_sattrargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_sattrargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fh);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_fhandle(xdr, args->fh);
+	encode_sattr(xdr, args->sattr);
 }
 
-static int nfs2_xdr_enc_diropargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_diropargs *args)
+static void nfs2_xdr_enc_diropargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_diropargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name, args->len);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name, args->len);
 }
 
-static int nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs_readlinkargs *args)
+static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_readlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fh);
+	encode_fhandle(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS_readlinkres_sz);
-	return 0;
 }
 
 /*
@@ -634,17 +622,14 @@ static void encode_readargs(struct xdr_stream *xdr,
 	*p = cpu_to_be32(count);
 }
 
-static int nfs2_xdr_enc_readargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nfs_readargs *args)
+static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readargs(&xdr, args);
+	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
@@ -677,15 +662,12 @@ static void encode_writeargs(struct xdr_stream *xdr,
 	xdr_write_pages(xdr, args->pages, args->pgbase, count);
 }
 
-static int nfs2_xdr_enc_writeargs(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_writeargs *args)
+static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_writeargs(&xdr, args);
-	xdr.buf->flags |= XDRBUF_WRITE;
-	return 0;
+	encode_writeargs(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
 /*
@@ -696,25 +678,19 @@ static int nfs2_xdr_enc_writeargs(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_createargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_createargs *args)
+static void nfs2_xdr_enc_createargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_createargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name, args->len);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name, args->len);
+	encode_sattr(xdr, args->sattr);
 }
 
-static int nfs2_xdr_enc_removeargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_removeargs *args)
+static void nfs2_xdr_enc_removeargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fh, args->name.name, args->name.len);
-	return 0;
+	encode_diropargs(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
@@ -725,17 +701,15 @@ static int nfs2_xdr_enc_removeargs(struct rpc_rqst *req, __be32 *p,
  *		diropargs to;
  *	};
  */
-static int nfs2_xdr_enc_renameargs(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_renameargs *args)
+static void nfs2_xdr_enc_renameargs(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_renameargs *args)
 {
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->old_dir, old->name, old->len);
-	encode_diropargs(&xdr, args->new_dir, new->name, new->len);
-	return 0;
+	encode_diropargs(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
@@ -746,15 +720,12 @@ static int nfs2_xdr_enc_renameargs(struct rpc_rqst *req, __be32 *p,
  *		diropargs to;
  *	};
  */
-static int nfs2_xdr_enc_linkargs(struct rpc_rqst *req, __be32 *p,
-				 const struct nfs_linkargs *args)
+static void nfs2_xdr_enc_linkargs(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  const struct nfs_linkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_fhandle(&xdr, args->fromfh);
-	encode_diropargs(&xdr, args->tofh, args->toname, args->tolen);
-	return 0;
+	encode_fhandle(xdr, args->fromfh);
+	encode_diropargs(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
@@ -766,16 +737,13 @@ static int nfs2_xdr_enc_linkargs(struct rpc_rqst *req, __be32 *p,
  *		sattr attributes;
  *	};
  */
-static int nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_symlinkargs *args)
+static void nfs2_xdr_enc_symlinkargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_symlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs(&xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_path(&xdr, args->pages, args->pathlen);
-	encode_sattr(&xdr, args->sattr);
-	return 0;
+	encode_diropargs(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_path(xdr, args->pages, args->pathlen);
+	encode_sattr(xdr, args->sattr);
 }
 
 /*
@@ -799,16 +767,13 @@ static void encode_readdirargs(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_readdirargs *args)
+static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdirargs(&xdr, args);
+	encode_readdirargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 					args->count, NFS_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1184,7 +1149,7 @@ int nfs_stat_to_errno(enum nfs_stat status)
 #define PROC(proc, argtype, restype, timer)				\
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
-	.p_encode   =  (kxdrproc_t)nfs2_xdr_enc_##argtype,		\
+	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
 	.p_decode   =  (kxdrproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 15c93ccd90c5..ae1b1a43f05e 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -835,14 +835,11 @@ static void encode_diropargs3(struct xdr_stream *xdr, const struct nfs_fh *fh,
  *		nfs_fh3  object;
  *	};
  */
-static int nfs3_xdr_enc_getattr3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs_fh *fh)
+static void nfs3_xdr_enc_getattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs_fh *fh)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, fh);
-	return 0;
+	encode_nfs_fh3(xdr, fh);
 }
 
 /*
@@ -876,16 +873,13 @@ static void encode_sattrguard3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_sattrargs *args)
+static void nfs3_xdr_enc_setattr3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_sattrargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
-	encode_sattr3(&xdr, args->sattr);
-	encode_sattrguard3(&xdr, args);
-	return 0;
+	encode_nfs_fh3(xdr, args->fh);
+	encode_sattr3(xdr, args->sattr);
+	encode_sattrguard3(xdr, args);
 }
 
 /*
@@ -895,14 +889,11 @@ static int nfs3_xdr_enc_setattr3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3  what;
  *	};
  */
-static int nfs3_xdr_enc_lookup3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_diropargs *args)
+static void nfs3_xdr_enc_lookup3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_diropargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
 }
 
 /*
@@ -920,14 +911,11 @@ static void encode_access3args(struct xdr_stream *xdr,
 	encode_uint32(xdr, args->access);
 }
 
-static int nfs3_xdr_enc_access3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_accessargs *args)
+static void nfs3_xdr_enc_access3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_accessargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_access3args(&xdr, args);
-	return 0;
+	encode_access3args(xdr, args);
 }
 
 /*
@@ -937,16 +925,13 @@ static int nfs3_xdr_enc_access3args(struct rpc_rqst *req, __be32 *p,
  *		nfs_fh3	symlink;
  *	};
  */
-static int nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, __be32 *p,
-				      const struct nfs3_readlinkargs *args)
+static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       const struct nfs3_readlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
+	encode_nfs_fh3(xdr, args->fh);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->pglen, NFS3_readlinkres_sz);
-	return 0;
 }
 
 /*
@@ -970,17 +955,14 @@ static void encode_read3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_read3args(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs_readargs *args)
+static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_read3args(&xdr, args);
+	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
 					args->count, NFS3_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
-	return 0;
 }
 
 /*
@@ -1015,15 +997,12 @@ static void encode_write3args(struct xdr_stream *xdr,
 	xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
 }
 
-static int nfs3_xdr_enc_write3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs_writeargs *args)
+static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_write3args(&xdr, args);
-	xdr.buf->flags |= XDRBUF_WRITE;
-	return 0;
+	encode_write3args(xdr, args);
+	xdr->buf->flags |= XDRBUF_WRITE;
 }
 
 /*
@@ -1065,15 +1044,12 @@ static void encode_createhow3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_create3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_createargs *args)
+static void nfs3_xdr_enc_create3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_createargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_createhow3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_createhow3(xdr, args);
 }
 
 /*
@@ -1084,15 +1060,12 @@ static int nfs3_xdr_enc_create3args(struct rpc_rqst *req, __be32 *p,
  *		sattr3		attributes;
  *	};
  */
-static int nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs3_mkdirargs *args)
+static void nfs3_xdr_enc_mkdir3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mkdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_sattr3(&xdr, args->sattr);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_sattr3(xdr, args->sattr);
 }
 
 /*
@@ -1115,15 +1088,12 @@ static void encode_symlinkdata3(struct xdr_stream *xdr,
 	encode_nfspath3(xdr, args->pages, args->pathlen);
 }
 
-static int nfs3_xdr_enc_symlink3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_symlinkargs *args)
+static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_symlinkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fromfh, args->fromname, args->fromlen);
-	encode_symlinkdata3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
+	encode_symlinkdata3(xdr, args);
 }
 
 /*
@@ -1178,15 +1148,12 @@ static void encode_mknoddata3(struct xdr_stream *xdr,
 	}
 }
 
-static int nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, __be32 *p,
-				   const struct nfs3_mknodargs *args)
+static void nfs3_xdr_enc_mknod3args(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    const struct nfs3_mknodargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name, args->len);
-	encode_mknoddata3(&xdr, args);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name, args->len);
+	encode_mknoddata3(xdr, args);
 }
 
 /*
@@ -1196,14 +1163,11 @@ static int nfs3_xdr_enc_mknod3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3  object;
  *	};
  */
-static int nfs3_xdr_enc_remove3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_removeargs *args)
+static void nfs3_xdr_enc_remove3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->fh, args->name.name, args->name.len);
-	return 0;
+	encode_diropargs3(xdr, args->fh, args->name.name, args->name.len);
 }
 
 /*
@@ -1214,17 +1178,15 @@ static int nfs3_xdr_enc_remove3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3	to;
  *	};
  */
-static int nfs3_xdr_enc_rename3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_renameargs *args)
+static void nfs3_xdr_enc_rename3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_renameargs *args)
 {
 	const struct qstr *old = args->old_name;
 	const struct qstr *new = args->new_name;
-	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_diropargs3(&xdr, args->old_dir, old->name, old->len);
-	encode_diropargs3(&xdr, args->new_dir, new->name, new->len);
-	return 0;
+	encode_diropargs3(xdr, args->old_dir, old->name, old->len);
+	encode_diropargs3(xdr, args->new_dir, new->name, new->len);
 }
 
 /*
@@ -1235,15 +1197,12 @@ static int nfs3_xdr_enc_rename3args(struct rpc_rqst *req, __be32 *p,
  *		diropargs3	link;
  *	};
  */
-static int nfs3_xdr_enc_link3args(struct rpc_rqst *req, __be32 *p,
-				  const struct nfs3_linkargs *args)
+static void nfs3_xdr_enc_link3args(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   const struct nfs3_linkargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fromfh);
-	encode_diropargs3(&xdr, args->tofh, args->toname, args->tolen);
-	return 0;
+	encode_nfs_fh3(xdr, args->fromfh);
+	encode_diropargs3(xdr, args->tofh, args->toname, args->tolen);
 }
 
 /*
@@ -1269,16 +1228,13 @@ static void encode_readdir3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, __be32 *p,
-				     const struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      const struct nfs3_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdir3args(&xdr, args);
+	encode_readdir3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1312,16 +1268,13 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, __be32 *p,
-					 const struct nfs3_readdirargs *args)
+static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+					  const struct nfs3_readdirargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_readdirplus3args(&xdr, args);
+	encode_readdirplus3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
-	return 0;
 }
 
 /*
@@ -1345,57 +1298,49 @@ static void encode_commit3args(struct xdr_stream *xdr,
 	*p = cpu_to_be32(args->count);
 }
 
-static int nfs3_xdr_enc_commit3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs_writeargs *args)
+static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_commit3args(&xdr, args);
-	return 0;
+	encode_commit3args(xdr, args);
 }
 
 #ifdef CONFIG_NFS_V3_ACL
 
-static int nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_getaclargs *args)
+static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_getaclargs *args)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, args->fh);
-	encode_uint32(&xdr, args->mask);
+	encode_nfs_fh3(xdr, args->fh);
+	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL))
 		prepare_reply_buffer(req, args->pages, 0,
 					NFSACL_MAXPAGES << PAGE_SHIFT,
 					ACL3_getaclres_sz);
-	return 0;
 }
 
-static int nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, __be32 *p,
-				    const struct nfs3_setaclargs *args)
+static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs3_setaclargs *args)
 {
-	struct xdr_stream xdr;
 	unsigned int base;
 	int error;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_nfs_fh3(&xdr, NFS_FH(args->inode));
-	encode_uint32(&xdr, args->mask);
+	encode_nfs_fh3(xdr, NFS_FH(args->inode));
+	encode_uint32(xdr, args->mask);
 	if (args->npages != 0)
-		xdr_write_pages(&xdr, args->pages, 0, args->len);
+		xdr_write_pages(xdr, args->pages, 0, args->len);
 
 	base = req->rq_slen;
-	error = nfsacl_encode(xdr.buf, base, args->inode,
+	error = nfsacl_encode(xdr->buf, base, args->inode,
 			    (args->mask & NFS_ACL) ?
 			    args->acl_access : NULL, 1, 0);
 	BUG_ON(error < 0);
-	error = nfsacl_encode(xdr.buf, base + error, args->inode,
+	error = nfsacl_encode(xdr->buf, base + error, args->inode,
 			    (args->mask & NFS_DFACL) ?
 			    args->acl_default : NULL, 1,
 			    NFS_ACL_DEFAULT);
 	BUG_ON(error < 0);
-	return 0;
 }
 
 #endif  /* CONFIG_NFS_V3_ACL */
@@ -2506,7 +2451,7 @@ out_default:
 #define PROC(proc, argtype, restype, timer)				\
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
-	.p_encode    = (kxdrproc_t)nfs3_xdr_enc_##argtype##3args,	\
+	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
 	.p_decode    = (kxdrproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
@@ -2549,7 +2494,7 @@ struct rpc_version		nfs_version3 = {
 static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
-		.p_encode = (kxdrproc_t)nfs3_xdr_enc_getacl3args,
+		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
 		.p_decode = (kxdrproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
@@ -2558,7 +2503,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
-		.p_encode = (kxdrproc_t)nfs3_xdr_enc_setacl3args,
+		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
 		.p_decode = (kxdrproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a15fe99fea86..6ec38b3e4a3d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1510,7 +1510,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	hdr->replen += decode_restorefh_maxsz;
 }
 
-static int
+static void
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
 {
 	__be32 *p;
@@ -1521,14 +1521,12 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
 	p = reserve_space(xdr, 2*4);
 	*p++ = cpu_to_be32(1);
 	*p = cpu_to_be32(FATTR4_WORD0_ACL);
-	if (arg->acl_len % 4)
-		return -EINVAL;
+	BUG_ON(arg->acl_len % 4);
 	p = reserve_space(xdr, 4);
 	*p = cpu_to_be32(arg->acl_len);
 	xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
 	hdr->nops++;
 	hdr->replen += decode_setacl_maxsz;
-	return 0;
 }
 
 static void
@@ -1833,393 +1831,362 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 /*
  * Encode an ACCESS request
  */
-static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs4_accessargs *args)
+static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_accessargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_access(&xdr, args->access, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_access(xdr, args->access, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP request
  */
-static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_arg *args)
+static void nfs4_xdr_enc_lookup(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_lookup_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LOOKUP_ROOT request
  */
-static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struct nfs4_lookup_root_arg *args)
+static void nfs4_xdr_enc_lookup_root(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_lookup_root_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode REMOVE request
  */
-static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs *args)
+static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_removeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_remove(&xdr, &args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_remove(xdr, &args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode RENAME request
  */
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
+static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs_renameargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->old_dir, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->new_dir, &hdr);
-	encode_rename(&xdr, args->old_name, args->new_name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->old_dir, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->new_dir, &hdr);
+	encode_rename(xdr, args->old_name, args->new_name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode LINK request
  */
-static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_link_arg *args)
+static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct nfs4_link_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_link(&xdr, args->name, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_link(xdr, args->name, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode CREATE request
  */
-static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_create_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_create(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_create(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode SYMLINK request
  */
-static int nfs4_xdr_enc_symlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_create_arg *args)
+static void nfs4_xdr_enc_symlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_create_arg *args)
 {
-	return nfs4_xdr_enc_create(req, p, args);
+	nfs4_xdr_enc_create(req, xdr, args);
 }
 
 /*
  * Encode GETATTR request
  */
-static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nfs4_getattr_arg *args)
+static void nfs4_xdr_enc_getattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_getattr_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a CLOSE request
  */
-static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_close(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_close(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request
  */
-static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_savefh(&xdr, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
-	encode_restorefh(&xdr, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_restorefh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_CONFIRM request
  */
-static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_open_confirmargs *args)
+static void nfs4_xdr_enc_open_confirm(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs_open_confirmargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops   = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_confirm(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_confirm(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN request with no attributes.
  */
-static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_openargs *args)
+static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs_openargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an OPEN_DOWNGRADE request
  */
-static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
+static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs_closeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_open_downgrade(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_open_downgrade(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_args *args)
+static void nfs4_xdr_enc_lock(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_lock_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lock(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lock(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_args *args)
+static void nfs4_xdr_enc_lockt(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_lockt_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_lockt(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_lockt(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_args *args)
+static void nfs4_xdr_enc_locku(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_locku_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_locku(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_locku(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
-static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args)
+static void nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req,
+					   struct xdr_stream *xdr,
+					struct nfs_release_lockowner_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_release_lockowner(&xdr, &args->lock_owner, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_release_lockowner(xdr, &args->lock_owner, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READLINK request
  */
-static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct nfs4_readlink *args)
+static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_readlink *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readlink(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readlink(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			args->pgbase, args->pglen);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READDIR request
  */
-static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nfs4_readdir_arg *args)
+static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 const struct nfs4_readdir_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_readdir(&xdr, args, req, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_readdir(xdr, args, req, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
 			 args->pgbase, args->count);
@@ -2227,428 +2194,387 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
 			__func__, hdr.replen << 2, args->pages,
 			args->pgbase, args->count);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a READ request
  */
-static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
+static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_readargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_read(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_read(xdr, args, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
 			 args->pages, args->pgbase, args->count);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode an SETATTR request
  */
-static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
+static void nfs4_xdr_enc_setattr(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 struct nfs_setattrargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_setattr(&xdr, args, args->server, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setattr(xdr, args, args->server, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a GETACL request
  */
-static int
-nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
-		struct nfs_getaclargs *args)
+static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_getaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
 	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
-	encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
+	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode a WRITE request
  */
-static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_write(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_write(xdr, args, &hdr);
 	req->rq_snd_buf.flags |= XDRBUF_WRITE;
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  *  a COMMIT request
  */
-static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args)
+static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_writeargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_commit(&xdr, args, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_commit(xdr, args, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * FSINFO request
  */
-static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_arg *args)
+static void nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs4_fsinfo_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_fsinfo(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_fsinfo(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a PATHCONF request
  */
-static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct nfs4_pathconf_arg *args)
+static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  const struct nfs4_pathconf_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
 			   &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a STATFS request
  */
-static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs4_statfs_arg *args)
+static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
+				const struct nfs4_statfs_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
 			   args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * GETATTR_BITMAP request
  */
-static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p,
-				    struct nfs4_server_caps_arg *args)
+static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_server_caps_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
 			   FATTR4_WORD0_LINK_SUPPORT|
 			   FATTR4_WORD0_SYMLINK_SUPPORT|
 			   FATTR4_WORD0_ACLSUPPORT, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr,
+			       struct nfs_client *clp)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_renew(&xdr, clp, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_renew(xdr, clp, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID request
  */
-static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid *sc)
+static void nfs4_xdr_enc_setclientid(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs4_setclientid *sc)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid(&xdr, sc, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid(xdr, sc, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
+static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req,
+					     struct xdr_stream *xdr,
+					     struct nfs4_setclientid_res *arg)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops	= 0,
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid_confirm(&xdr, arg, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_setclientid_confirm(xdr, arg, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struct nfs4_delegreturnargs *args)
+static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const struct nfs4_delegreturnargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fhandle, &hdr);
-	encode_delegreturn(&xdr, args->stateid, &hdr);
-	encode_getfattr(&xdr, args->bitmask, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fhandle, &hdr);
+	encode_delegreturn(xdr, args->stateid, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode FS_LOCATIONS request
  */
-static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_arg *args)
+static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
+				      struct xdr_stream *xdr,
+				      struct nfs4_fs_locations_arg *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 	uint32_t replen;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->dir_fh, &hdr);
-	encode_lookup(&xdr, args->name, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_lookup(xdr, args->name, &hdr);
 	replen = hdr.replen;	/* get the attribute into args->page */
-	encode_fs_locations(&xdr, args->bitmask, &hdr);
+	encode_fs_locations(xdr, args->bitmask, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page,
 			0, PAGE_SIZE);
 	encode_nops(&hdr);
-	return 0;
 }
 
 #if defined(CONFIG_NFS_V4_1)
 /*
  * EXCHANGE_ID request
  */
-static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p,
-				    struct nfs41_exchange_id_args *args)
+static void nfs4_xdr_enc_exchange_id(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     struct nfs41_exchange_id_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_exchange_id(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_exchange_id(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a CREATE_SESSION request
  */
-static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs41_create_session_args *args)
+static void nfs4_xdr_enc_create_session(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs41_create_session_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = args->client->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_create_session(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_create_session(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a DESTROY_SESSION request
  */
-static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p,
-					struct nfs4_session *session)
+static void nfs4_xdr_enc_destroy_session(struct rpc_rqst *req,
+					 struct xdr_stream *xdr,
+					 struct nfs4_session *session)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = session->clp->cl_mvops->minor_version,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_destroy_session(&xdr, session, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_destroy_session(xdr, session, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a SEQUENCE request
  */
-static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p,
-				 struct nfs4_sequence_args *args)
+static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
+				  struct nfs4_sequence_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a GET_LEASE_TIME request
  */
-static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
-				       struct nfs4_get_lease_time_args *args)
+static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					struct nfs4_get_lease_time_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->la_seq_args),
 	};
 	const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->la_seq_args, &hdr);
-	encode_putrootfh(&xdr, &hdr);
-	encode_fsinfo(&xdr, lease_bitmap, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->la_seq_args, &hdr);
+	encode_putrootfh(xdr, &hdr);
+	encode_fsinfo(xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * a RECLAIM_COMPLETE request
  */
-static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
-				     struct nfs41_reclaim_complete_args *args)
+static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req,
+					  struct xdr_stream *xdr,
+				struct nfs41_reclaim_complete_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args)
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_reclaim_complete(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_reclaim_complete(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  * Encode GETDEVICEINFO request
  */
-static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
-				      struct nfs4_getdeviceinfo_args *args)
+static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
+				       struct xdr_stream *xdr,
+				       struct nfs4_getdeviceinfo_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_getdeviceinfo(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_getdeviceinfo(xdr, args, &hdr);
 
 	/* set up reply kvec. Subtract notification bitmap max size (2)
 	 * so that notification bitmap is put in xdr_buf tail */
@@ -2657,27 +2583,24 @@ static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
 			 args->pdev->pglen);
 
 	encode_nops(&hdr);
-	return 0;
 }
 
 /*
  *  Encode LAYOUTGET request
  */
-static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
-				  struct nfs4_layoutget_args *args)
+static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
+				   struct nfs4_layoutget_args *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
-	encode_layoutget(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	encode_layoutget(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return 0;
 }
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -5368,22 +5291,18 @@ out:
 /*
  * Encode an SETACL request
  */
-static int
-nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
+static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
+				struct nfs_setaclargs *args)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	int status;
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_compound_hdr(&xdr, req, &hdr);
-	encode_sequence(&xdr, &args->seq_args, &hdr);
-	encode_putfh(&xdr, args->fh, &hdr);
-	status = encode_setacl(&xdr, args, &hdr);
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_setacl(xdr, args, &hdr);
 	encode_nops(&hdr);
-	return status;
 }
 
 /*
@@ -6316,7 +6235,7 @@ nfs4_stat_to_errno(int stat)
 #define PROC(proc, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
-	.p_encode = (kxdrproc_t)nfs4_xdr_##argtype,		\
+	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
 	.p_decode = (kxdrproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 6529534d7aae..c363efda8ecf 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -499,34 +499,28 @@ out_default:
 /*
  * NB: Without this zero space reservation, callbacks over krb5p fail
  */
-static int nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
+static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				 void *__unused)
 {
-	struct xdr_stream xdrs, *xdr = &xdrs;
-
-	xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
 	xdr_reserve_space(xdr, 0);
-	return 0;
 }
 
 /*
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
-static int nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
-				  const struct nfsd4_callback *cb)
+static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+				   const struct nfsd4_callback *cb)
 {
-	struct xdr_stream xdr;
 	const struct nfs4_delegation *args = cb->cb_op;
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = cb->cb_clp->cl_cb_ident,
 		.minorversion = cb->cb_minorversion,
 	};
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-	encode_cb_compound4args(&xdr, &hdr);
-	encode_cb_sequence4args(&xdr, cb, &hdr);
-	encode_cb_recall4args(&xdr, args, &hdr);
+	encode_cb_compound4args(xdr, &hdr);
+	encode_cb_sequence4args(xdr, cb, &hdr);
+	encode_cb_recall4args(xdr, args, &hdr);
 	encode_cb_nops(&hdr);
-	return 0;
 }
 
 
@@ -583,7 +577,7 @@ out_default:
 #define PROC(proc, call, argtype, restype)				\
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
-	.p_encode  = (kxdrproc_t)nfs4_xdr_enc_##argtype,		\
+	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
 	.p_decode  = (kxdrproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index b2024757edd5..d88cffbaa6df 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -110,7 +110,7 @@ struct rpc_credops {
 	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
-	int			(*crwrap_req)(struct rpc_task *, kxdrproc_t,
+	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
 						void *, __be32 *, void *);
 	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
 						void *, __be32 *, void *);
@@ -139,7 +139,7 @@ struct rpc_cred *	rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *
 void			put_rpccred(struct rpc_cred *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
-int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a5a55f284b7d..7b19c4e1ce53 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -89,7 +89,7 @@ struct rpc_version {
  */
 struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
-	kxdrproc_t		p_encode;	/* XDR encode function */
+	kxdreproc_t		p_encode;	/* XDR encode function */
 	kxdrproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 498ab93a81e4..a21cf5378c1d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -33,8 +33,8 @@ struct xdr_netobj {
 };
 
 /*
- * This is the generic XDR function. rqstp is either a rpc_rqst (client
- * side) or svc_rqst pointer (server side).
+ * This is the legacy generic XDR function. rqstp is either a rpc_rqst
+ * (client side) or svc_rqst pointer (server side).
  * Encode functions always assume there's enough room in the buffer.
  */
 typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
@@ -203,6 +203,11 @@ struct xdr_stream {
 	struct kvec *iov;	/* pointer to the current kvec */
 };
 
+/*
+ * This is the xdr_stream style generic XDR function.
+ */
+typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index afe67849269f..651c9da703cb 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -563,8 +563,17 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 	return cred->cr_ops->crvalidate(task, p);
 }
 
+static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				   __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+	encode(rqstp, &xdr, obj);
+}
+
 int
-rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
+rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -574,7 +583,8 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 	if (cred->cr_ops->crwrap_req)
 		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
 	/* By default, we encode the arguments normally. */
-	return encode(rqstp, data, obj);
+	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
+	return 0;
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3835ce35e224..42b46f9a670a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1231,9 +1231,19 @@ out_bad:
 	return NULL;
 }
 
+static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
+				__be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+	encode(rqstp, &xdr, obj);
+}
+
 static inline int
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		   kxdreproc_t encode, struct rpc_rqst *rqstp,
+		   __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	integ_buf;
@@ -1249,9 +1259,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
@@ -1325,7 +1333,8 @@ out:
 
 static inline int
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
+		  kxdreproc_t encode, struct rpc_rqst *rqstp,
+		  __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	u32		offset;
@@ -1342,9 +1351,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	*p++ = htonl(rqstp->rq_seqno);
 
-	status = encode(rqstp, p, obj);
-	if (status)
-		return status;
+	gss_wrap_req_encode(encode, rqstp, p, obj);
 
 	status = alloc_enc_pages(rqstp);
 	if (status)
@@ -1394,7 +1401,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static int
 gss_wrap_req(struct rpc_task *task,
-	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
+	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
@@ -1407,12 +1414,14 @@ gss_wrap_req(struct rpc_task *task,
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		status = encode(rqstp, p, obj);
+		gss_wrap_req_encode(encode, rqstp, p, obj);
+		status = 0;
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 		case RPC_GSS_SVC_NONE:
-			status = encode(rqstp, p, obj);
+			gss_wrap_req_encode(encode, rqstp, p, obj);
+			status = 0;
 			break;
 		case RPC_GSS_SVC_INTEGRITY:
 			status = gss_wrap_req_integ(cred, ctx, encode,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 92ce94f5146b..d446a32be667 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1095,7 +1095,7 @@ static void
 rpc_xdr_encode(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	encode;
+	kxdreproc_t	encode;
 	__be32		*p;
 
 	dprint_status(task);
@@ -1776,9 +1776,8 @@ out_overflow:
 	goto out_garbage;
 }
 
-static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
+static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
-	return 0;
 }
 
 static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 43838c72b778..63912a1a2983 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -689,25 +689,21 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
  * XDR functions for rpcbind
  */
 
-static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 	dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port);
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr, RPCB_mappingargs_sz << 2);
+	p = xdr_reserve_space(xdr, RPCB_mappingargs_sz << 2);
 	*p++ = cpu_to_be32(rpcb->r_prog);
 	*p++ = cpu_to_be32(rpcb->r_vers);
 	*p++ = cpu_to_be32(rpcb->r_prot);
 	*p   = cpu_to_be32(rpcb->r_port);
-
-	return 0;
 }
 
 static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
@@ -769,27 +765,24 @@ static void encode_rpcb_string(struct xdr_stream *xdr, const char *string,
 	xdr_encode_opaque(p, string, len);
 }
 
-static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p,
-			    const struct rpcbind_args *rpcb)
+static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
+			     const struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 
 	dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n",
 			task->tk_pid, task->tk_msg.rpc_proc->p_name,
 			rpcb->r_prog, rpcb->r_vers,
 			rpcb->r_netid, rpcb->r_addr);
 
-	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
-
-	p = xdr_reserve_space(&xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
+	p = xdr_reserve_space(xdr, (RPCB_program_sz + RPCB_version_sz) << 2);
 	*p++ = cpu_to_be32(rpcb->r_prog);
 	*p = cpu_to_be32(rpcb->r_vers);
 
-	encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
-	encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
-	encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
-	return 0;
+	encode_rpcb_string(xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN);
+	encode_rpcb_string(xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN);
+	encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 }
 
 static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
@@ -849,7 +842,7 @@ out_fail:
 static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -859,7 +852,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -869,7 +862,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	},
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
-		.p_encode	= (kxdrproc_t)rpcb_enc_mapping,
+		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
@@ -882,7 +875,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -892,7 +885,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -902,7 +895,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
@@ -915,7 +908,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -925,7 +918,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
@@ -935,7 +928,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	},
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
-		.p_encode	= (kxdrproc_t)rpcb_enc_getaddr,
+		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
 		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
-- 
cgit v1.2.3


From bf2695516db982e90a22fc94f93491b481796bb1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 14 Dec 2010 14:59:29 +0000
Subject: SUNRPC: New xdr_streams XDR decoder API

Now that all client-side XDR decoder routines use xdr_streams, there
should be no need to support the legacy calling sequence [rpc_rqst *,
__be32 *, RPC res *] anywhere.  We can construct an xdr_stream in the
generic RPC code, instead of in each decoder function.

This is a refactoring change.  It should not cause different behavior.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clnt4xdr.c            |  20 +-
 fs/lockd/clntxdr.c             |  20 +-
 fs/lockd/mon.c                 |  30 +-
 fs/nfs/mount_clnt.c            |  30 +-
 fs/nfs/nfs2xdr.c               |  68 ++---
 fs/nfs/nfs3xdr.c               | 195 ++++++-------
 fs/nfs/nfs4xdr.c               | 619 ++++++++++++++++++++---------------------
 fs/nfsd/nfs4callback.c         |  16 +-
 include/linux/sunrpc/auth.h    |   4 +-
 include/linux/sunrpc/clnt.h    |   2 +-
 include/linux/sunrpc/xdr.h     |   3 +-
 net/sunrpc/auth.c              |  14 +-
 net/sunrpc/auth_gss/auth_gss.c |  13 +-
 net/sunrpc/clnt.c              |   4 +-
 net/sunrpc/rpcb_clnt.c         |  46 ++-
 15 files changed, 518 insertions(+), 566 deletions(-)

(limited to 'net')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 974f1d9cd323..f848b52c67b1 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -529,17 +529,16 @@ out:
 	return error;
 }
 
-static int nlm4_xdr_dec_testres(struct rpc_rqst *req, __be32 *p,
+static int nlm4_xdr_dec_testres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
 				struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm4_testrply(&xdr, result);
+	error = decode_nlm4_testrply(xdr, result);
 out:
 	return error;
 }
@@ -550,17 +549,16 @@ out:
  *		nlm4_stat stat;
  *	};
  */
-static int nlm4_xdr_dec_res(struct rpc_rqst *req, __be32 *p,
+static int nlm4_xdr_dec_res(struct rpc_rqst *req,
+			    struct xdr_stream *xdr,
 			    struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm4_stat(&xdr, &result->status);
+	error = decode_nlm4_stat(xdr, &result->status);
 out:
 	return error;
 }
@@ -575,7 +573,7 @@ out:
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nlm4_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrproc_t)nlm4_xdr_dec_##restype,		\
+	.p_decode    = (kxdrdproc_t)nlm4_xdr_dec_##restype,		\
 	.p_arglen    = NLM4_##argtype##_sz,				\
 	.p_replen    = NLM4_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index c6fda8fb1c5b..180ac34feb9a 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -527,17 +527,16 @@ out:
 	return error;
 }
 
-static int nlm_xdr_dec_testres(struct rpc_rqst *req, __be32 *p,
+static int nlm_xdr_dec_testres(struct rpc_rqst *req,
+			       struct xdr_stream *xdr,
 			       struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm_testrply(&xdr, result);
+	error = decode_nlm_testrply(xdr, result);
 out:
 	return error;
 }
@@ -548,17 +547,16 @@ out:
  *		nlm_stat stat;
  *	};
  */
-static int nlm_xdr_dec_res(struct rpc_rqst *req, __be32 *p,
+static int nlm_xdr_dec_res(struct rpc_rqst *req,
+			   struct xdr_stream *xdr,
 			   struct nlm_res *result)
 {
-	struct xdr_stream xdr;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_cookie(&xdr, &result->cookie);
+	error = decode_cookie(xdr, &result->cookie);
 	if (unlikely(error))
 		goto out;
-	error = decode_nlm_stat(&xdr, &result->status);
+	error = decode_nlm_stat(xdr, &result->status);
 out:
 	return error;
 }
@@ -573,7 +571,7 @@ out:
 [NLMPROC_##proc] = {							\
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nlm_xdr_enc_##argtype,		\
-	.p_decode    = (kxdrproc_t)nlm_xdr_dec_##restype,		\
+	.p_decode    = (kxdrdproc_t)nlm_xdr_dec_##restype,		\
 	.p_arglen    = NLM_##argtype##_sz,				\
 	.p_replen    = NLM_##restype##_sz,				\
 	.p_statidx   = NLMPROC_##proc,					\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index baa77bc9d825..23d7451b2938 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -472,35 +472,35 @@ static void nsm_xdr_enc_unmon(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_mon_id(xdr, argp);
 }
 
-static int xdr_dec_stat_res(struct rpc_rqst *rqstp, __be32 *p,
-			    struct nsm_res *resp)
+static int nsm_xdr_dec_stat_res(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nsm_res *resp)
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, 4 + 4);
+	p = xdr_inline_decode(xdr, 4 + 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 	resp->status = be32_to_cpup(p++);
 	resp->state = be32_to_cpup(p);
 
-	dprintk("lockd: xdr_dec_stat_res status %d state %d\n",
-			resp->status, resp->state);
+	dprintk("lockd: %s status %d state %d\n",
+		__func__, resp->status, resp->state);
 	return 0;
 }
 
-static int xdr_dec_stat(struct rpc_rqst *rqstp, __be32 *p,
-			struct nsm_res *resp)
+static int nsm_xdr_dec_stat(struct rpc_rqst *rqstp,
+			    struct xdr_stream *xdr,
+			    struct nsm_res *resp)
 {
-	struct xdr_stream xdr;
+	__be32 *p;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 	resp->state = be32_to_cpup(p);
 
-	dprintk("lockd: xdr_dec_stat state %d\n", resp->state);
+	dprintk("lockd: %s state %d\n", __func__, resp->state);
 	return 0;
 }
 
@@ -517,7 +517,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_MON] = {
 		.p_proc		= NSMPROC_MON,
 		.p_encode	= (kxdreproc_t)nsm_xdr_enc_mon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat_res,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat_res,
 		.p_arglen	= SM_mon_sz,
 		.p_replen	= SM_monres_sz,
 		.p_statidx	= NSMPROC_MON,
@@ -526,7 +526,7 @@ static struct rpc_procinfo	nsm_procedures[] = {
 [NSMPROC_UNMON] = {
 		.p_proc		= NSMPROC_UNMON,
 		.p_encode	= (kxdreproc_t)nsm_xdr_enc_unmon,
-		.p_decode	= (kxdrproc_t)xdr_dec_stat,
+		.p_decode	= (kxdrdproc_t)nsm_xdr_dec_stat,
 		.p_arglen	= SM_mon_id_sz,
 		.p_replen	= SM_unmonres_sz,
 		.p_statidx	= NSMPROC_UNMON,
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 979ebd7af3cb..697e07235f30 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -340,18 +340,16 @@ static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p,
-			    struct mountres *res)
+static int mnt_xdr_dec_mountres(struct rpc_rqst *req,
+				struct xdr_stream *xdr,
+				struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_status(&xdr, res);
+	status = decode_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	return decode_fhandle(&xdr, res);
+	return decode_fhandle(xdr, res);
 }
 
 static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
@@ -434,30 +432,28 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 	return 0;
 }
 
-static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p,
-			     struct mountres *res)
+static int mnt_xdr_dec_mountres3(struct rpc_rqst *req,
+				 struct xdr_stream *xdr,
+				 struct mountres *res)
 {
-	struct xdr_stream xdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	status = decode_fhs_status(&xdr, res);
+	status = decode_fhs_status(xdr, res);
 	if (unlikely(status != 0 || res->errno != 0))
 		return status;
-	status = decode_fhandle3(&xdr, res);
+	status = decode_fhandle3(xdr, res);
 	if (unlikely(status != 0)) {
 		res->errno = -EBADHANDLE;
 		return 0;
 	}
-	return decode_auth_flavors(&xdr, res);
+	return decode_auth_flavors(xdr, res);
 }
 
 static struct rpc_procinfo mnt_procedures[] = {
 	[MOUNTPROC_MNT] = {
 		.p_proc		= MOUNTPROC_MNT,
 		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres_sz,
 		.p_statidx	= MOUNTPROC_MNT,
@@ -476,7 +472,7 @@ static struct rpc_procinfo mnt3_procedures[] = {
 	[MOUNTPROC3_MNT] = {
 		.p_proc		= MOUNTPROC3_MNT,
 		.p_encode	= (kxdreproc_t)mnt_xdr_enc_dirpath,
-		.p_decode	= (kxdrproc_t)mnt_dec_mountres3,
+		.p_decode	= (kxdrdproc_t)mnt_xdr_dec_mountres3,
 		.p_arglen	= MNT_enc_dirpath_sz,
 		.p_replen	= MNT_dec_mountres3_sz,
 		.p_statidx	= MOUNTPROC3_MNT,
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 8f3acbec761f..51f1cfa04d27 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -783,15 +783,13 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
  * "NFS: Network File System Protocol Specification".
  */
 
-static int nfs2_xdr_dec_stat(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr,
 			     void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
@@ -802,22 +800,16 @@ out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_attrstat(&xdr, result);
+	return decode_attrstat(xdr, result);
 }
 
-static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_diropok *result)
 {
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_diropres(&xdr, result);
+	return decode_diropres(xdr, result);
 }
 
 /*
@@ -830,20 +822,18 @@ static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, __be32 *p,
  *		void;
  *	};
  */
-static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req, __be32 *p,
-				    void *__unused)
+static int nfs2_xdr_dec_readlinkres(struct rpc_rqst *req,
+				    struct xdr_stream *xdr, void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_path(&xdr);
+	error = decode_path(xdr);
 out:
 	return error;
 out_default:
@@ -861,39 +851,33 @@ out_default:
  *		void;
  *	};
  */
-static int nfs2_xdr_dec_readres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				struct nfs_readres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_fattr(&xdr, result->fattr);
+	error = decode_fattr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_nfsdata(&xdr, result);
+	error = decode_nfsdata(xdr, result);
 out:
 	return error;
 out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
-
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	return decode_attrstat(&xdr, result->fattr);
+	return decode_attrstat(xdr, result->fattr);
 }
 
 /**
@@ -1008,20 +992,18 @@ out_cheating:
 	goto out;
 }
 
-static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req, __be32 *p,
-				   void *__unused)
+static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
+				   struct xdr_stream *xdr, void *__unused)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_readdirok(&xdr);
+	error = decode_readdirok(xdr);
 out:
 	return error;
 out_default:
@@ -1062,20 +1044,18 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, __be32 *p,
+static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
 				  struct nfs2_fsstat *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_stat(&xdr, &status);
+	error = decode_stat(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS_OK)
 		goto out_default;
-	error = decode_info(&xdr, result);
+	error = decode_info(xdr, result);
 out:
 	return error;
 out_default:
@@ -1150,7 +1130,7 @@ int nfs_stat_to_errno(enum nfs_stat status)
 [NFSPROC_##proc] = {							\
 	.p_proc	    =  NFSPROC_##proc,					\
 	.p_encode   =  (kxdreproc_t)nfs2_xdr_enc_##argtype,		\
-	.p_decode   =  (kxdrproc_t)nfs2_xdr_dec_##restype,		\
+	.p_decode   =  (kxdrdproc_t)nfs2_xdr_dec_##restype,		\
 	.p_arglen   =  NFS_##argtype##_sz,				\
 	.p_replen   =  NFS_##restype##_sz,				\
 	.p_timer    =  timer,						\
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index ae1b1a43f05e..df30a26cc4fa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1366,20 +1366,19 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
  *		void;
  *	};
  */
-static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_getattr3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_fattr3(&xdr, result);
+	error = decode_fattr3(xdr, result);
 out:
 	return error;
 out_default:
@@ -1404,18 +1403,17 @@ out_default:
  *		SETATTR3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_setattr3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result);
+	error = decode_wcc_data(xdr, result);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1446,30 +1444,29 @@ out_status:
  *		LOOKUP3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_lookup3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_diropres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_nfs_fh3(&xdr, result->fh);
+	error = decode_nfs_fh3(xdr, result->fh);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -1494,23 +1491,22 @@ out_default:
  *		ACCESS3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_access3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_access3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_accessres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_uint32(&xdr, &result->access);
+	error = decode_uint32(xdr, &result->access);
 out:
 	return error;
 out_default:
@@ -1536,23 +1532,22 @@ out_default:
  *		READLINK3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_readlink3res(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result);
+	error = decode_post_op_attr(xdr, result);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_nfspath3(&xdr);
+	error = decode_nfspath3(xdr);
 out:
 	return error;
 out_default:
@@ -1620,23 +1615,21 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs_readres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_read3resok(&xdr, result);
+	error = decode_read3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -1692,23 +1685,21 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				  struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_write3resok(&xdr, result);
+	error = decode_write3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -1757,24 +1748,23 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_create3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_create3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_diropres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_create3resok(&xdr, result);
+	error = decode_create3resok(xdr, result);
 out:
 	return error;
 out_default:
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -1798,18 +1788,17 @@ out_default:
  *		REMOVE3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_remove3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_removeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1840,21 +1829,20 @@ out_status:
  *		RENAME3resfail resfail;
  *	};
  */
-static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_rename3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_renameres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->old_fattr);
+	error = decode_wcc_data(xdr, result->old_fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->new_fattr);
+	error = decode_wcc_data(xdr, result->new_fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -1885,21 +1873,19 @@ out_status:
  *		LINK3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_link3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs3_linkres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->dir_attr);
+	error = decode_wcc_data(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
@@ -2085,24 +2071,23 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_readdir3res(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
 				    struct nfs3_readdirres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_readdir3resok(&xdr, result);
+	error = decode_readdir3resok(xdr, result);
 out:
 	return error;
 out_default:
-	error = decode_post_op_attr(&xdr, result->dir_attr);
+	error = decode_post_op_attr(xdr, result->dir_attr);
 	if (unlikely(error))
 		goto out;
 	return nfs_stat_to_errno(status);
@@ -2154,23 +2139,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fsstat *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_fsstat3resok(&xdr, result);
+	error = decode_fsstat3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2231,23 +2215,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fsinfo *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_fsinfo3resok(&xdr, result);
+	error = decode_fsinfo3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2295,23 +2278,22 @@ out_overflow:
 	return -EIO;
 }
 
-static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs_pathconf *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_post_op_attr(&xdr, result->fattr);
+	error = decode_post_op_attr(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_pathconf3resok(&xdr, result);
+	error = decode_pathconf3resok(xdr, result);
 out:
 	return error;
 out_status:
@@ -2337,23 +2319,22 @@ out_status:
  *		COMMIT3resfail	resfail;
  *	};
  */
-static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_writeres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
-	error = decode_wcc_data(&xdr, result->fattr);
+	error = decode_wcc_data(xdr, result->fattr);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_status;
-	error = decode_writeverf3(&xdr, result->verf->verifier);
+	error = decode_writeverf3(xdr, result->verf->verifier);
 out:
 	return error;
 out_status:
@@ -2406,40 +2387,38 @@ out:
 	return error;
 }
 
-static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_getacl3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs3_getaclres *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_getacl3resok(&xdr, result);
+	error = decode_getacl3resok(xdr, result);
 out:
 	return error;
 out_default:
 	return nfs_stat_to_errno(status);
 }
 
-static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req, __be32 *p,
+static int nfs3_xdr_dec_setacl3res(struct rpc_rqst *req,
+				   struct xdr_stream *xdr,
 				   struct nfs_fattr *result)
 {
-	struct xdr_stream xdr;
 	enum nfs_stat status;
 	int error;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	error = decode_nfsstat3(&xdr, &status);
+	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
 	if (status != NFS3_OK)
 		goto out_default;
-	error = decode_post_op_attr(&xdr, result);
+	error = decode_post_op_attr(xdr, result);
 out:
 	return error;
 out_default:
@@ -2452,7 +2431,7 @@ out_default:
 [NFS3PROC_##proc] = {							\
 	.p_proc      = NFS3PROC_##proc,					\
 	.p_encode    = (kxdreproc_t)nfs3_xdr_enc_##argtype##3args,	\
-	.p_decode    = (kxdrproc_t)nfs3_xdr_dec_##restype##3res,	\
+	.p_decode    = (kxdrdproc_t)nfs3_xdr_dec_##restype##3res,	\
 	.p_arglen    = NFS3_##argtype##args_sz,				\
 	.p_replen    = NFS3_##restype##res_sz,				\
 	.p_timer     = timer,						\
@@ -2495,7 +2474,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_GETACL] = {
 		.p_proc = ACLPROC3_GETACL,
 		.p_encode = (kxdreproc_t)nfs3_xdr_enc_getacl3args,
-		.p_decode = (kxdrproc_t)nfs3_xdr_dec_getacl3res,
+		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_getacl3res,
 		.p_arglen = ACL3_getaclargs_sz,
 		.p_replen = ACL3_getaclres_sz,
 		.p_timer = 1,
@@ -2504,7 +2483,7 @@ static struct rpc_procinfo	nfs3_acl_procedures[] = {
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
 		.p_encode = (kxdreproc_t)nfs3_xdr_enc_setacl3args,
-		.p_decode = (kxdrproc_t)nfs3_xdr_dec_setacl3res,
+		.p_decode = (kxdrdproc_t)nfs3_xdr_dec_setacl3res,
 		.p_arglen = ACL3_setaclargs_sz,
 		.p_replen = ACL3_setaclres_sz,
 		.p_timer = 0,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6ec38b3e4a3d..f3f99156bfcb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5013,26 +5013,26 @@ out_overflow:
 /*
  * Decode OPEN_DOWNGRADE response
  */
-static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_downgrade(&xdr, res);
+	status = decode_open_downgrade(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5041,26 +5041,25 @@ out:
 /*
  * Decode ACCESS response
  */
-static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
+static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_accessres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
-	status = decode_access(&xdr, res);
+	status = decode_access(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5069,26 +5068,28 @@ out:
 /*
  * Decode LOOKUP response
  */
-static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_lookup(&xdr)) != 0)
+	status = decode_lookup(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server
+	status = decode_getfattr(xdr, res->fattr, res->server
 			,!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5097,23 +5098,25 @@ out:
 /*
  * Decode LOOKUP_ROOT response
  */
-static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lookup_res *res)
+static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs4_lookup_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putrootfh(&xdr)) != 0)
+	status = decode_putrootfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) == 0)
-		status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfh(xdr, res->fh);
+	if (status == 0)
+		status = decode_getfattr(xdr, res->fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5122,24 +5125,25 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_removeres *res)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_removeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+	status = decode_remove(xdr, &res->cinfo);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5148,34 +5152,38 @@ out:
 /*
  * Decode RENAME response
  */
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_renameres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+	status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo);
+	if (status)
 		goto out;
 	/* Current FH is target directory */
-	if (decode_getfattr(&xdr, res->new_fattr, res->server,
+	if (decode_getfattr(xdr, res->new_fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->old_fattr, res->server,
+	decode_getfattr(xdr, res->old_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5184,37 +5192,41 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link_res *res)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs4_link_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+	status = decode_link(xdr, &res->cinfo);
+	if (status)
 		goto out;
 	/*
 	 * Note order: OP_LINK leaves the directory as the current
 	 *             filehandle.
 	 */
-	if (decode_getfattr(&xdr, res->dir_attr, res->server,
+	if (decode_getfattr(xdr, res->dir_attr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5223,33 +5235,37 @@ out:
 /*
  * Decode CREATE response
  */
-static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs4_create_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_savefh(&xdr)) != 0)
+	status = decode_savefh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
+	status = decode_create(xdr, &res->dir_cinfo);
+	if (status)
 		goto out;
-	if ((status = decode_getfh(&xdr, res->fh)) != 0)
+	status = decode_getfh(xdr, res->fh);
+	if (status)
 		goto out;
-	if (decode_getfattr(&xdr, res->fattr, res->server,
+	if (decode_getfattr(xdr, res->fattr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if ((status = decode_restorefh(&xdr)) != 0)
+	status = decode_restorefh(xdr);
+	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->dir_fattr, res->server,
+	decode_getfattr(xdr, res->dir_fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5258,31 +5274,31 @@ out:
 /*
  * Decode SYMLINK response
  */
-static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_create_res *res)
+static int nfs4_xdr_dec_symlink(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_create_res *res)
 {
-	return nfs4_xdr_dec_create(rqstp, p, res);
+	return nfs4_xdr_dec_create(rqstp, xdr, res);
 }
 
 /*
  * Decode GETATTR response
  */
-static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_getattr_res *res)
+static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_getattr_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server,
+	status = decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5309,24 +5325,22 @@ static void nfs4_xdr_enc_setacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Decode SETACL response
  */
 static int
-nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_setaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 out:
 	return status;
 }
@@ -5335,24 +5349,22 @@ out:
  * Decode GETACL response
  */
 static int
-nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p,
+nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct nfs_getaclres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(&xdr, rqstp, &res->acl_len);
+	status = decode_getacl(xdr, rqstp, &res->acl_len);
 
 out:
 	return status;
@@ -5361,23 +5373,22 @@ out:
 /*
  * Decode CLOSE response
  */
-static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
+static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_closeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_close(&xdr, res);
+	status = decode_close(xdr, res);
 	if (status != 0)
 		goto out;
 	/*
@@ -5386,7 +5397,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
 	 * 	an ESTALE error. Shouldn't be a problem,
 	 * 	though, since fattr->valid will remain unset.
 	 */
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5395,36 +5406,35 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_savefh(&xdr);
+	status = decode_savefh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	if (decode_getfh(&xdr, &res->fh) != 0)
+	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(&xdr, res->f_attr, res->server,
+	if (decode_getfattr(xdr, res->f_attr, res->server,
 				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
 		goto out;
-	if (decode_restorefh(&xdr) != 0)
+	if (decode_restorefh(xdr) != 0)
 		goto out;
-	decode_getfattr(&xdr, res->dir_attr, res->server,
+	decode_getfattr(xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5433,20 +5443,20 @@ out:
 /*
  * Decode OPEN_CONFIRM response
  */
-static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
+static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp,
+				     struct xdr_stream *xdr,
+				     struct nfs_open_confirmres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open_confirm(&xdr, res);
+	status = decode_open_confirm(xdr, res);
 out:
 	return status;
 }
@@ -5454,26 +5464,26 @@ out:
 /*
  * Decode OPEN response
  */
-static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
+static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs_openres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_open(&xdr, res);
+	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->f_attr, res->server,
+	decode_getfattr(xdr, res->f_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5482,26 +5492,26 @@ out:
 /*
  * Decode SETATTR response
  */
-static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
+static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
+				struct xdr_stream *xdr,
+				struct nfs_setattrres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_setattr(&xdr);
+	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5510,23 +5520,22 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_res *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_lock_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lock(&xdr, res);
+	status = decode_lock(xdr, res);
 out:
 	return status;
 }
@@ -5534,23 +5543,22 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lockt_res *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_lockt_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_lockt(&xdr, res);
+	status = decode_lockt(xdr, res);
 out:
 	return status;
 }
@@ -5558,61 +5566,58 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_locku_res *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_locku_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_locku(&xdr, res);
+	status = decode_locku(xdr, res);
 out:
 	return status;
 }
 
-static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp,
+					  struct xdr_stream *xdr, void *dummy)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_release_lockowner(&xdr);
+		status = decode_release_lockowner(xdr);
 	return status;
 }
 
 /*
  * Decode READLINK response
  */
-static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
 				 struct nfs4_readlink_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_readlink(&xdr, rqstp);
+	status = decode_readlink(xdr, rqstp);
 out:
 	return status;
 }
@@ -5620,23 +5625,22 @@ out:
 /*
  * Decode READDIR response
  */
-static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_readdir_res *res)
+static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+				struct nfs4_readdir_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_readdir(&xdr, rqstp, res);
+	status = decode_readdir(xdr, rqstp, res);
 out:
 	return status;
 }
@@ -5644,23 +5648,22 @@ out:
 /*
  * Decode Read response
  */
-static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readres *res)
+static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_readres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_read(&xdr, rqstp, res);
+	status = decode_read(xdr, rqstp, res);
 	if (!status)
 		status = res->count;
 out:
@@ -5670,26 +5673,25 @@ out:
 /*
  * Decode WRITE response
  */
-static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      struct nfs_writeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_write(&xdr, res);
+	status = decode_write(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 	if (!status)
 		status = res->count;
@@ -5700,26 +5702,25 @@ out:
 /*
  * Decode COMMIT response
  */
-static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writeres *res)
+static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			       struct nfs_writeres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_commit(&xdr, res);
+	status = decode_commit(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5728,85 +5729,80 @@ out:
 /*
  * Decode FSINFO response
  */
-static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, struct xdr_stream *xdr,
 			       struct nfs4_fsinfo_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, res->fsinfo);
+		status = decode_fsinfo(xdr, res->fsinfo);
 	return status;
 }
 
 /*
  * Decode PATHCONF response
  */
-static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 struct nfs4_pathconf_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_pathconf(&xdr, res->pathconf);
+		status = decode_pathconf(xdr, res->pathconf);
 	return status;
 }
 
 /*
  * Decode STATFS response
  */
-static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
 			       struct nfs4_statfs_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, req);
+		status = decode_sequence(xdr, &res->seq_res, req);
 	if (!status)
-		status = decode_putfh(&xdr);
+		status = decode_putfh(xdr);
 	if (!status)
-		status = decode_statfs(&xdr, res->fsstat);
+		status = decode_statfs(xdr, res->fsstat);
 	return status;
 }
 
 /*
  * Decode GETATTR_BITMAP response
  */
-static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res)
+static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    struct nfs4_server_caps_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, req);
+	status = decode_sequence(xdr, &res->seq_res, req);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	status = decode_server_caps(&xdr, res);
+	status = decode_server_caps(xdr, res);
 out:
 	return status;
 }
@@ -5814,79 +5810,77 @@ out:
 /*
  * Decode RENEW response
  */
-static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
+static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			      void *__unused)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_renew(&xdr);
+		status = decode_renew(xdr);
 	return status;
 }
 
 /*
  * Decode SETCLIENTID response
  */
-static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
-		struct nfs4_setclientid_res *res)
+static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    struct nfs4_setclientid_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_setclientid(&xdr, res);
+		status = decode_setclientid(xdr, res);
 	return status;
 }
 
 /*
  * Decode SETCLIENTID_CONFIRM response
  */
-static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo)
+static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req,
+					    struct xdr_stream *xdr,
+					    struct nfs_fsinfo *fsinfo)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_setclientid_confirm(&xdr);
+		status = decode_setclientid_confirm(xdr);
 	if (!status)
-		status = decode_putrootfh(&xdr);
+		status = decode_putrootfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, fsinfo);
+		status = decode_fsinfo(xdr, fsinfo);
 	return status;
 }
 
 /*
  * Decode DELEGRETURN response
  */
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    struct nfs4_delegreturnres *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status != 0)
 		goto out;
-	status = decode_delegreturn(&xdr);
+	status = decode_delegreturn(xdr);
 	if (status != 0)
 		goto out;
-	decode_getfattr(&xdr, res->fattr, res->server,
+	decode_getfattr(xdr, res->fattr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
@@ -5895,26 +5889,27 @@ out:
 /*
  * Decode FS_LOCATIONS response
  */
-static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
+static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
 				     struct nfs4_fs_locations_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, req);
+	status = decode_sequence(xdr, &res->seq_res, req);
 	if (status)
 		goto out;
-	if ((status = decode_putfh(&xdr)) != 0)
+	status = decode_putfh(xdr);
+	if (status)
 		goto out;
-	if ((status = decode_lookup(&xdr)) != 0)
+	status = decode_lookup(xdr);
+	if (status)
 		goto out;
-	xdr_enter_page(&xdr, PAGE_SIZE);
-	status = decode_getfattr(&xdr, &res->fs_locations->fattr,
+	xdr_enter_page(xdr, PAGE_SIZE);
+	status = decode_getfattr(xdr, &res->fs_locations->fattr,
 				 res->fs_locations->server,
 				 !RPC_IS_ASYNC(req->rq_task));
 out:
@@ -5925,129 +5920,122 @@ out:
 /*
  * Decode EXCHANGE_ID response
  */
-static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
 				    void *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_exchange_id(&xdr, res);
+		status = decode_exchange_id(xdr, res);
 	return status;
 }
 
 /*
  * Decode CREATE_SESSION response
  */
-static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
 				       struct nfs41_create_session_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_create_session(&xdr, res);
+		status = decode_create_session(xdr, res);
 	return status;
 }
 
 /*
  * Decode DESTROY_SESSION response
  */
-static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p,
-					void *dummy)
+static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp,
+					struct xdr_stream *xdr,
+					void *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_destroy_session(&xdr, dummy);
+		status = decode_destroy_session(xdr, res);
 	return status;
 }
 
 /*
  * Decode SEQUENCE response
  */
-static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
 				 struct nfs4_sequence_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, res, rqstp);
+		status = decode_sequence(xdr, res, rqstp);
 	return status;
 }
 
 /*
  * Decode GET_LEASE_TIME response
  */
-static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
 				       struct nfs4_get_lease_time_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->lr_seq_res, rqstp);
+		status = decode_sequence(xdr, &res->lr_seq_res, rqstp);
 	if (!status)
-		status = decode_putrootfh(&xdr);
+		status = decode_putrootfh(xdr);
 	if (!status)
-		status = decode_fsinfo(&xdr, res->lr_fsinfo);
+		status = decode_fsinfo(xdr, res->lr_fsinfo);
 	return status;
 }
 
 /*
  * Decode RECLAIM_COMPLETE response
  */
-static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
+					 struct xdr_stream *xdr,
 					 struct nfs41_reclaim_complete_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (!status)
-		status = decode_sequence(&xdr, &res->seq_res, rqstp);
+		status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (!status)
-		status = decode_reclaim_complete(&xdr, (void *)NULL);
+		status = decode_reclaim_complete(xdr, (void *)NULL);
 	return status;
 }
 
 /*
  * Decode GETDEVINFO response
  */
-static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
+				      struct xdr_stream *xdr,
 				      struct nfs4_getdeviceinfo_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status != 0)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status != 0)
 		goto out;
-	status = decode_getdeviceinfo(&xdr, res->pdev);
+	status = decode_getdeviceinfo(xdr, res->pdev);
 out:
 	return status;
 }
@@ -6055,24 +6043,23 @@ out:
 /*
  * Decode LAYOUTGET response
  */
-static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
+static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
 				  struct nfs4_layoutget_res *res)
 {
-	struct xdr_stream xdr;
 	struct compound_hdr hdr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_compound_hdr(&xdr, &hdr);
+	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
-	status = decode_sequence(&xdr, &res->seq_res, rqstp);
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
 	if (status)
 		goto out;
-	status = decode_putfh(&xdr);
+	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_layoutget(&xdr, rqstp, res);
+	status = decode_layoutget(xdr, rqstp, res);
 out:
 	return status;
 }
@@ -6236,7 +6223,7 @@ nfs4_stat_to_errno(int stat)
 [NFSPROC4_CLNT_##proc] = {					\
 	.p_proc   = NFSPROC4_COMPOUND,				\
 	.p_encode = (kxdreproc_t)nfs4_xdr_##argtype,		\
-	.p_decode = (kxdrproc_t)nfs4_xdr_##restype,		\
+	.p_decode = (kxdrdproc_t)nfs4_xdr_##restype,		\
 	.p_arglen = NFS4_##argtype##_sz,			\
 	.p_replen = NFS4_##restype##_sz,			\
 	.p_statidx = NFSPROC4_CLNT_##proc,			\
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c363efda8ecf..21a63da305ff 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -533,7 +533,8 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Protocol".
  */
 
-static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
+static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
+				void *__unused)
 {
 	return 0;
 }
@@ -541,26 +542,25 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p, void *__unused)
 /*
  * 20.2. Operation 4: CB_RECALL - Recall a Delegation
  */
-static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
+static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+				  struct xdr_stream *xdr,
 				  struct nfsd4_callback *cb)
 {
-	struct xdr_stream xdr;
 	struct nfs4_cb_compound_hdr hdr;
 	enum nfsstat4 nfserr;
 	int status;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
-	status = decode_cb_compound4res(&xdr, &hdr);
+	status = decode_cb_compound4res(xdr, &hdr);
 	if (unlikely(status))
 		goto out;
 
 	if (cb != NULL) {
-		status = decode_cb_sequence4res(&xdr, cb);
+		status = decode_cb_sequence4res(xdr, cb);
 		if (unlikely(status))
 			goto out;
 	}
 
-	status = decode_cb_op_status(&xdr, OP_CB_RECALL, &nfserr);
+	status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr);
 	if (unlikely(status))
 		goto out;
 	if (unlikely(nfserr != NFS4_OK))
@@ -578,7 +578,7 @@ out_default:
 [NFSPROC4_CLNT_##proc] = {						\
 	.p_proc    = NFSPROC4_CB_##call,				\
 	.p_encode  = (kxdreproc_t)nfs4_xdr_enc_##argtype,		\
-	.p_decode  = (kxdrproc_t)nfs4_xdr_dec_##restype,		\
+	.p_decode  = (kxdrdproc_t)nfs4_xdr_dec_##restype,		\
 	.p_arglen  = NFS4_enc_##argtype##_sz,				\
 	.p_replen  = NFS4_dec_##restype##_sz,				\
 	.p_statidx = NFSPROC4_CB_##call,				\
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d88cffbaa6df..8521067ed4f7 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -112,7 +112,7 @@ struct rpc_credops {
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
 	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
 						void *, __be32 *, void *);
-	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
+	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
 						void *, __be32 *, void *);
 };
 
@@ -140,7 +140,7 @@ void			put_rpccred(struct rpc_cred *);
 __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
 int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
-int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 7b19c4e1ce53..ef9476a36ff7 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -90,7 +90,7 @@ struct rpc_version {
 struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
 	kxdreproc_t		p_encode;	/* XDR encode function */
-	kxdrproc_t		p_decode;	/* XDR decode function */
+	kxdrdproc_t		p_decode;	/* XDR decode function */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_count;	/* call count */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a21cf5378c1d..9a21e8102c42 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -204,9 +204,10 @@ struct xdr_stream {
 };
 
 /*
- * This is the xdr_stream style generic XDR function.
+ * These are the xdr_stream style generic XDR encode and decode functions.
  */
 typedef void	(*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
+typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 651c9da703cb..67e31276682a 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -587,8 +587,18 @@ rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
 	return 0;
 }
 
+static int
+rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+			  __be32 *data, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+	return decode(rqstp, &xdr, obj);
+}
+
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
+rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
 		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
@@ -599,7 +609,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
 						   data, obj);
 	/* By default, we decode the arguments normally. */
-	return decode(rqstp, data, obj);
+	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
 }
 
 int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 42b46f9a670a..45dbf1521b9a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1503,10 +1503,19 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	return 0;
 }
 
+static int
+gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
+		      __be32 *p, void *obj)
+{
+	struct xdr_stream xdr;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	return decode(rqstp, &xdr, obj);
+}
 
 static int
 gss_unwrap_resp(struct rpc_task *task,
-		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
+		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
@@ -1537,7 +1546,7 @@ gss_unwrap_resp(struct rpc_task *task,
 	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
 						+ (savedlen - head->iov_len);
 out_decode:
-	status = decode(rqstp, p, obj);
+	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d446a32be667..4e8210dcda72 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1535,7 +1535,7 @@ call_decode(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
+	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
 	__be32		*p;
 
 	dprintk("RPC: %5u call_decode (status %d)\n",
@@ -1780,7 +1780,7 @@ static void rpcproc_encode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 }
 
-static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, struct xdr_stream *xdr, void *obj)
 {
 	return 0;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 63912a1a2983..c652e4cc9fe9 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -706,18 +706,16 @@ static void rpcb_enc_mapping(struct rpc_rqst *req, struct xdr_stream *xdr,
 	*p   = cpu_to_be32(rpcb->r_port);
 }
 
-static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getport(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
 	unsigned long port;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
 	rpcb->r_port = 0;
 
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
@@ -731,20 +729,18 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	return 0;
 }
 
-static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_set(struct rpc_rqst *req, struct xdr_stream *xdr,
 			unsigned int *boolp)
 {
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
-
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
+	__be32 *p;
 
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		return -EIO;
 
 	*boolp = 0;
-	if (*p)
+	if (*p != xdr_zero)
 		*boolp = 1;
 
 	dprintk("RPC: %5u RPCB_%s call %s\n",
@@ -785,20 +781,18 @@ static void rpcb_enc_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_rpcb_string(xdr, rpcb->r_owner, RPCB_MAXOWNERLEN);
 }
 
-static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
+static int rpcb_dec_getaddr(struct rpc_rqst *req, struct xdr_stream *xdr,
 			    struct rpcbind_args *rpcb)
 {
 	struct sockaddr_storage address;
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	struct rpc_task *task = req->rq_task;
-	struct xdr_stream xdr;
+	__be32 *p;
 	u32 len;
 
 	rpcb->r_port = 0;
 
-	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
-
-	p = xdr_inline_decode(&xdr, 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		goto out_fail;
 	len = be32_to_cpup(p);
@@ -816,7 +810,7 @@ static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p,
 	if (unlikely(len > RPCBIND_MAXUADDRLEN))
 		goto out_fail;
 
-	p = xdr_inline_decode(&xdr, len);
+	p = xdr_inline_decode(xdr, len);
 	if (unlikely(p == NULL))
 		goto out_fail;
 	dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid,
@@ -843,7 +837,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -853,7 +847,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -863,7 +857,7 @@ static struct rpc_procinfo rpcb_procedures2[] = {
 	[RPCBPROC_GETPORT] = {
 		.p_proc		= RPCBPROC_GETPORT,
 		.p_encode	= (kxdreproc_t)rpcb_enc_mapping,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getport,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getport,
 		.p_arglen	= RPCB_mappingargs_sz,
 		.p_replen	= RPCB_getportres_sz,
 		.p_statidx	= RPCBPROC_GETPORT,
@@ -876,7 +870,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -886,7 +880,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -896,7 +890,7 @@ static struct rpc_procinfo rpcb_procedures3[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
@@ -909,7 +903,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_SET] = {
 		.p_proc		= RPCBPROC_SET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_SET,
@@ -919,7 +913,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_UNSET] = {
 		.p_proc		= RPCBPROC_UNSET,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_set,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_set,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_setres_sz,
 		.p_statidx	= RPCBPROC_UNSET,
@@ -929,7 +923,7 @@ static struct rpc_procinfo rpcb_procedures4[] = {
 	[RPCBPROC_GETADDR] = {
 		.p_proc		= RPCBPROC_GETADDR,
 		.p_encode	= (kxdreproc_t)rpcb_enc_getaddr,
-		.p_decode	= (kxdrproc_t)rpcb_dec_getaddr,
+		.p_decode	= (kxdrdproc_t)rpcb_dec_getaddr,
 		.p_arglen	= RPCB_getaddrargs_sz,
 		.p_replen	= RPCB_getaddrres_sz,
 		.p_statidx	= RPCBPROC_GETADDR,
-- 
cgit v1.2.3


From cf4e594ea7e55555e81647b74a3a8e8b2826a529 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Thu, 16 Dec 2010 00:52:40 +0200
Subject: nl80211: Add notification for dropped Deauth/Disassoc

Add a new notification to indicate that a received, unprotected
Deauthentication or Disassociation frame was dropped due to
management frame protection being in use. This notification is
needed to allow user space (e.g., wpa_supplicant) to implement
SA Query procedure to recover from association state mismatch
between an AP and STA.

This is needed to avoid getting stuck in non-working state when MFP
(IEEE 802.11w) is used and a protected Deauthentication or
Disassociation frame is dropped for any reason. After that, the
station would silently discard any unprotected Deauthentication or
Disassociation frame that could be indicating that the AP does not
have association for the STA (when the Reason Code would be 6 or 7).
IEEE Std 802.11w-2009, 11.13 describes this recovery mechanism.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 ++++++++++
 include/net/cfg80211.h  | 26 ++++++++++++++++++++++++++
 net/mac80211/rx.c       | 22 ++++++++++++++++++++--
 net/wireless/mlme.c     | 22 ++++++++++++++++++++++
 net/wireless/nl80211.c  | 16 ++++++++++++++++
 net/wireless/nl80211.h  |  6 ++++++
 6 files changed, 100 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1cee56b3a79a..7483a89cee8f 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -399,6 +399,13 @@
  * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
  *	network is determined by the network interface.
  *
+ * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame
+ *	notification. This event is used to indicate that an unprotected
+ *	deauthentication frame was dropped when MFP is in use.
+ * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame
+ *	notification. This event is used to indicate that an unprotected
+ *	disassociation frame was dropped when MFP is in use.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -508,6 +515,9 @@ enum nl80211_commands {
 	NL80211_CMD_JOIN_MESH,
 	NL80211_CMD_LEAVE_MESH,
 
+	NL80211_CMD_UNPROT_DEAUTHENTICATE,
+	NL80211_CMD_UNPROT_DISASSOCIATE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f45e15f12446..3d1c09b777e8 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2359,6 +2359,32 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len);
 void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf,
 	size_t len);
 
+/**
+ * cfg80211_send_unprot_deauth - notification of unprotected deauthentication
+ * @dev: network device
+ * @buf: deauthentication frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever a received Deauthentication frame has been
+ * dropped in station mode because of MFP being used but the Deauthentication
+ * frame was not protected. This function may sleep.
+ */
+void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+				 size_t len);
+
+/**
+ * cfg80211_send_unprot_disassoc - notification of unprotected disassociation
+ * @dev: network device
+ * @buf: disassociation frame (header + body)
+ * @len: length of the frame data
+ *
+ * This function is called whenever a received Disassociation frame has been
+ * dropped in station mode because of MFP being used but the Disassociation
+ * frame was not protected. This function may sleep.
+ */
+void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
+				   size_t len);
+
 /**
  * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP)
  * @dev: network device
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 052789ef4745..4573ce1e1d15 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1540,12 +1540,30 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 	if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) {
 		if (unlikely(!ieee80211_has_protected(fc) &&
 			     ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
-			     rx->key))
+			     rx->key)) {
+			if (ieee80211_is_deauth(fc))
+				cfg80211_send_unprot_deauth(rx->sdata->dev,
+							    rx->skb->data,
+							    rx->skb->len);
+			else if (ieee80211_is_disassoc(fc))
+				cfg80211_send_unprot_disassoc(rx->sdata->dev,
+							      rx->skb->data,
+							      rx->skb->len);
 			return -EACCES;
+		}
 		/* BIP does not use Protected field, so need to check MMIE */
 		if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
-			     ieee80211_get_mmie_keyidx(rx->skb) < 0))
+			     ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
+			if (ieee80211_is_deauth(fc))
+				cfg80211_send_unprot_deauth(rx->sdata->dev,
+							    rx->skb->data,
+							    rx->skb->len);
+			else if (ieee80211_is_disassoc(fc))
+				cfg80211_send_unprot_disassoc(rx->sdata->dev,
+							      rx->skb->data,
+							      rx->skb->len);
 			return -EACCES;
+		}
 		/*
 		 * When using MFP, Action frames are not allowed prior to
 		 * having configured keys.
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d7680f2a4c5b..aa5df8865ff7 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -263,6 +263,28 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 }
 EXPORT_SYMBOL(cfg80211_send_disassoc);
 
+void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
+				 size_t len)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
+
+void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
+				   size_t len)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
+
 static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr)
 {
 	int i;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 594a6ac8b9d2..aefce54d47e2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5473,6 +5473,22 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 				NL80211_CMD_DISASSOCIATE, gfp);
 }
 
+void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev, const u8 *buf,
+				size_t len, gfp_t gfp)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp);
+}
+
+void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev, const u8 *buf,
+				  size_t len, gfp_t gfp)
+{
+	nl80211_send_mlme_event(rdev, netdev, buf, len,
+				NL80211_CMD_UNPROT_DISASSOCIATE, gfp);
+}
+
 static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
 				      struct net_device *netdev, int cmd,
 				      const u8 *addr, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 16c2f7190768..e3f7fa886966 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -25,6 +25,12 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
 			   const u8 *buf, size_t len, gfp_t gfp);
+void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
+				struct net_device *netdev,
+				const u8 *buf, size_t len, gfp_t gfp);
+void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
+				  struct net_device *netdev,
+				  const u8 *buf, size_t len, gfp_t gfp);
 void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev,
 			       const u8 *addr, gfp_t gfp);
-- 
cgit v1.2.3


From 2784fe915cd25adf23ea28534019308d8a144721 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 15 Dec 2010 19:24:11 -0800
Subject: cfg80211: fix null pointer dereference with a custom regulatory
 request

Once we moved the core regulatory request to the queue and let
the scheduler process it last_request will have been left NULL
until the schedular decides to process the first request. When
this happens and we are loading a driver with a custom regulatory
request like all Atheros drivers we end up with a NULL pointer
dereference. We fix this by checking if the request was a
custom one.

BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
IP: [<ffffffffa016de87>] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211]
PGD 71f91067 PUD 712b2067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:1d.7/usb2/2-1/firmware/2-1/loading
CPU 0
Modules linked in: ath9k_htc(+) ath9k_common ath9k_hw ath <etc>
Pid: 3094, comm: insmod Tainted: G        W   2.6.37-rc5-wl #16 INVALID/28427ZQ
RIP: 0010:[<ffffffffa016de87>]  [<ffffffffa016de87>] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211]
RSP: 0018:ffff88007045db78  EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffffffffa047d9a0 RCX: ffff88007045dbd0
RDX: 0000000000004e20 RSI: 000000000024cde0 RDI: ffff8800700483e0
RBP: ffff88007045db98 R08: ffffffffa02f5b40 R09: 0000000000000001
R10: 000000000000000e R11: 0000000000000001 R12: 0000000000000000
R13: ffff88007004e3b0 R14: 0000000000000000 R15: ffff880070048340
FS:  00007f635a707700(0000) GS:ffff880077400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000004 CR3: 00000000708a9000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process insmod (pid: 3094, threadinfo ffff88007045c000, task ffff8800713e3ec0)
Stack:
 ffffffffa047d9a0 0000000000000000 ffff88007004e3b0 0000000000000000
 ffff88007045dc08 ffffffffa016e147 000000007045dc08 0000000000000002
 ffff8800700483e0 ffffffffa02f5b40 ffff88007045dbd8 0000000000000000
Call Trace:
 [<ffffffffa016e147>] wiphy_apply_custom_regulatory+0x137/0x1d0 [cfg80211]
 [<ffffffffa047a690>] ? ath9k_reg_notifier+0x0/0x50 [ath9k_htc]
 [<ffffffffa02f47f7>] ath_regd_init+0x347/0x430 [ath]
 [<ffffffffa047b1f5>] ath9k_htc_probe_device+0x6c5/0x960 [ath9k_htc]
 [<ffffffffa0472a2c>] ath9k_htc_hw_init+0xc/0x30 [ath9k_htc]
 [<ffffffffa04747e6>] ath9k_hif_usb_probe+0x216/0x3b0 [ath9k_htc]
 [<ffffffffa03bb6bc>] usb_probe_interface+0x10c/0x210 [usbcore]
 [<ffffffff812aec26>] driver_probe_device+0x96/0x1c0
 [<ffffffff812aedf3>] __driver_attach+0xa3/0xb0
 [<ffffffff812aed50>] ? __driver_attach+0x0/0xb0
 [<ffffffff812adaae>] bus_for_each_dev+0x5e/0x90
 [<ffffffff812ae8c9>] driver_attach+0x19/0x20
 [<ffffffff812ae438>] bus_add_driver+0x168/0x320
 [<ffffffff812af071>] driver_register+0x71/0x140
 [<ffffffff811fc4a8>] ? __raw_spin_lock_init+0x38/0x70
 [<ffffffffa03ba39c>] usb_register_driver+0xdc/0x190 [usbcore]
 [<ffffffffa03a2000>] ? ath9k_htc_init+0x0/0x4f [ath9k_htc]
 [<ffffffffa047499e>] ath9k_hif_usb_init+0x1e/0x20 [ath9k_htc]
 [<ffffffffa03a202b>] ath9k_htc_init+0x2b/0x4f [ath9k_htc]
 [<ffffffff8100212f>] do_one_initcall+0x3f/0x180
 [<ffffffff8109ef5b>] sys_init_module+0xbb/0x200
 [<ffffffff8100bf52>] system_call_fastpath+0x16/0x1b
Code: <etc, who cares>
RIP  [<ffffffffa016de87>] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211]
 RSP <ffff88007045db78>
CR2: 0000000000000004
---[ end trace 79e4193601c8b713 ]---

Reported-by: Sujith Manoharan <Sujith.Manoharan@atheros.com>
Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 4 +++-
 net/wireless/reg.c     | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3d1c09b777e8..6dc665a727c2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1419,7 +1419,9 @@ struct ieee80211_txrx_stypes {
 
 /**
  * struct wiphy - wireless hardware description
- * @reg_notifier: the driver's regulatory notification callback
+ * @reg_notifier: the driver's regulatory notification callback,
+ *	note that if your driver uses wiphy_apply_custom_regulatory()
+ *	the reg_notifier's request can be passed as NULL
  * @regd: the driver's regulatory domain, if one was requested via
  * 	the regulatory_hint() API. This can be used by the driver
  *	on the reg_notifier() if it chooses to ignore future
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5ed615f94e0c..99d41831d76e 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -661,7 +661,8 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
 	 * Follow the driver's regulatory domain, if present, unless a country
 	 * IE has been processed or a user wants to help complaince further
 	 */
-	if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
+	if (!custom_regd &&
+	    last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    last_request->initiator != NL80211_REGDOM_SET_BY_USER &&
 	    wiphy->regd)
 		regd = wiphy->regd;
-- 
cgit v1.2.3


From a3d22a68d752ccc1a01bb0a64dd70b7a98bf9e23 Mon Sep 17 00:00:00 2001
From: Vladislav Zolotarov <vladz@broadcom.com>
Date: Mon, 13 Dec 2010 06:27:10 +0000
Subject: bnx2x: Take the distribution range definition out of skb_tx_hash()

Move the calcualation of the Tx hash for a given hash range into a separate
function and define the skb_tx_hash(), which calculates a Tx hash for a
[0; dev->real_num_tx_queues - 1] hash values range, using this
function (__skb_tx_hash()).

Signed-off-by: Vladislav Zolotarov <vladz@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 include/linux/skbuff.h    |  5 +++--
 net/core/dev.c            | 15 ++++++++++-----
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d31bc3c94717..445e6825f8eb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1747,6 +1747,16 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 		__netif_schedule(txq->qdisc);
 }
 
+/*
+ * Returns a Tx hash for the given packet when dev->real_num_tx_queues is used
+ * as a distribution range limit for the returned value.
+ */
+static inline u16 skb_tx_hash(const struct net_device *dev,
+			      const struct sk_buff *skb)
+{
+	return __skb_tx_hash(dev, skb, dev->real_num_tx_queues);
+}
+
 /**
  *	netif_is_multiqueue - test if device has multiple transmit queues
  *	@dev: network device
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 19f37a6ee6c4..4c4bec6316d9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2165,8 +2165,9 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-extern u16 skb_tx_hash(const struct net_device *dev,
-		       const struct sk_buff *skb);
+extern u16 __skb_tx_hash(const struct net_device *dev,
+			 const struct sk_buff *skb,
+			 unsigned int num_tx_queues);
 
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
diff --git a/net/core/dev.c b/net/core/dev.c
index d28b3a023bb2..b25dd087f06a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2112,14 +2112,19 @@ out:
 
 static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
 {
 	u32 hash;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= dev->real_num_tx_queues))
-			hash -= dev->real_num_tx_queues;
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
 		return hash;
 	}
 
@@ -2129,9 +2134,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+	return (u16) (((u64) hash * num_tx_queues) >> 32);
 }
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
 
 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
-- 
cgit v1.2.3


From b236da6931e2482bfe44a7865dd4e7bb036f3496 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 14 Dec 2010 03:09:15 +0000
Subject: net: use NUMA_NO_NODE instead of the magic number -1

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 net/core/dev.c            | 2 +-
 net/core/net-sysfs.c      | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 445e6825f8eb..cc916c5c3279 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -530,7 +530,7 @@ static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
 	return q->numa_node;
 #else
-	return -1;
+	return NUMA_NO_NODE;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b25dd087f06a..7ac26d2b9722 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5121,7 +5121,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
 	queue->xmit_lock_owner = -1;
-	netdev_queue_numa_node_write(queue, -1);
+	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 85e8b5326dd6..e23c01be5a5b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1009,7 +1009,8 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	if (dev_maps)
 		call_rcu(&dev_maps->rcu, xps_dev_maps_release);
 
-	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node : -1);
+	netdev_queue_numa_node_write(queue, (numa_node >= 0) ? numa_node :
+					    NUMA_NO_NODE);
 
 	mutex_unlock(&xps_map_mutex);
 
-- 
cgit v1.2.3


From c6c8fea29769d998d94fcec9b9f14d4b52b349d3 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 13 Dec 2010 11:19:28 +0000
Subject: net: Add batman-adv meshing protocol

B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is a routing
protocol for multi-hop ad-hoc mesh networks. The networks may be wired or
wireless. See http://www.open-mesh.org/ for more information and user space
tools.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ABI/testing/sysfs-class-net-batman-adv         |   14 +
 Documentation/ABI/testing/sysfs-class-net-mesh     |   69 +
 Documentation/networking/batman-adv.txt            |  240 ++++
 MAINTAINERS                                        |    9 +
 net/Kconfig                                        |    1 +
 net/Makefile                                       |    1 +
 net/batman-adv/Kconfig                             |   25 +
 net/batman-adv/Makefile                            |   39 +
 net/batman-adv/aggregation.c                       |  273 ++++
 net/batman-adv/aggregation.h                       |   43 +
 net/batman-adv/bat_debugfs.c                       |  360 +++++
 net/batman-adv/bat_debugfs.h                       |   33 +
 net/batman-adv/bat_sysfs.c                         |  593 +++++++++
 net/batman-adv/bat_sysfs.h                         |   42 +
 net/batman-adv/bitarray.c                          |  201 +++
 net/batman-adv/bitarray.h                          |   44 +
 net/batman-adv/gateway_client.c                    |  477 +++++++
 net/batman-adv/gateway_client.h                    |   36 +
 net/batman-adv/gateway_common.c                    |  177 +++
 net/batman-adv/gateway_common.h                    |   38 +
 net/batman-adv/hard-interface.c                    |  651 +++++++++
 net/batman-adv/hard-interface.h                    |   53 +
 net/batman-adv/hash.c                              |   62 +
 net/batman-adv/hash.h                              |  176 +++
 net/batman-adv/icmp_socket.c                       |  356 +++++
 net/batman-adv/icmp_socket.h                       |   34 +
 net/batman-adv/main.c                              |  187 +++
 net/batman-adv/main.h                              |  183 +++
 net/batman-adv/originator.c                        |  564 ++++++++
 net/batman-adv/originator.h                        |   64 +
 net/batman-adv/packet.h                            |  136 ++
 net/batman-adv/ring_buffer.c                       |   52 +
 net/batman-adv/ring_buffer.h                       |   28 +
 net/batman-adv/routing.c                           | 1397 ++++++++++++++++++++
 net/batman-adv/routing.h                           |   48 +
 net/batman-adv/send.c                              |  585 ++++++++
 net/batman-adv/send.h                              |   41 +
 net/batman-adv/soft-interface.c                    |  697 ++++++++++
 net/batman-adv/soft-interface.h                    |   35 +
 net/batman-adv/translation-table.c                 |  534 ++++++++
 net/batman-adv/translation-table.h                 |   45 +
 net/batman-adv/types.h                             |  271 ++++
 net/batman-adv/unicast.c                           |  343 +++++
 net/batman-adv/unicast.h                           |   35 +
 net/batman-adv/vis.c                               |  949 +++++++++++++
 net/batman-adv/vis.h                               |   37 +
 46 files changed, 10278 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-net-batman-adv
 create mode 100644 Documentation/ABI/testing/sysfs-class-net-mesh
 create mode 100644 Documentation/networking/batman-adv.txt
 create mode 100644 net/batman-adv/Kconfig
 create mode 100644 net/batman-adv/Makefile
 create mode 100644 net/batman-adv/aggregation.c
 create mode 100644 net/batman-adv/aggregation.h
 create mode 100644 net/batman-adv/bat_debugfs.c
 create mode 100644 net/batman-adv/bat_debugfs.h
 create mode 100644 net/batman-adv/bat_sysfs.c
 create mode 100644 net/batman-adv/bat_sysfs.h
 create mode 100644 net/batman-adv/bitarray.c
 create mode 100644 net/batman-adv/bitarray.h
 create mode 100644 net/batman-adv/gateway_client.c
 create mode 100644 net/batman-adv/gateway_client.h
 create mode 100644 net/batman-adv/gateway_common.c
 create mode 100644 net/batman-adv/gateway_common.h
 create mode 100644 net/batman-adv/hard-interface.c
 create mode 100644 net/batman-adv/hard-interface.h
 create mode 100644 net/batman-adv/hash.c
 create mode 100644 net/batman-adv/hash.h
 create mode 100644 net/batman-adv/icmp_socket.c
 create mode 100644 net/batman-adv/icmp_socket.h
 create mode 100644 net/batman-adv/main.c
 create mode 100644 net/batman-adv/main.h
 create mode 100644 net/batman-adv/originator.c
 create mode 100644 net/batman-adv/originator.h
 create mode 100644 net/batman-adv/packet.h
 create mode 100644 net/batman-adv/ring_buffer.c
 create mode 100644 net/batman-adv/ring_buffer.h
 create mode 100644 net/batman-adv/routing.c
 create mode 100644 net/batman-adv/routing.h
 create mode 100644 net/batman-adv/send.c
 create mode 100644 net/batman-adv/send.h
 create mode 100644 net/batman-adv/soft-interface.c
 create mode 100644 net/batman-adv/soft-interface.h
 create mode 100644 net/batman-adv/translation-table.c
 create mode 100644 net/batman-adv/translation-table.h
 create mode 100644 net/batman-adv/types.h
 create mode 100644 net/batman-adv/unicast.c
 create mode 100644 net/batman-adv/unicast.h
 create mode 100644 net/batman-adv/vis.c
 create mode 100644 net/batman-adv/vis.h

(limited to 'net')

diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv
new file mode 100644
index 000000000000..38dd762def4b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-batman-adv
@@ -0,0 +1,14 @@
+
+What:           /sys/class/net/<iface>/batman-adv/mesh_iface
+Date:           May 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                The /sys/class/net/<iface>/batman-adv/mesh_iface file
+                displays the batman mesh interface this <iface>
+                currently is associated with.
+
+What:           /sys/class/net/<iface>/batman-adv/iface_status
+Date:           May 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Indicates the status of <iface> as it is seen by batman.
diff --git a/Documentation/ABI/testing/sysfs-class-net-mesh b/Documentation/ABI/testing/sysfs-class-net-mesh
new file mode 100644
index 000000000000..748fe1701d25
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-mesh
@@ -0,0 +1,69 @@
+
+What:           /sys/class/net/<mesh_iface>/mesh/aggregated_ogms
+Date:           May 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Indicates whether the batman protocol messages of the
+                mesh <mesh_iface> shall be aggregated or not.
+
+What:           /sys/class/net/<mesh_iface>/mesh/bonding
+Date:           June 2010
+Contact:        Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
+Description:
+                Indicates whether the data traffic going through the
+                mesh will be sent using multiple interfaces at the
+                same time (if available).
+
+What:           /sys/class/net/<mesh_iface>/mesh/fragmentation
+Date:           October 2010
+Contact:        Andreas Langer <an.langer@gmx.de>
+Description:
+                Indicates whether the data traffic going through the
+                mesh will be fragmented or silently discarded if the
+                packet size exceeds the outgoing interface MTU.
+
+What:           /sys/class/net/<mesh_iface>/mesh/gw_bandwidth
+Date:           October 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Defines the bandwidth which is propagated by this
+                node if gw_mode was set to 'server'.
+
+What:           /sys/class/net/<mesh_iface>/mesh/gw_mode
+Date:           October 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Defines the state of the gateway features. Can be
+                either 'off', 'client' or 'server'.
+
+What:           /sys/class/net/<mesh_iface>/mesh/gw_sel_class
+Date:           October 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Defines the selection criteria this node will use
+                to choose a gateway if gw_mode was set to 'client'.
+
+What:           /sys/class/net/<mesh_iface>/mesh/orig_interval
+Date:           May 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Defines the interval in milliseconds in which batman
+                sends its protocol messages.
+
+What:           /sys/class/net/<mesh_iface>/mesh/hop_penalty
+Date:           Oct 2010
+Contact:        Linus Lüssing <linus.luessing@web.de>
+Description:
+		Defines the penalty which will be applied to an
+		originator message's tq-field on every hop.
+
+What:           /sys/class/net/<mesh_iface>/mesh/vis_mode
+Date:           May 2010
+Contact:        Marek Lindner <lindner_marek@yahoo.de>
+Description:
+                Each batman node only maintains information about its
+                own local neighborhood, therefore generating graphs
+                showing the topology of the entire mesh is not easily
+                feasible without having a central instance to collect
+                the local topologies from all nodes. This file allows
+                to activate the collecting (server) mode.
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
new file mode 100644
index 000000000000..77f0cdd5b0dd
--- /dev/null
+++ b/Documentation/networking/batman-adv.txt
@@ -0,0 +1,240 @@
+[state: 21-11-2010]
+
+BATMAN-ADV
+----------
+
+Batman  advanced  is  a new approach to wireless networking which
+does no longer operate on the IP basis. Unlike the batman daemon,
+which  exchanges  information  using UDP packets and sets routing
+tables, batman-advanced operates on ISO/OSI Layer 2 only and uses
+and  routes  (or  better: bridges) Ethernet Frames. It emulates a
+virtual network switch of all nodes participating.  Therefore all
+nodes  appear  to be link local, thus all higher operating proto-
+cols won't be affected by any changes within the network. You can
+run almost any protocol above batman advanced, prominent examples
+are: IPv4, IPv6, DHCP, IPX.
+
+Batman advanced was implemented as a Linux kernel driver  to  re-
+duce the overhead to a minimum. It does not depend on any (other)
+network driver, and can be used on wifi as well as ethernet  lan,
+vpn,  etc ... (anything with ethernet-style layer 2).
+
+CONFIGURATION
+-------------
+
+Load the batman-adv module into your kernel:
+
+# insmod batman-adv.ko
+
+The  module  is now waiting for activation. You must add some in-
+terfaces on which batman can operate. After  loading  the  module
+batman  advanced  will scan your systems interfaces to search for
+compatible interfaces. Once found, it will create  subfolders  in
+the /sys directories of each supported interface, e.g.
+
+# ls /sys/class/net/eth0/batman_adv/
+# iface_status  mesh_iface
+
+If an interface does not have the "batman_adv" subfolder it prob-
+ably is not supported. Not supported  interfaces  are:  loopback,
+non-ethernet and batman's own interfaces.
+
+Note:  After the module was loaded it will continuously watch for
+new interfaces to verify the compatibility. There is no  need  to
+reload the module if you plug your USB wifi adapter into your ma-
+chine after batman advanced was initially loaded.
+
+To activate a  given  interface  simply  write  "bat0"  into  its
+"mesh_iface" file inside the batman_adv subfolder:
+
+# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface
+
+Repeat  this step for all interfaces you wish to add.  Now batman
+starts using/broadcasting on this/these interface(s).
+
+By reading the "iface_status" file you can check its status:
+
+# cat /sys/class/net/eth0/batman_adv/iface_status
+# active
+
+To deactivate an interface you have  to  write  "none"  into  its
+"mesh_iface" file:
+
+# echo none > /sys/class/net/eth0/batman_adv/mesh_iface
+
+
+All  mesh  wide  settings  can be found in batman's own interface
+folder:
+
+#  ls  /sys/class/net/bat0/mesh/
+#  aggregated_ogms  bonding  fragmentation  orig_interval
+#  vis_mode
+
+
+There is a special folder for debugging informations:
+
+#  ls /sys/kernel/debug/batman_adv/bat0/
+#  originators  socket  transtable_global  transtable_local
+#  vis_data
+
+
+Some of the files contain all sort of status information  regard-
+ing  the  mesh  network.  For  example, you can view the table of
+originators (mesh participants) with:
+
+# cat /sys/kernel/debug/batman_adv/bat0/originators
+
+Other files allow to change batman's behaviour to better fit your
+requirements.  For instance, you can check the current originator
+interval (value in milliseconds which determines how often batman
+sends its broadcast packets):
+
+# cat /sys/class/net/bat0/mesh/orig_interval
+# 1000
+
+and also change its value:
+
+# echo 3000 > /sys/class/net/bat0/mesh/orig_interval
+
+In very mobile scenarios, you might want to adjust the originator
+interval to a lower value. This will make the mesh  more  respon-
+sive to topology changes, but will also increase the overhead.
+
+
+USAGE
+-----
+
+To  make use of your newly created mesh, batman advanced provides
+a new interface "bat0" which you should use from this  point  on.
+All  interfaces  added  to  batman  advanced are not relevant any
+longer because batman handles them for you. Basically, one "hands
+over" the data by using the batman interface and batman will make
+sure it reaches its destination.
+
+The "bat0" interface can be used like any  other  regular  inter-
+face.  It needs an IP address which can be either statically con-
+figured or dynamically (by using DHCP or similar services):
+
+# NodeA: ifconfig bat0 192.168.0.1
+# NodeB: ifconfig bat0 192.168.0.2
+# NodeB: ping 192.168.0.1
+
+Note:  In  order to avoid problems remove all IP addresses previ-
+ously assigned to interfaces now used by batman advanced, e.g.
+
+# ifconfig eth0 0.0.0.0
+
+
+VISUALIZATION
+-------------
+
+If you want topology visualization, at least one mesh  node  must
+be configured as VIS-server:
+
+# echo "server" > /sys/class/net/bat0/mesh/vis_mode
+
+Each  node  is  either configured as "server" or as "client" (de-
+fault: "client").  Clients send their topology data to the server
+next to them, and server synchronize with other servers. If there
+is no server configured (default) within the  mesh,  no  topology
+information   will  be  transmitted.  With  these  "synchronizing
+servers", there can be 1 or more vis servers sharing the same (or
+at least very similar) data.
+
+When  configured  as  server,  you can get a topology snapshot of
+your mesh:
+
+# cat /sys/kernel/debug/batman_adv/bat0/vis_data
+
+This raw output is intended to be easily parsable and convertable
+with  other tools. Have a look at the batctl README if you want a
+vis output in dot or json format for instance and how those  out-
+puts could then be visualised in an image.
+
+The raw format consists of comma separated values per entry where
+each entry is giving information about a  certain  source  inter-
+face.  Each  entry can/has to have the following values:
+-> "mac" - mac address of an originator's source interface
+           (each line begins with it)
+-> "TQ mac  value"  -  src mac's link quality towards mac address
+                       of a neighbor originator's interface which
+                       is being used for routing
+-> "HNA mac" - HNA announced by source mac
+-> "PRIMARY" - this  is a primary interface
+-> "SEC mac" - secondary mac address of source
+               (requires preceding PRIMARY)
+
+The TQ value has a range from 4 to 255 with 255 being  the  best.
+The HNA entries are showing which hosts are connected to the mesh
+via bat0 or being bridged into the mesh network.  The PRIMARY/SEC
+values are only applied on primary interfaces
+
+
+LOGGING/DEBUGGING
+-----------------
+
+All error messages, warnings and information messages are sent to
+the kernel log. Depending on your operating  system  distribution
+this  can  be read in one of a number of ways. Try using the com-
+mands: dmesg, logread, or looking in the files  /var/log/kern.log
+or  /var/log/syslog.  All  batman-adv  messages are prefixed with
+"batman-adv:" So to see just these messages try
+
+# dmesg | grep batman-adv
+
+When investigating problems with your mesh network  it  is  some-
+times  necessary  to see more detail debug messages. This must be
+enabled when compiling the batman-adv module. When building  bat-
+man-adv  as  part of kernel, use "make menuconfig" and enable the
+option "B.A.T.M.A.N. debugging".
+
+Those additional  debug messages can be accessed  using a special
+file in debugfs
+
+# cat /sys/kernel/debug/batman_adv/bat0/log
+
+The additional debug output is by default disabled. It can be en-
+abled  during run time. Following log_levels are defined:
+
+0 - All  debug  output  disabled
+1 - Enable messages related to routing / flooding / broadcasting
+2 - Enable route or hna added / changed / deleted
+3 - Enable all messages
+
+The debug output can be changed at runtime  using  the  file
+/sys/class/net/bat0/mesh/log_level. e.g.
+
+# echo 2 > /sys/class/net/bat0/mesh/log_level
+
+will enable debug messages for when routes or HNAs change.
+
+
+BATCTL
+------
+
+As batman advanced operates on layer 2 all hosts participating in
+the  virtual switch are completely transparent for all  protocols
+above layer 2. Therefore the common diagnosis tools do  not  work
+as  expected.  To  overcome these problems batctl was created. At
+the  moment the  batctl contains ping,  traceroute,  tcpdump  and
+interfaces to the kernel module settings.
+
+For more information, please see the manpage (man batctl).
+
+batctl is available on http://www.open-mesh.org/
+
+
+CONTACT
+-------
+
+Please send us comments, experiences, questions, anything :)
+
+IRC:            #batman   on   irc.freenode.org
+Mailing-list:   b.a.t.m.a.n@b.a.t.m.a.n@lists.open-mesh.org
+                (optional   subscription   at
+                 https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
+
+You can also contact the Authors:
+
+Marek  Lindner  <lindner_marek@yahoo.de>
+Simon  Wunderlich  <siwu@hrz.tu-chemnitz.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index f16ce8f46934..2a5e60fb3b5d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1264,6 +1264,15 @@ S:	Maintained
 F:	drivers/video/backlight/
 F:	include/linux/backlight.h
 
+BATMAN ADVANCED
+M:	Marek Lindner <lindner_marek@yahoo.de>
+M:	Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
+M:	Sven Eckelmann <sven@narfation.org>
+L:	b.a.t.m.a.n@lists.open-mesh.org
+W:	http://www.open-mesh.org/
+S:	Maintained
+F:	net/batman-adv/
+
 BAYCOM/HDLCDRV DRIVERS FOR AX.25
 M:	Thomas Sailer <t.sailer@alumni.ethz.ch>
 L:	linux-hams@vger.kernel.org
diff --git a/net/Kconfig b/net/Kconfig
index 126c2af0fc1f..ad0aafe903f8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -214,6 +214,7 @@ source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
+source "net/batman-adv/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index 6b7bfd7f1416..a3330ebe2c53 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ endif
 obj-$(CONFIG_WIMAX)		+= wimax/
 obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
 obj-$(CONFIG_CEPH_LIB)		+= ceph/
+obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
new file mode 100644
index 000000000000..6c051ad833eb
--- /dev/null
+++ b/net/batman-adv/Kconfig
@@ -0,0 +1,25 @@
+#
+# B.A.T.M.A.N meshing protocol
+#
+
+config BATMAN_ADV
+	tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
+	depends on NET
+        default n
+	---help---
+
+        B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
+        a routing protocol for multi-hop ad-hoc mesh networks. The
+        networks may be wired or wireless. See
+        http://www.open-mesh.org/ for more information and user space
+        tools.
+
+config BATMAN_ADV_DEBUG
+	bool "B.A.T.M.A.N. debugging"
+	depends on BATMAN_ADV != n
+	---help---
+
+	  This is an option for use by developers; most people should
+	  say N here. This enables compilation of support for
+	  outputting debugging information to the kernel log. The
+	  output is controlled via the module parameter debug.
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
new file mode 100644
index 000000000000..d936aeccd194
--- /dev/null
+++ b/net/batman-adv/Makefile
@@ -0,0 +1,39 @@
+#
+# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+#
+# Marek Lindner, Simon Wunderlich
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA
+#
+
+obj-$(CONFIG_BATMAN_ADV) += batman-adv.o
+batman-adv-y += aggregation.o
+batman-adv-y += bat_debugfs.o
+batman-adv-y += bat_sysfs.o
+batman-adv-y += bitarray.o
+batman-adv-y += gateway_client.o
+batman-adv-y += gateway_common.o
+batman-adv-y += hard-interface.o
+batman-adv-y += hash.o
+batman-adv-y += icmp_socket.o
+batman-adv-y += main.o
+batman-adv-y += originator.o
+batman-adv-y += ring_buffer.o
+batman-adv-y += routing.o
+batman-adv-y += send.o
+batman-adv-y += soft-interface.o
+batman-adv-y += translation-table.o
+batman-adv-y += unicast.o
+batman-adv-y += vis.o
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
new file mode 100644
index 000000000000..3850a3ecf947
--- /dev/null
+++ b/net/batman-adv/aggregation.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "aggregation.h"
+#include "send.h"
+#include "routing.h"
+
+/* calculate the size of the hna information for a given packet */
+static int hna_len(struct batman_packet *batman_packet)
+{
+	return batman_packet->num_hna * ETH_ALEN;
+}
+
+/* return true if new_packet can be aggregated with forw_packet */
+static bool can_aggregate_with(struct batman_packet *new_batman_packet,
+			       int packet_len,
+			       unsigned long send_time,
+			       bool directlink,
+			       struct batman_if *if_incoming,
+			       struct forw_packet *forw_packet)
+{
+	struct batman_packet *batman_packet =
+		(struct batman_packet *)forw_packet->skb->data;
+	int aggregated_bytes = forw_packet->packet_len + packet_len;
+
+	/**
+	 * we can aggregate the current packet to this aggregated packet
+	 * if:
+	 *
+	 * - the send time is within our MAX_AGGREGATION_MS time
+	 * - the resulting packet wont be bigger than
+	 *   MAX_AGGREGATION_BYTES
+	 */
+
+	if (time_before(send_time, forw_packet->send_time) &&
+	    time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
+					forw_packet->send_time) &&
+	    (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
+
+		/**
+		 * check aggregation compatibility
+		 * -> direct link packets are broadcasted on
+		 *    their interface only
+		 * -> aggregate packet if the current packet is
+		 *    a "global" packet as well as the base
+		 *    packet
+		 */
+
+		/* packets without direct link flag and high TTL
+		 * are flooded through the net  */
+		if ((!directlink) &&
+		    (!(batman_packet->flags & DIRECTLINK)) &&
+		    (batman_packet->ttl != 1) &&
+
+		    /* own packets originating non-primary
+		     * interfaces leave only that interface */
+		    ((!forw_packet->own) ||
+		     (forw_packet->if_incoming->if_num == 0)))
+			return true;
+
+		/* if the incoming packet is sent via this one
+		 * interface only - we still can aggregate */
+		if ((directlink) &&
+		    (new_batman_packet->ttl == 1) &&
+		    (forw_packet->if_incoming == if_incoming) &&
+
+		    /* packets from direct neighbors or
+		     * own secondary interface packets
+		     * (= secondary interface packets in general) */
+		    (batman_packet->flags & DIRECTLINK ||
+		     (forw_packet->own &&
+		      forw_packet->if_incoming->if_num != 0)))
+			return true;
+	}
+
+	return false;
+}
+
+#define atomic_dec_not_zero(v)          atomic_add_unless((v), -1, 0)
+/* create a new aggregated packet and add this packet to it */
+static void new_aggregated_packet(unsigned char *packet_buff, int packet_len,
+				  unsigned long send_time, bool direct_link,
+				  struct batman_if *if_incoming,
+				  int own_packet)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct forw_packet *forw_packet_aggr;
+	unsigned char *skb_buff;
+
+	/* own packet should always be scheduled */
+	if (!own_packet) {
+		if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"batman packet queue full\n");
+			return;
+		}
+	}
+
+	forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
+	if (!forw_packet_aggr) {
+		if (!own_packet)
+			atomic_inc(&bat_priv->batman_queue_left);
+		return;
+	}
+
+	if ((atomic_read(&bat_priv->aggregated_ogms)) &&
+	    (packet_len < MAX_AGGREGATION_BYTES))
+		forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
+						      sizeof(struct ethhdr));
+	else
+		forw_packet_aggr->skb = dev_alloc_skb(packet_len +
+						      sizeof(struct ethhdr));
+
+	if (!forw_packet_aggr->skb) {
+		if (!own_packet)
+			atomic_inc(&bat_priv->batman_queue_left);
+		kfree(forw_packet_aggr);
+		return;
+	}
+	skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
+
+	INIT_HLIST_NODE(&forw_packet_aggr->list);
+
+	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+	forw_packet_aggr->packet_len = packet_len;
+	memcpy(skb_buff, packet_buff, packet_len);
+
+	forw_packet_aggr->own = own_packet;
+	forw_packet_aggr->if_incoming = if_incoming;
+	forw_packet_aggr->num_packets = 0;
+	forw_packet_aggr->direct_link_flags = 0;
+	forw_packet_aggr->send_time = send_time;
+
+	/* save packet direct link flag status */
+	if (direct_link)
+		forw_packet_aggr->direct_link_flags |= 1;
+
+	/* add new packet to packet list */
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
+	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+	/* start timer for this packet */
+	INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
+			  send_outstanding_bat_packet);
+	queue_delayed_work(bat_event_workqueue,
+			   &forw_packet_aggr->delayed_work,
+			   send_time - jiffies);
+}
+
+/* aggregate a new packet into the existing aggregation */
+static void aggregate(struct forw_packet *forw_packet_aggr,
+		      unsigned char *packet_buff,
+		      int packet_len,
+		      bool direct_link)
+{
+	unsigned char *skb_buff;
+
+	skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
+	memcpy(skb_buff, packet_buff, packet_len);
+	forw_packet_aggr->packet_len += packet_len;
+	forw_packet_aggr->num_packets++;
+
+	/* save packet direct link flag status */
+	if (direct_link)
+		forw_packet_aggr->direct_link_flags |=
+			(1 << forw_packet_aggr->num_packets);
+}
+
+void add_bat_packet_to_list(struct bat_priv *bat_priv,
+			    unsigned char *packet_buff, int packet_len,
+			    struct batman_if *if_incoming, char own_packet,
+			    unsigned long send_time)
+{
+	/**
+	 * _aggr -> pointer to the packet we want to aggregate with
+	 * _pos -> pointer to the position in the queue
+	 */
+	struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
+	struct hlist_node *tmp_node;
+	struct batman_packet *batman_packet =
+		(struct batman_packet *)packet_buff;
+	bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
+
+	/* find position for the packet in the forward queue */
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	/* own packets are not to be aggregated */
+	if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
+		hlist_for_each_entry(forw_packet_pos, tmp_node,
+				     &bat_priv->forw_bat_list, list) {
+			if (can_aggregate_with(batman_packet,
+					       packet_len,
+					       send_time,
+					       direct_link,
+					       if_incoming,
+					       forw_packet_pos)) {
+				forw_packet_aggr = forw_packet_pos;
+				break;
+			}
+		}
+	}
+
+	/* nothing to aggregate with - either aggregation disabled or no
+	 * suitable aggregation packet found */
+	if (!forw_packet_aggr) {
+		/* the following section can run without the lock */
+		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+		/**
+		 * if we could not aggregate this packet with one of the others
+		 * we hold it back for a while, so that it might be aggregated
+		 * later on
+		 */
+		if ((!own_packet) &&
+		    (atomic_read(&bat_priv->aggregated_ogms)))
+			send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
+
+		new_aggregated_packet(packet_buff, packet_len,
+				      send_time, direct_link,
+				      if_incoming, own_packet);
+	} else {
+		aggregate(forw_packet_aggr,
+			  packet_buff, packet_len,
+			  direct_link);
+		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+	}
+}
+
+/* unpack the aggregated packets and process them one by one */
+void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
+			     int packet_len, struct batman_if *if_incoming)
+{
+	struct batman_packet *batman_packet;
+	int buff_pos = 0;
+	unsigned char *hna_buff;
+
+	batman_packet = (struct batman_packet *)packet_buff;
+
+	do {
+		/* network to host order for our 32bit seqno, and the
+		   orig_interval. */
+		batman_packet->seqno = ntohl(batman_packet->seqno);
+
+		hna_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
+		receive_bat_packet(ethhdr, batman_packet,
+				   hna_buff, hna_len(batman_packet),
+				   if_incoming);
+
+		buff_pos += BAT_PACKET_LEN + hna_len(batman_packet);
+		batman_packet = (struct batman_packet *)
+			(packet_buff + buff_pos);
+	} while (aggregated_packet(buff_pos, packet_len,
+				   batman_packet->num_hna));
+}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
new file mode 100644
index 000000000000..71a91b3da913
--- /dev/null
+++ b/net/batman-adv/aggregation.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
+#define _NET_BATMAN_ADV_AGGREGATION_H_
+
+#include "main.h"
+
+/* is there another aggregated packet here? */
+static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna)
+{
+	int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_hna * ETH_ALEN);
+
+	return (next_buff_pos <= packet_len) &&
+		(next_buff_pos <= MAX_AGGREGATION_BYTES);
+}
+
+void add_bat_packet_to_list(struct bat_priv *bat_priv,
+			    unsigned char *packet_buff, int packet_len,
+			    struct batman_if *if_incoming, char own_packet,
+			    unsigned long send_time);
+void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff,
+			     int packet_len, struct batman_if *if_incoming);
+
+#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
new file mode 100644
index 000000000000..0ae81d07f102
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.c
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+
+#include <linux/debugfs.h>
+
+#include "bat_debugfs.h"
+#include "translation-table.h"
+#include "originator.h"
+#include "hard-interface.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "soft-interface.h"
+#include "vis.h"
+#include "icmp_socket.h"
+
+static struct dentry *bat_debugfs;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+#define LOG_BUFF_MASK (log_buff_len-1)
+#define LOG_BUFF(idx) (debug_log->log_buff[(idx) & LOG_BUFF_MASK])
+
+static int log_buff_len = LOG_BUF_LEN;
+
+static void emit_log_char(struct debug_log *debug_log, char c)
+{
+	LOG_BUFF(debug_log->log_end) = c;
+	debug_log->log_end++;
+
+	if (debug_log->log_end - debug_log->log_start > log_buff_len)
+		debug_log->log_start = debug_log->log_end - log_buff_len;
+}
+
+static int fdebug_log(struct debug_log *debug_log, char *fmt, ...)
+{
+	int printed_len;
+	va_list args;
+	static char debug_log_buf[256];
+	char *p;
+
+	if (!debug_log)
+		return 0;
+
+	spin_lock_bh(&debug_log->lock);
+	va_start(args, fmt);
+	printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf),
+				 fmt, args);
+	va_end(args);
+
+	for (p = debug_log_buf; *p != 0; p++)
+		emit_log_char(debug_log, *p);
+
+	spin_unlock_bh(&debug_log->lock);
+
+	wake_up(&debug_log->queue_wait);
+
+	return 0;
+}
+
+int debug_log(struct bat_priv *bat_priv, char *fmt, ...)
+{
+	va_list args;
+	char tmp_log_buf[256];
+
+	va_start(args, fmt);
+	vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
+	fdebug_log(bat_priv->debug_log, "[%10u] %s",
+		   (jiffies / HZ), tmp_log_buf);
+	va_end(args);
+
+	return 0;
+}
+
+static int log_open(struct inode *inode, struct file *file)
+{
+	nonseekable_open(inode, file);
+	file->private_data = inode->i_private;
+	inc_module_count();
+	return 0;
+}
+
+static int log_release(struct inode *inode, struct file *file)
+{
+	dec_module_count();
+	return 0;
+}
+
+static ssize_t log_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
+{
+	struct bat_priv *bat_priv = file->private_data;
+	struct debug_log *debug_log = bat_priv->debug_log;
+	int error, i = 0;
+	char c;
+
+	if ((file->f_flags & O_NONBLOCK) &&
+	    !(debug_log->log_end - debug_log->log_start))
+		return -EAGAIN;
+
+	if ((!buf) || (count < 0))
+		return -EINVAL;
+
+	if (count == 0)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	error = wait_event_interruptible(debug_log->queue_wait,
+				(debug_log->log_start - debug_log->log_end));
+
+	if (error)
+		return error;
+
+	spin_lock_bh(&debug_log->lock);
+
+	while ((!error) && (i < count) &&
+	       (debug_log->log_start != debug_log->log_end)) {
+		c = LOG_BUFF(debug_log->log_start);
+
+		debug_log->log_start++;
+
+		spin_unlock_bh(&debug_log->lock);
+
+		error = __put_user(c, buf);
+
+		spin_lock_bh(&debug_log->lock);
+
+		buf++;
+		i++;
+
+	}
+
+	spin_unlock_bh(&debug_log->lock);
+
+	if (!error)
+		return i;
+
+	return error;
+}
+
+static unsigned int log_poll(struct file *file, poll_table *wait)
+{
+	struct bat_priv *bat_priv = file->private_data;
+	struct debug_log *debug_log = bat_priv->debug_log;
+
+	poll_wait(file, &debug_log->queue_wait, wait);
+
+	if (debug_log->log_end - debug_log->log_start)
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static const struct file_operations log_fops = {
+	.open           = log_open,
+	.release        = log_release,
+	.read           = log_read,
+	.poll           = log_poll,
+	.llseek         = no_llseek,
+};
+
+static int debug_log_setup(struct bat_priv *bat_priv)
+{
+	struct dentry *d;
+
+	if (!bat_priv->debug_dir)
+		goto err;
+
+	bat_priv->debug_log = kzalloc(sizeof(struct debug_log), GFP_ATOMIC);
+	if (!bat_priv->debug_log)
+		goto err;
+
+	spin_lock_init(&bat_priv->debug_log->lock);
+	init_waitqueue_head(&bat_priv->debug_log->queue_wait);
+
+	d = debugfs_create_file("log", S_IFREG | S_IRUSR,
+				bat_priv->debug_dir, bat_priv, &log_fops);
+	if (d)
+		goto err;
+
+	return 0;
+
+err:
+	return 1;
+}
+
+static void debug_log_cleanup(struct bat_priv *bat_priv)
+{
+	kfree(bat_priv->debug_log);
+	bat_priv->debug_log = NULL;
+}
+#else /* CONFIG_BATMAN_ADV_DEBUG */
+static int debug_log_setup(struct bat_priv *bat_priv)
+{
+	bat_priv->debug_log = NULL;
+	return 0;
+}
+
+static void debug_log_cleanup(struct bat_priv *bat_priv)
+{
+	return;
+}
+#endif
+
+static int originators_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, orig_seq_print_text, net_dev);
+}
+
+static int gateways_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, gw_client_seq_print_text, net_dev);
+}
+
+static int softif_neigh_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, softif_neigh_seq_print_text, net_dev);
+}
+
+static int transtable_global_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, hna_global_seq_print_text, net_dev);
+}
+
+static int transtable_local_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, hna_local_seq_print_text, net_dev);
+}
+
+static int vis_data_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, vis_seq_print_text, net_dev);
+}
+
+struct bat_debuginfo {
+	struct attribute attr;
+	const struct file_operations fops;
+};
+
+#define BAT_DEBUGINFO(_name, _mode, _open)	\
+struct bat_debuginfo bat_debuginfo_##_name = {	\
+	.attr = { .name = __stringify(_name),	\
+		  .mode = _mode, },		\
+	.fops = { .owner = THIS_MODULE,		\
+		  .open = _open,		\
+		  .read	= seq_read,		\
+		  .llseek = seq_lseek,		\
+		  .release = single_release,	\
+		}				\
+};
+
+static BAT_DEBUGINFO(originators, S_IRUGO, originators_open);
+static BAT_DEBUGINFO(gateways, S_IRUGO, gateways_open);
+static BAT_DEBUGINFO(softif_neigh, S_IRUGO, softif_neigh_open);
+static BAT_DEBUGINFO(transtable_global, S_IRUGO, transtable_global_open);
+static BAT_DEBUGINFO(transtable_local, S_IRUGO, transtable_local_open);
+static BAT_DEBUGINFO(vis_data, S_IRUGO, vis_data_open);
+
+static struct bat_debuginfo *mesh_debuginfos[] = {
+	&bat_debuginfo_originators,
+	&bat_debuginfo_gateways,
+	&bat_debuginfo_softif_neigh,
+	&bat_debuginfo_transtable_global,
+	&bat_debuginfo_transtable_local,
+	&bat_debuginfo_vis_data,
+	NULL,
+};
+
+void debugfs_init(void)
+{
+	bat_debugfs = debugfs_create_dir(DEBUGFS_BAT_SUBDIR, NULL);
+	if (bat_debugfs == ERR_PTR(-ENODEV))
+		bat_debugfs = NULL;
+}
+
+void debugfs_destroy(void)
+{
+	if (bat_debugfs) {
+		debugfs_remove_recursive(bat_debugfs);
+		bat_debugfs = NULL;
+	}
+}
+
+int debugfs_add_meshif(struct net_device *dev)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	struct bat_debuginfo **bat_debug;
+	struct dentry *file;
+
+	if (!bat_debugfs)
+		goto out;
+
+	bat_priv->debug_dir = debugfs_create_dir(dev->name, bat_debugfs);
+	if (!bat_priv->debug_dir)
+		goto out;
+
+	bat_socket_setup(bat_priv);
+	debug_log_setup(bat_priv);
+
+	for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) {
+		file = debugfs_create_file(((*bat_debug)->attr).name,
+					  S_IFREG | ((*bat_debug)->attr).mode,
+					  bat_priv->debug_dir,
+					  dev, &(*bat_debug)->fops);
+		if (!file) {
+			bat_err(dev, "Can't add debugfs file: %s/%s\n",
+				dev->name, ((*bat_debug)->attr).name);
+			goto rem_attr;
+		}
+	}
+
+	return 0;
+rem_attr:
+	debugfs_remove_recursive(bat_priv->debug_dir);
+	bat_priv->debug_dir = NULL;
+out:
+#ifdef CONFIG_DEBUG_FS
+	return -ENOMEM;
+#else
+	return 0;
+#endif /* CONFIG_DEBUG_FS */
+}
+
+void debugfs_del_meshif(struct net_device *dev)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+
+	debug_log_cleanup(bat_priv);
+
+	if (bat_debugfs) {
+		debugfs_remove_recursive(bat_priv->debug_dir);
+		bat_priv->debug_dir = NULL;
+	}
+}
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
new file mode 100644
index 000000000000..72df532b7d5f
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+
+#ifndef _NET_BATMAN_ADV_DEBUGFS_H_
+#define _NET_BATMAN_ADV_DEBUGFS_H_
+
+#define DEBUGFS_BAT_SUBDIR "batman_adv"
+
+void debugfs_init(void);
+void debugfs_destroy(void);
+int debugfs_add_meshif(struct net_device *dev);
+void debugfs_del_meshif(struct net_device *dev);
+
+#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
new file mode 100644
index 000000000000..cd7bb51825f1
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "bat_sysfs.h"
+#include "translation-table.h"
+#include "originator.h"
+#include "hard-interface.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "vis.h"
+
+#define to_dev(obj)		container_of(obj, struct device, kobj)
+#define kobj_to_netdev(obj)	to_net_dev(to_dev(obj->parent))
+#define kobj_to_batpriv(obj)	netdev_priv(kobj_to_netdev(obj))
+
+/* Use this, if you have customized show and store functions */
+#define BAT_ATTR(_name, _mode, _show, _store)	\
+struct bat_attribute bat_attr_##_name = {	\
+	.attr = {.name = __stringify(_name),	\
+		 .mode = _mode },		\
+	.show   = _show,			\
+	.store  = _store,			\
+};
+
+#define BAT_ATTR_STORE_BOOL(_name, _post_func)				\
+ssize_t store_##_name(struct kobject *kobj, struct attribute *attr,	\
+		      char *buff, size_t count)				\
+{									\
+	struct net_device *net_dev = kobj_to_netdev(kobj);		\
+	struct bat_priv *bat_priv = netdev_priv(net_dev);		\
+	return __store_bool_attr(buff, count, _post_func, attr,		\
+				 &bat_priv->_name, net_dev);		\
+}
+
+#define BAT_ATTR_SHOW_BOOL(_name)					\
+ssize_t show_##_name(struct kobject *kobj, struct attribute *attr,	\
+			    char *buff)					\
+{									\
+	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);		\
+	return sprintf(buff, "%s\n",					\
+		       atomic_read(&bat_priv->_name) == 0 ?		\
+		       "disabled" : "enabled");				\
+}									\
+
+/* Use this, if you are going to turn a [name] in bat_priv on or off */
+#define BAT_ATTR_BOOL(_name, _mode, _post_func)				\
+	static BAT_ATTR_STORE_BOOL(_name, _post_func)			\
+	static BAT_ATTR_SHOW_BOOL(_name)				\
+	static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
+
+
+#define BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func)		\
+ssize_t store_##_name(struct kobject *kobj, struct attribute *attr,	\
+			     char *buff, size_t count)			\
+{									\
+	struct net_device *net_dev = kobj_to_netdev(kobj);		\
+	struct bat_priv *bat_priv = netdev_priv(net_dev);		\
+	return __store_uint_attr(buff, count, _min, _max, _post_func,	\
+				 attr, &bat_priv->_name, net_dev);	\
+}
+
+#define BAT_ATTR_SHOW_UINT(_name)					\
+ssize_t show_##_name(struct kobject *kobj, struct attribute *attr,	\
+			    char *buff)					\
+{									\
+	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);		\
+	return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name));	\
+}									\
+
+/* Use this, if you are going to set [name] in bat_priv to unsigned integer
+ * values only */
+#define BAT_ATTR_UINT(_name, _mode, _min, _max, _post_func)		\
+	static BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func)	\
+	static BAT_ATTR_SHOW_UINT(_name)				\
+	static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
+
+
+static int store_bool_attr(char *buff, size_t count,
+			   struct net_device *net_dev,
+			   char *attr_name, atomic_t *attr)
+{
+	int enabled = -1;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if ((strncmp(buff, "1", 2) == 0) ||
+	    (strncmp(buff, "enable", 7) == 0) ||
+	    (strncmp(buff, "enabled", 8) == 0))
+		enabled = 1;
+
+	if ((strncmp(buff, "0", 2) == 0) ||
+	    (strncmp(buff, "disable", 8) == 0) ||
+	    (strncmp(buff, "disabled", 9) == 0))
+		enabled = 0;
+
+	if (enabled < 0) {
+		bat_info(net_dev,
+			 "%s: Invalid parameter received: %s\n",
+			 attr_name, buff);
+		return -EINVAL;
+	}
+
+	if (atomic_read(attr) == enabled)
+		return count;
+
+	bat_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name,
+		 atomic_read(attr) == 1 ? "enabled" : "disabled",
+		 enabled == 1 ? "enabled" : "disabled");
+
+	atomic_set(attr, (unsigned)enabled);
+	return count;
+}
+
+static inline ssize_t __store_bool_attr(char *buff, size_t count,
+			void (*post_func)(struct net_device *),
+			struct attribute *attr,
+			atomic_t *attr_store, struct net_device *net_dev)
+{
+	int ret;
+
+	ret = store_bool_attr(buff, count, net_dev, (char *)attr->name,
+			      attr_store);
+	if (post_func && ret)
+		post_func(net_dev);
+
+	return ret;
+}
+
+static int store_uint_attr(char *buff, size_t count,
+			   struct net_device *net_dev, char *attr_name,
+			   unsigned int min, unsigned int max, atomic_t *attr)
+{
+	unsigned long uint_val;
+	int ret;
+
+	ret = strict_strtoul(buff, 10, &uint_val);
+	if (ret) {
+		bat_info(net_dev,
+			 "%s: Invalid parameter received: %s\n",
+			 attr_name, buff);
+		return -EINVAL;
+	}
+
+	if (uint_val < min) {
+		bat_info(net_dev, "%s: Value is too small: %lu min: %u\n",
+			 attr_name, uint_val, min);
+		return -EINVAL;
+	}
+
+	if (uint_val > max) {
+		bat_info(net_dev, "%s: Value is too big: %lu max: %u\n",
+			 attr_name, uint_val, max);
+		return -EINVAL;
+	}
+
+	if (atomic_read(attr) == uint_val)
+		return count;
+
+	bat_info(net_dev, "%s: Changing from: %i to: %lu\n",
+		 attr_name, atomic_read(attr), uint_val);
+
+	atomic_set(attr, uint_val);
+	return count;
+}
+
+static inline ssize_t __store_uint_attr(char *buff, size_t count,
+			int min, int max,
+			void (*post_func)(struct net_device *),
+			struct attribute *attr,
+			atomic_t *attr_store, struct net_device *net_dev)
+{
+	int ret;
+
+	ret = store_uint_attr(buff, count, net_dev, (char *)attr->name,
+			      min, max, attr_store);
+	if (post_func && ret)
+		post_func(net_dev);
+
+	return ret;
+}
+
+static ssize_t show_vis_mode(struct kobject *kobj, struct attribute *attr,
+			     char *buff)
+{
+	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
+	int vis_mode = atomic_read(&bat_priv->vis_mode);
+
+	return sprintf(buff, "%s\n",
+		       vis_mode == VIS_TYPE_CLIENT_UPDATE ?
+							"client" : "server");
+}
+
+static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr,
+			      char *buff, size_t count)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	unsigned long val;
+	int ret, vis_mode_tmp = -1;
+
+	ret = strict_strtoul(buff, 10, &val);
+
+	if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) ||
+	    (strncmp(buff, "client", 6) == 0) ||
+	    (strncmp(buff, "off", 3) == 0))
+		vis_mode_tmp = VIS_TYPE_CLIENT_UPDATE;
+
+	if (((count == 2) && (!ret) && (val == VIS_TYPE_SERVER_SYNC)) ||
+	    (strncmp(buff, "server", 6) == 0))
+		vis_mode_tmp = VIS_TYPE_SERVER_SYNC;
+
+	if (vis_mode_tmp < 0) {
+		if (buff[count - 1] == '\n')
+			buff[count - 1] = '\0';
+
+		bat_info(net_dev,
+			 "Invalid parameter for 'vis mode' setting received: "
+			 "%s\n", buff);
+		return -EINVAL;
+	}
+
+	if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp)
+		return count;
+
+	bat_info(net_dev, "Changing vis mode from: %s to: %s\n",
+		 atomic_read(&bat_priv->vis_mode) == VIS_TYPE_CLIENT_UPDATE ?
+		 "client" : "server", vis_mode_tmp == VIS_TYPE_CLIENT_UPDATE ?
+		 "client" : "server");
+
+	atomic_set(&bat_priv->vis_mode, (unsigned)vis_mode_tmp);
+	return count;
+}
+
+static void post_gw_deselect(struct net_device *net_dev)
+{
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	gw_deselect(bat_priv);
+}
+
+static ssize_t show_gw_mode(struct kobject *kobj, struct attribute *attr,
+			    char *buff)
+{
+	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
+	int bytes_written;
+
+	switch (atomic_read(&bat_priv->gw_mode)) {
+	case GW_MODE_CLIENT:
+		bytes_written = sprintf(buff, "%s\n", GW_MODE_CLIENT_NAME);
+		break;
+	case GW_MODE_SERVER:
+		bytes_written = sprintf(buff, "%s\n", GW_MODE_SERVER_NAME);
+		break;
+	default:
+		bytes_written = sprintf(buff, "%s\n", GW_MODE_OFF_NAME);
+		break;
+	}
+
+	return bytes_written;
+}
+
+static ssize_t store_gw_mode(struct kobject *kobj, struct attribute *attr,
+			     char *buff, size_t count)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	char *curr_gw_mode_str;
+	int gw_mode_tmp = -1;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (strncmp(buff, GW_MODE_OFF_NAME, strlen(GW_MODE_OFF_NAME)) == 0)
+		gw_mode_tmp = GW_MODE_OFF;
+
+	if (strncmp(buff, GW_MODE_CLIENT_NAME,
+		   strlen(GW_MODE_CLIENT_NAME)) == 0)
+		gw_mode_tmp = GW_MODE_CLIENT;
+
+	if (strncmp(buff, GW_MODE_SERVER_NAME,
+		   strlen(GW_MODE_SERVER_NAME)) == 0)
+		gw_mode_tmp = GW_MODE_SERVER;
+
+	if (gw_mode_tmp < 0) {
+		bat_info(net_dev,
+			 "Invalid parameter for 'gw mode' setting received: "
+			 "%s\n", buff);
+		return -EINVAL;
+	}
+
+	if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
+		return count;
+
+	switch (atomic_read(&bat_priv->gw_mode)) {
+	case GW_MODE_CLIENT:
+		curr_gw_mode_str = GW_MODE_CLIENT_NAME;
+		break;
+	case GW_MODE_SERVER:
+		curr_gw_mode_str = GW_MODE_SERVER_NAME;
+		break;
+	default:
+		curr_gw_mode_str = GW_MODE_OFF_NAME;
+		break;
+	}
+
+	bat_info(net_dev, "Changing gw mode from: %s to: %s\n",
+		 curr_gw_mode_str, buff);
+
+	gw_deselect(bat_priv);
+	atomic_set(&bat_priv->gw_mode, (unsigned)gw_mode_tmp);
+	return count;
+}
+
+static ssize_t show_gw_bwidth(struct kobject *kobj, struct attribute *attr,
+			      char *buff)
+{
+	struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
+	int down, up;
+	int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth);
+
+	gw_bandwidth_to_kbit(gw_bandwidth, &down, &up);
+	return sprintf(buff, "%i%s/%i%s\n",
+		       (down > 2048 ? down / 1024 : down),
+		       (down > 2048 ? "MBit" : "KBit"),
+		       (up > 2048 ? up / 1024 : up),
+		       (up > 2048 ? "MBit" : "KBit"));
+}
+
+static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
+			       char *buff, size_t count)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	return gw_bandwidth_set(net_dev, buff, count);
+}
+
+BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
+BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
+BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
+static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
+static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
+BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
+BAT_ATTR_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, TQ_MAX_VALUE, NULL);
+BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE,
+	      post_gw_deselect);
+static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth,
+		store_gw_bwidth);
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 3, NULL);
+#endif
+
+static struct bat_attribute *mesh_attrs[] = {
+	&bat_attr_aggregated_ogms,
+	&bat_attr_bonding,
+	&bat_attr_fragmentation,
+	&bat_attr_vis_mode,
+	&bat_attr_gw_mode,
+	&bat_attr_orig_interval,
+	&bat_attr_hop_penalty,
+	&bat_attr_gw_sel_class,
+	&bat_attr_gw_bandwidth,
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	&bat_attr_log_level,
+#endif
+	NULL,
+};
+
+int sysfs_add_meshif(struct net_device *dev)
+{
+	struct kobject *batif_kobject = &dev->dev.kobj;
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	struct bat_attribute **bat_attr;
+	int err;
+
+	bat_priv->mesh_obj = kobject_create_and_add(SYSFS_IF_MESH_SUBDIR,
+						    batif_kobject);
+	if (!bat_priv->mesh_obj) {
+		bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
+			SYSFS_IF_MESH_SUBDIR);
+		goto out;
+	}
+
+	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) {
+		err = sysfs_create_file(bat_priv->mesh_obj,
+					&((*bat_attr)->attr));
+		if (err) {
+			bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
+				dev->name, SYSFS_IF_MESH_SUBDIR,
+				((*bat_attr)->attr).name);
+			goto rem_attr;
+		}
+	}
+
+	return 0;
+
+rem_attr:
+	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
+		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+
+	kobject_put(bat_priv->mesh_obj);
+	bat_priv->mesh_obj = NULL;
+out:
+	return -ENOMEM;
+}
+
+void sysfs_del_meshif(struct net_device *dev)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	struct bat_attribute **bat_attr;
+
+	for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
+		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
+
+	kobject_put(bat_priv->mesh_obj);
+	bat_priv->mesh_obj = NULL;
+}
+
+static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
+			       char *buff)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	ssize_t length;
+
+	if (!batman_if)
+		return 0;
+
+	length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ?
+			 "none" : batman_if->soft_iface->name);
+
+	kref_put(&batman_if->refcount, hardif_free_ref);
+
+	return length;
+}
+
+static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
+				char *buff, size_t count)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	int status_tmp = -1;
+	int ret;
+
+	if (!batman_if)
+		return count;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (strlen(buff) >= IFNAMSIZ) {
+		pr_err("Invalid parameter for 'mesh_iface' setting received: "
+		       "interface name too long '%s'\n", buff);
+		kref_put(&batman_if->refcount, hardif_free_ref);
+		return -EINVAL;
+	}
+
+	if (strncmp(buff, "none", 4) == 0)
+		status_tmp = IF_NOT_IN_USE;
+	else
+		status_tmp = IF_I_WANT_YOU;
+
+	if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) &&
+	    (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) {
+		kref_put(&batman_if->refcount, hardif_free_ref);
+		return count;
+	}
+
+	if (status_tmp == IF_NOT_IN_USE) {
+		rtnl_lock();
+		hardif_disable_interface(batman_if);
+		rtnl_unlock();
+		kref_put(&batman_if->refcount, hardif_free_ref);
+		return count;
+	}
+
+	/* if the interface already is in use */
+	if (batman_if->if_status != IF_NOT_IN_USE) {
+		rtnl_lock();
+		hardif_disable_interface(batman_if);
+		rtnl_unlock();
+	}
+
+	ret = hardif_enable_interface(batman_if, buff);
+	kref_put(&batman_if->refcount, hardif_free_ref);
+
+	return ret;
+}
+
+static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
+				 char *buff)
+{
+	struct net_device *net_dev = kobj_to_netdev(kobj);
+	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	ssize_t length;
+
+	if (!batman_if)
+		return 0;
+
+	switch (batman_if->if_status) {
+	case IF_TO_BE_REMOVED:
+		length = sprintf(buff, "disabling\n");
+		break;
+	case IF_INACTIVE:
+		length = sprintf(buff, "inactive\n");
+		break;
+	case IF_ACTIVE:
+		length = sprintf(buff, "active\n");
+		break;
+	case IF_TO_BE_ACTIVATED:
+		length = sprintf(buff, "enabling\n");
+		break;
+	case IF_NOT_IN_USE:
+	default:
+		length = sprintf(buff, "not in use\n");
+		break;
+	}
+
+	kref_put(&batman_if->refcount, hardif_free_ref);
+
+	return length;
+}
+
+static BAT_ATTR(mesh_iface, S_IRUGO | S_IWUSR,
+		show_mesh_iface, store_mesh_iface);
+static BAT_ATTR(iface_status, S_IRUGO, show_iface_status, NULL);
+
+static struct bat_attribute *batman_attrs[] = {
+	&bat_attr_mesh_iface,
+	&bat_attr_iface_status,
+	NULL,
+};
+
+int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
+{
+	struct kobject *hardif_kobject = &dev->dev.kobj;
+	struct bat_attribute **bat_attr;
+	int err;
+
+	*hardif_obj = kobject_create_and_add(SYSFS_IF_BAT_SUBDIR,
+						    hardif_kobject);
+
+	if (!*hardif_obj) {
+		bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
+			SYSFS_IF_BAT_SUBDIR);
+		goto out;
+	}
+
+	for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) {
+		err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr));
+		if (err) {
+			bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
+				dev->name, SYSFS_IF_BAT_SUBDIR,
+				((*bat_attr)->attr).name);
+			goto rem_attr;
+		}
+	}
+
+	return 0;
+
+rem_attr:
+	for (bat_attr = batman_attrs; *bat_attr; ++bat_attr)
+		sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr));
+out:
+	return -ENOMEM;
+}
+
+void sysfs_del_hardif(struct kobject **hardif_obj)
+{
+	kobject_put(*hardif_obj);
+	*hardif_obj = NULL;
+}
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
new file mode 100644
index 000000000000..7f186c007b4f
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+
+#ifndef _NET_BATMAN_ADV_SYSFS_H_
+#define _NET_BATMAN_ADV_SYSFS_H_
+
+#define SYSFS_IF_MESH_SUBDIR "mesh"
+#define SYSFS_IF_BAT_SUBDIR "batman_adv"
+
+struct bat_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
+			 char *buf, size_t count);
+};
+
+int sysfs_add_meshif(struct net_device *dev);
+void sysfs_del_meshif(struct net_device *dev);
+int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev);
+void sysfs_del_hardif(struct kobject **hardif_obj);
+
+#endif /* _NET_BATMAN_ADV_SYSFS_H_ */
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
new file mode 100644
index 000000000000..bbcd8f744cdd
--- /dev/null
+++ b/net/batman-adv/bitarray.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich, Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "bitarray.h"
+
+#include <linux/bitops.h>
+
+/* returns true if the corresponding bit in the given seq_bits indicates true
+ * and curr_seqno is within range of last_seqno */
+uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno,
+		       uint32_t curr_seqno)
+{
+	int32_t diff, word_offset, word_num;
+
+	diff = last_seqno - curr_seqno;
+	if (diff < 0 || diff >= TQ_LOCAL_WINDOW_SIZE) {
+		return 0;
+	} else {
+		/* which word */
+		word_num = (last_seqno - curr_seqno) / WORD_BIT_SIZE;
+		/* which position in the selected word */
+		word_offset = (last_seqno - curr_seqno) % WORD_BIT_SIZE;
+
+		if (test_bit(word_offset, &seq_bits[word_num]))
+			return 1;
+		else
+			return 0;
+	}
+}
+
+/* turn corresponding bit on, so we can remember that we got the packet */
+void bit_mark(unsigned long *seq_bits, int32_t n)
+{
+	int32_t word_offset, word_num;
+
+	/* if too old, just drop it */
+	if (n < 0 || n >= TQ_LOCAL_WINDOW_SIZE)
+		return;
+
+	/* which word */
+	word_num = n / WORD_BIT_SIZE;
+	/* which position in the selected word */
+	word_offset = n % WORD_BIT_SIZE;
+
+	set_bit(word_offset, &seq_bits[word_num]); /* turn the position on */
+}
+
+/* shift the packet array by n places. */
+static void bit_shift(unsigned long *seq_bits, int32_t n)
+{
+	int32_t word_offset, word_num;
+	int32_t i;
+
+	if (n <= 0 || n >= TQ_LOCAL_WINDOW_SIZE)
+		return;
+
+	word_offset = n % WORD_BIT_SIZE;/* shift how much inside each word */
+	word_num = n / WORD_BIT_SIZE;	/* shift over how much (full) words */
+
+	for (i = NUM_WORDS - 1; i > word_num; i--) {
+		/* going from old to new, so we don't overwrite the data we copy
+		 * from.
+		 *
+		 * left is high, right is low: FEDC BA98 7654 3210
+		 *					  ^^ ^^
+		 *			       vvvv
+		 * ^^^^ = from, vvvvv =to, we'd have word_num==1 and
+		 * word_offset==WORD_BIT_SIZE/2 ????? in this example.
+		 * (=24 bits)
+		 *
+		 * our desired output would be: 9876 5432 1000 0000
+		 * */
+
+		seq_bits[i] =
+			(seq_bits[i - word_num] << word_offset) +
+			/* take the lower port from the left half, shift it left
+			 * to its final position */
+			(seq_bits[i - word_num - 1] >>
+			 (WORD_BIT_SIZE-word_offset));
+		/* and the upper part of the right half and shift it left to
+		 * it's position */
+		/* for our example that would be: word[0] = 9800 + 0076 =
+		 * 9876 */
+	}
+	/* now for our last word, i==word_num, we only have the it's "left"
+	 * half. that's the 1000 word in our example.*/
+
+	seq_bits[i] = (seq_bits[i - word_num] << word_offset);
+
+	/* pad the rest with 0, if there is anything */
+	i--;
+
+	for (; i >= 0; i--)
+		seq_bits[i] = 0;
+}
+
+static void bit_reset_window(unsigned long *seq_bits)
+{
+	int i;
+	for (i = 0; i < NUM_WORDS; i++)
+		seq_bits[i] = 0;
+}
+
+
+/* receive and process one packet within the sequence number window.
+ *
+ * returns:
+ *  1 if the window was moved (either new or very old)
+ *  0 if the window was not moved/shifted.
+ */
+char bit_get_packet(void *priv, unsigned long *seq_bits,
+		    int32_t seq_num_diff, int8_t set_mark)
+{
+	struct bat_priv *bat_priv = (struct bat_priv *)priv;
+
+	/* sequence number is slightly older. We already got a sequence number
+	 * higher than this one, so we just mark it. */
+
+	if ((seq_num_diff <= 0) && (seq_num_diff > -TQ_LOCAL_WINDOW_SIZE)) {
+		if (set_mark)
+			bit_mark(seq_bits, -seq_num_diff);
+		return 0;
+	}
+
+	/* sequence number is slightly newer, so we shift the window and
+	 * set the mark if required */
+
+	if ((seq_num_diff > 0) && (seq_num_diff < TQ_LOCAL_WINDOW_SIZE)) {
+		bit_shift(seq_bits, seq_num_diff);
+
+		if (set_mark)
+			bit_mark(seq_bits, 0);
+		return 1;
+	}
+
+	/* sequence number is much newer, probably missed a lot of packets */
+
+	if ((seq_num_diff >= TQ_LOCAL_WINDOW_SIZE)
+		|| (seq_num_diff < EXPECTED_SEQNO_RANGE)) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"We missed a lot of packets (%i) !\n",
+			seq_num_diff - 1);
+		bit_reset_window(seq_bits);
+		if (set_mark)
+			bit_mark(seq_bits, 0);
+		return 1;
+	}
+
+	/* received a much older packet. The other host either restarted
+	 * or the old packet got delayed somewhere in the network. The
+	 * packet should be dropped without calling this function if the
+	 * seqno window is protected. */
+
+	if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE)
+		|| (seq_num_diff >= EXPECTED_SEQNO_RANGE)) {
+
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Other host probably restarted!\n");
+
+		bit_reset_window(seq_bits);
+		if (set_mark)
+			bit_mark(seq_bits, 0);
+
+		return 1;
+	}
+
+	/* never reached */
+	return 0;
+}
+
+/* count the hamming weight, how many good packets did we receive? just count
+ * the 1's.
+ */
+int bit_packet_count(unsigned long *seq_bits)
+{
+	int i, hamming = 0;
+
+	for (i = 0; i < NUM_WORDS; i++)
+		hamming += hweight_long(seq_bits[i]);
+
+	return hamming;
+}
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
new file mode 100644
index 000000000000..ac54017601b1
--- /dev/null
+++ b/net/batman-adv/bitarray.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich, Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_BITARRAY_H_
+#define _NET_BATMAN_ADV_BITARRAY_H_
+
+#define WORD_BIT_SIZE (sizeof(unsigned long) * 8)
+
+/* returns true if the corresponding bit in the given seq_bits indicates true
+ * and curr_seqno is within range of last_seqno */
+uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno,
+					   uint32_t curr_seqno);
+
+/* turn corresponding bit on, so we can remember that we got the packet */
+void bit_mark(unsigned long *seq_bits, int32_t n);
+
+
+/* receive and process one packet, returns 1 if received seq_num is considered
+ * new, 0 if old  */
+char bit_get_packet(void *priv, unsigned long *seq_bits,
+		    int32_t seq_num_diff, int8_t set_mark);
+
+/* count the hamming weight, how many good packets did we receive? */
+int  bit_packet_count(unsigned long *seq_bits);
+
+#endif /* _NET_BATMAN_ADV_BITARRAY_H_ */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
new file mode 100644
index 000000000000..0065ffb8d96d
--- /dev/null
+++ b/net/batman-adv/gateway_client.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
+#include "hard-interface.h"
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+
+static void gw_node_free_ref(struct kref *refcount)
+{
+	struct gw_node *gw_node;
+
+	gw_node = container_of(refcount, struct gw_node, refcount);
+	kfree(gw_node);
+}
+
+static void gw_node_free_rcu(struct rcu_head *rcu)
+{
+	struct gw_node *gw_node;
+
+	gw_node = container_of(rcu, struct gw_node, rcu);
+	kref_put(&gw_node->refcount, gw_node_free_ref);
+}
+
+void *gw_get_selected(struct bat_priv *bat_priv)
+{
+	struct gw_node *curr_gateway_tmp = bat_priv->curr_gw;
+
+	if (!curr_gateway_tmp)
+		return NULL;
+
+	return curr_gateway_tmp->orig_node;
+}
+
+void gw_deselect(struct bat_priv *bat_priv)
+{
+	struct gw_node *gw_node = bat_priv->curr_gw;
+
+	bat_priv->curr_gw = NULL;
+
+	if (gw_node)
+		kref_put(&gw_node->refcount, gw_node_free_ref);
+}
+
+static struct gw_node *gw_select(struct bat_priv *bat_priv,
+			  struct gw_node *new_gw_node)
+{
+	struct gw_node *curr_gw_node = bat_priv->curr_gw;
+
+	if (new_gw_node)
+		kref_get(&new_gw_node->refcount);
+
+	bat_priv->curr_gw = new_gw_node;
+	return curr_gw_node;
+}
+
+void gw_election(struct bat_priv *bat_priv)
+{
+	struct hlist_node *node;
+	struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL;
+	uint8_t max_tq = 0;
+	uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
+	int down, up;
+
+	/**
+	 * The batman daemon checks here if we already passed a full originator
+	 * cycle in order to make sure we don't choose the first gateway we
+	 * hear about. This check is based on the daemon's uptime which we
+	 * don't have.
+	 **/
+	if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT)
+		return;
+
+	if (bat_priv->curr_gw)
+		return;
+
+	rcu_read_lock();
+	if (hlist_empty(&bat_priv->gw_list)) {
+		rcu_read_unlock();
+
+		if (bat_priv->curr_gw) {
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"Removing selected gateway - "
+				"no gateway in range\n");
+			gw_deselect(bat_priv);
+		}
+
+		return;
+	}
+
+	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+		if (!gw_node->orig_node->router)
+			continue;
+
+		if (gw_node->deleted)
+			continue;
+
+		switch (atomic_read(&bat_priv->gw_sel_class)) {
+		case 1: /* fast connection */
+			gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags,
+					     &down, &up);
+
+			tmp_gw_factor = (gw_node->orig_node->router->tq_avg *
+					 gw_node->orig_node->router->tq_avg *
+					 down * 100 * 100) /
+					 (TQ_LOCAL_WINDOW_SIZE *
+					 TQ_LOCAL_WINDOW_SIZE * 64);
+
+			if ((tmp_gw_factor > max_gw_factor) ||
+			    ((tmp_gw_factor == max_gw_factor) &&
+			     (gw_node->orig_node->router->tq_avg > max_tq)))
+				curr_gw_tmp = gw_node;
+			break;
+
+		default: /**
+			  * 2:  stable connection (use best statistic)
+			  * 3:  fast-switch (use best statistic but change as
+			  *     soon as a better gateway appears)
+			  * XX: late-switch (use best statistic but change as
+			  *     soon as a better gateway appears which has
+			  *     $routing_class more tq points)
+			  **/
+			if (gw_node->orig_node->router->tq_avg > max_tq)
+				curr_gw_tmp = gw_node;
+			break;
+		}
+
+		if (gw_node->orig_node->router->tq_avg > max_tq)
+			max_tq = gw_node->orig_node->router->tq_avg;
+
+		if (tmp_gw_factor > max_gw_factor)
+			max_gw_factor = tmp_gw_factor;
+	}
+
+	if (bat_priv->curr_gw != curr_gw_tmp) {
+		if ((bat_priv->curr_gw) && (!curr_gw_tmp))
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"Removing selected gateway - "
+				"no gateway in range\n");
+		else if ((!bat_priv->curr_gw) && (curr_gw_tmp))
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"Adding route to gateway %pM "
+				"(gw_flags: %i, tq: %i)\n",
+				curr_gw_tmp->orig_node->orig,
+				curr_gw_tmp->orig_node->gw_flags,
+				curr_gw_tmp->orig_node->router->tq_avg);
+		else
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"Changing route to gateway %pM "
+				"(gw_flags: %i, tq: %i)\n",
+				curr_gw_tmp->orig_node->orig,
+				curr_gw_tmp->orig_node->gw_flags,
+				curr_gw_tmp->orig_node->router->tq_avg);
+
+		old_gw_node = gw_select(bat_priv, curr_gw_tmp);
+	}
+
+	rcu_read_unlock();
+
+	/* the kfree() has to be outside of the rcu lock */
+	if (old_gw_node)
+		kref_put(&old_gw_node->refcount, gw_node_free_ref);
+}
+
+void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node)
+{
+	struct gw_node *curr_gateway_tmp = bat_priv->curr_gw;
+	uint8_t gw_tq_avg, orig_tq_avg;
+
+	if (!curr_gateway_tmp)
+		return;
+
+	if (!curr_gateway_tmp->orig_node)
+		goto deselect;
+
+	if (!curr_gateway_tmp->orig_node->router)
+		goto deselect;
+
+	/* this node already is the gateway */
+	if (curr_gateway_tmp->orig_node == orig_node)
+		return;
+
+	if (!orig_node->router)
+		return;
+
+	gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg;
+	orig_tq_avg = orig_node->router->tq_avg;
+
+	/* the TQ value has to be better */
+	if (orig_tq_avg < gw_tq_avg)
+		return;
+
+	/**
+	 * if the routing class is greater than 3 the value tells us how much
+	 * greater the TQ value of the new gateway must be
+	 **/
+	if ((atomic_read(&bat_priv->gw_sel_class) > 3) &&
+	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class)))
+		return;
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Restarting gateway selection: better gateway found (tq curr: "
+		"%i, tq new: %i)\n",
+		gw_tq_avg, orig_tq_avg);
+
+deselect:
+	gw_deselect(bat_priv);
+}
+
+static void gw_node_add(struct bat_priv *bat_priv,
+			struct orig_node *orig_node, uint8_t new_gwflags)
+{
+	struct gw_node *gw_node;
+	int down, up;
+
+	gw_node = kmalloc(sizeof(struct gw_node), GFP_ATOMIC);
+	if (!gw_node)
+		return;
+
+	memset(gw_node, 0, sizeof(struct gw_node));
+	INIT_HLIST_NODE(&gw_node->list);
+	gw_node->orig_node = orig_node;
+	kref_init(&gw_node->refcount);
+
+	spin_lock_bh(&bat_priv->gw_list_lock);
+	hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list);
+	spin_unlock_bh(&bat_priv->gw_list_lock);
+
+	gw_bandwidth_to_kbit(new_gwflags, &down, &up);
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Found new gateway %pM -> gw_class: %i - %i%s/%i%s\n",
+		orig_node->orig, new_gwflags,
+		(down > 2048 ? down / 1024 : down),
+		(down > 2048 ? "MBit" : "KBit"),
+		(up > 2048 ? up / 1024 : up),
+		(up > 2048 ? "MBit" : "KBit"));
+}
+
+void gw_node_update(struct bat_priv *bat_priv,
+		    struct orig_node *orig_node, uint8_t new_gwflags)
+{
+	struct hlist_node *node;
+	struct gw_node *gw_node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+		if (gw_node->orig_node != orig_node)
+			continue;
+
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Gateway class of originator %pM changed from "
+			"%i to %i\n",
+			orig_node->orig, gw_node->orig_node->gw_flags,
+			new_gwflags);
+
+		gw_node->deleted = 0;
+
+		if (new_gwflags == 0) {
+			gw_node->deleted = jiffies;
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"Gateway %pM removed from gateway list\n",
+				orig_node->orig);
+
+			if (gw_node == bat_priv->curr_gw) {
+				rcu_read_unlock();
+				gw_deselect(bat_priv);
+				return;
+			}
+		}
+
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	if (new_gwflags == 0)
+		return;
+
+	gw_node_add(bat_priv, orig_node, new_gwflags);
+}
+
+void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node)
+{
+	return gw_node_update(bat_priv, orig_node, 0);
+}
+
+void gw_node_purge(struct bat_priv *bat_priv)
+{
+	struct gw_node *gw_node;
+	struct hlist_node *node, *node_tmp;
+	unsigned long timeout = 2 * PURGE_TIMEOUT * HZ;
+
+	spin_lock_bh(&bat_priv->gw_list_lock);
+
+	hlist_for_each_entry_safe(gw_node, node, node_tmp,
+				  &bat_priv->gw_list, list) {
+		if (((!gw_node->deleted) ||
+		     (time_before(jiffies, gw_node->deleted + timeout))) &&
+		    atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE)
+			continue;
+
+		if (bat_priv->curr_gw == gw_node)
+			gw_deselect(bat_priv);
+
+		hlist_del_rcu(&gw_node->list);
+		call_rcu(&gw_node->rcu, gw_node_free_rcu);
+	}
+
+
+	spin_unlock_bh(&bat_priv->gw_list_lock);
+}
+
+static int _write_buffer_text(struct bat_priv *bat_priv,
+			      struct seq_file *seq, struct gw_node *gw_node)
+{
+	int down, up;
+
+	gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up);
+
+	return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n",
+		       (bat_priv->curr_gw == gw_node ? "=>" : "  "),
+		       gw_node->orig_node->orig,
+		       gw_node->orig_node->router->tq_avg,
+		       gw_node->orig_node->router->addr,
+		       gw_node->orig_node->router->if_incoming->net_dev->name,
+		       gw_node->orig_node->gw_flags,
+		       (down > 2048 ? down / 1024 : down),
+		       (down > 2048 ? "MBit" : "KBit"),
+		       (up > 2048 ? up / 1024 : up),
+		       (up > 2048 ? "MBit" : "KBit"));
+}
+
+int gw_client_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct gw_node *gw_node;
+	struct hlist_node *node;
+	int gw_count = 0;
+
+	if (!bat_priv->primary_if) {
+
+		return seq_printf(seq, "BATMAN mesh %s disabled - please "
+				  "specify interfaces to enable it\n",
+				  net_dev->name);
+	}
+
+	if (bat_priv->primary_if->if_status != IF_ACTIVE) {
+
+		return seq_printf(seq, "BATMAN mesh %s disabled - "
+				       "primary interface not active\n",
+				       net_dev->name);
+	}
+
+	seq_printf(seq, "      %-12s (%s/%i) %17s [%10s]: gw_class ... "
+		   "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n",
+		   "Gateway", "#", TQ_MAX_VALUE, "Nexthop",
+		   "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR,
+		   bat_priv->primary_if->net_dev->name,
+		   bat_priv->primary_if->net_dev->dev_addr, net_dev->name);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+		if (gw_node->deleted)
+			continue;
+
+		if (!gw_node->orig_node->router)
+			continue;
+
+		_write_buffer_text(bat_priv, seq, gw_node);
+		gw_count++;
+	}
+	rcu_read_unlock();
+
+	if (gw_count == 0)
+		seq_printf(seq, "No gateways in range ...\n");
+
+	return 0;
+}
+
+int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb)
+{
+	struct ethhdr *ethhdr;
+	struct iphdr *iphdr;
+	struct ipv6hdr *ipv6hdr;
+	struct udphdr *udphdr;
+	unsigned int header_len = 0;
+
+	if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF)
+		return 0;
+
+	/* check for ethernet header */
+	if (!pskb_may_pull(skb, header_len + ETH_HLEN))
+		return 0;
+	ethhdr = (struct ethhdr *)skb->data;
+	header_len += ETH_HLEN;
+
+	/* check for initial vlan header */
+	if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) {
+		if (!pskb_may_pull(skb, header_len + VLAN_HLEN))
+			return 0;
+		ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
+		header_len += VLAN_HLEN;
+	}
+
+	/* check for ip header */
+	switch (ntohs(ethhdr->h_proto)) {
+	case ETH_P_IP:
+		if (!pskb_may_pull(skb, header_len + sizeof(struct iphdr)))
+			return 0;
+		iphdr = (struct iphdr *)(skb->data + header_len);
+		header_len += iphdr->ihl * 4;
+
+		/* check for udp header */
+		if (iphdr->protocol != IPPROTO_UDP)
+			return 0;
+
+		break;
+	case ETH_P_IPV6:
+		if (!pskb_may_pull(skb, header_len + sizeof(struct ipv6hdr)))
+			return 0;
+		ipv6hdr = (struct ipv6hdr *)(skb->data + header_len);
+		header_len += sizeof(struct ipv6hdr);
+
+		/* check for udp header */
+		if (ipv6hdr->nexthdr != IPPROTO_UDP)
+			return 0;
+
+		break;
+	default:
+		return 0;
+	}
+
+	if (!pskb_may_pull(skb, header_len + sizeof(struct udphdr)))
+		return 0;
+	udphdr = (struct udphdr *)(skb->data + header_len);
+	header_len += sizeof(struct udphdr);
+
+	/* check for bootp port */
+	if ((ntohs(ethhdr->h_proto) == ETH_P_IP) &&
+	     (ntohs(udphdr->dest) != 67))
+		return 0;
+
+	if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) &&
+	    (ntohs(udphdr->dest) != 547))
+		return 0;
+
+	if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)
+		return -1;
+
+	if (!bat_priv->curr_gw)
+		return 0;
+
+	return 1;
+}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
new file mode 100644
index 000000000000..4585e6549844
--- /dev/null
+++ b/net/batman-adv/gateway_client.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
+#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
+
+void gw_deselect(struct bat_priv *bat_priv);
+void gw_election(struct bat_priv *bat_priv);
+void *gw_get_selected(struct bat_priv *bat_priv);
+void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node);
+void gw_node_update(struct bat_priv *bat_priv,
+		    struct orig_node *orig_node, uint8_t new_gwflags);
+void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node);
+void gw_node_purge(struct bat_priv *bat_priv);
+int gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb);
+
+#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
new file mode 100644
index 000000000000..b962982f017e
--- /dev/null
+++ b/net/batman-adv/gateway_common.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+
+/* calculates the gateway class from kbit */
+static void kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class)
+{
+	int mdown = 0, tdown, tup, difference;
+	uint8_t sbit, part;
+
+	*gw_srv_class = 0;
+	difference = 0x0FFFFFFF;
+
+	/* test all downspeeds */
+	for (sbit = 0; sbit < 2; sbit++) {
+		for (part = 0; part < 16; part++) {
+			tdown = 32 * (sbit + 2) * (1 << part);
+
+			if (abs(tdown - down) < difference) {
+				*gw_srv_class = (sbit << 7) + (part << 3);
+				difference = abs(tdown - down);
+				mdown = tdown;
+			}
+		}
+	}
+
+	/* test all upspeeds */
+	difference = 0x0FFFFFFF;
+
+	for (part = 0; part < 8; part++) {
+		tup = ((part + 1) * (mdown)) / 8;
+
+		if (abs(tup - up) < difference) {
+			*gw_srv_class = (*gw_srv_class & 0xF8) | part;
+			difference = abs(tup - up);
+		}
+	}
+}
+
+/* returns the up and downspeeds in kbit, calculated from the class */
+void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up)
+{
+	char sbit = (gw_srv_class & 0x80) >> 7;
+	char dpart = (gw_srv_class & 0x78) >> 3;
+	char upart = (gw_srv_class & 0x07);
+
+	if (!gw_srv_class) {
+		*down = 0;
+		*up = 0;
+		return;
+	}
+
+	*down = 32 * (sbit + 2) * (1 << dpart);
+	*up = ((upart + 1) * (*down)) / 8;
+}
+
+static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff,
+			       long *up, long *down)
+{
+	int ret, multi = 1;
+	char *slash_ptr, *tmp_ptr;
+
+	slash_ptr = strchr(buff, '/');
+	if (slash_ptr)
+		*slash_ptr = 0;
+
+	if (strlen(buff) > 4) {
+		tmp_ptr = buff + strlen(buff) - 4;
+
+		if (strnicmp(tmp_ptr, "mbit", 4) == 0)
+			multi = 1024;
+
+		if ((strnicmp(tmp_ptr, "kbit", 4) == 0) ||
+			(multi > 1))
+			*tmp_ptr = '\0';
+	}
+
+	ret = strict_strtoul(buff, 10, down);
+	if (ret) {
+		bat_err(net_dev,
+			"Download speed of gateway mode invalid: %s\n",
+			buff);
+		return false;
+	}
+
+	*down *= multi;
+
+	/* we also got some upload info */
+	if (slash_ptr) {
+		multi = 1;
+
+		if (strlen(slash_ptr + 1) > 4) {
+			tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1);
+
+			if (strnicmp(tmp_ptr, "mbit", 4) == 0)
+				multi = 1024;
+
+			if ((strnicmp(tmp_ptr, "kbit", 4) == 0) ||
+				(multi > 1))
+				*tmp_ptr = '\0';
+		}
+
+		ret = strict_strtoul(slash_ptr + 1, 10, up);
+		if (ret) {
+			bat_err(net_dev,
+				"Upload speed of gateway mode invalid: "
+				"%s\n", slash_ptr + 1);
+			return false;
+		}
+
+		*up *= multi;
+	}
+
+	return true;
+}
+
+ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count)
+{
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	long gw_bandwidth_tmp = 0, up = 0, down = 0;
+	bool ret;
+
+	ret = parse_gw_bandwidth(net_dev, buff, &up, &down);
+	if (!ret)
+		goto end;
+
+	if ((!down) || (down < 256))
+		down = 2000;
+
+	if (!up)
+		up = down / 5;
+
+	kbit_to_gw_bandwidth(down, up, &gw_bandwidth_tmp);
+
+	/**
+	 * the gw bandwidth we guessed above might not match the given
+	 * speeds, hence we need to calculate it back to show the number
+	 * that is going to be propagated
+	 **/
+	gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp,
+			     (int *)&down, (int *)&up);
+
+	gw_deselect(bat_priv);
+	bat_info(net_dev, "Changing gateway bandwidth from: '%i' to: '%ld' "
+		 "(propagating: %ld%s/%ld%s)\n",
+		 atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp,
+		 (down > 2048 ? down / 1024 : down),
+		 (down > 2048 ? "MBit" : "KBit"),
+		 (up > 2048 ? up / 1024 : up),
+		 (up > 2048 ? "MBit" : "KBit"));
+
+	atomic_set(&bat_priv->gw_bandwidth, gw_bandwidth_tmp);
+
+end:
+	return count;
+}
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
new file mode 100644
index 000000000000..5e728d0b7959
--- /dev/null
+++ b/net/batman-adv/gateway_common.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_GATEWAY_COMMON_H_
+#define _NET_BATMAN_ADV_GATEWAY_COMMON_H_
+
+enum gw_modes {
+	GW_MODE_OFF,
+	GW_MODE_CLIENT,
+	GW_MODE_SERVER,
+};
+
+#define GW_MODE_OFF_NAME	"off"
+#define GW_MODE_CLIENT_NAME	"client"
+#define GW_MODE_SERVER_NAME	"server"
+
+void gw_bandwidth_to_kbit(uint8_t gw_class, int *down, int *up);
+ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count);
+
+#endif /* _NET_BATMAN_ADV_GATEWAY_COMMON_H_ */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
new file mode 100644
index 000000000000..4f95777ce080
--- /dev/null
+++ b/net/batman-adv/hard-interface.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "hard-interface.h"
+#include "soft-interface.h"
+#include "send.h"
+#include "translation-table.h"
+#include "routing.h"
+#include "bat_sysfs.h"
+#include "originator.h"
+#include "hash.h"
+
+#include <linux/if_arp.h>
+
+/* protect update critical side of if_list - but not the content */
+static DEFINE_SPINLOCK(if_list_lock);
+
+static void hardif_free_rcu(struct rcu_head *rcu)
+{
+	struct batman_if *batman_if;
+
+	batman_if = container_of(rcu, struct batman_if, rcu);
+	dev_put(batman_if->net_dev);
+	kref_put(&batman_if->refcount, hardif_free_ref);
+}
+
+struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev)
+{
+	struct batman_if *batman_if;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->net_dev == net_dev)
+			goto out;
+	}
+
+	batman_if = NULL;
+
+out:
+	if (batman_if)
+		kref_get(&batman_if->refcount);
+
+	rcu_read_unlock();
+	return batman_if;
+}
+
+static int is_valid_iface(struct net_device *net_dev)
+{
+	if (net_dev->flags & IFF_LOOPBACK)
+		return 0;
+
+	if (net_dev->type != ARPHRD_ETHER)
+		return 0;
+
+	if (net_dev->addr_len != ETH_ALEN)
+		return 0;
+
+	/* no batman over batman */
+#ifdef HAVE_NET_DEVICE_OPS
+	if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
+		return 0;
+#else
+	if (net_dev->hard_start_xmit == interface_tx)
+		return 0;
+#endif
+
+	/* Device is being bridged */
+	/* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
+		return 0; */
+
+	return 1;
+}
+
+static struct batman_if *get_active_batman_if(struct net_device *soft_iface)
+{
+	struct batman_if *batman_if;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->soft_iface != soft_iface)
+			continue;
+
+		if (batman_if->if_status == IF_ACTIVE)
+			goto out;
+	}
+
+	batman_if = NULL;
+
+out:
+	if (batman_if)
+		kref_get(&batman_if->refcount);
+
+	rcu_read_unlock();
+	return batman_if;
+}
+
+static void update_primary_addr(struct bat_priv *bat_priv)
+{
+	struct vis_packet *vis_packet;
+
+	vis_packet = (struct vis_packet *)
+				bat_priv->my_vis_info->skb_packet->data;
+	memcpy(vis_packet->vis_orig,
+	       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(vis_packet->sender_orig,
+	       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+}
+
+static void set_primary_if(struct bat_priv *bat_priv,
+			   struct batman_if *batman_if)
+{
+	struct batman_packet *batman_packet;
+	struct batman_if *old_if;
+
+	if (batman_if)
+		kref_get(&batman_if->refcount);
+
+	old_if = bat_priv->primary_if;
+	bat_priv->primary_if = batman_if;
+
+	if (old_if)
+		kref_put(&old_if->refcount, hardif_free_ref);
+
+	if (!bat_priv->primary_if)
+		return;
+
+	batman_packet = (struct batman_packet *)(batman_if->packet_buff);
+	batman_packet->flags = PRIMARIES_FIRST_HOP;
+	batman_packet->ttl = TTL;
+
+	update_primary_addr(bat_priv);
+
+	/***
+	 * hacky trick to make sure that we send the HNA information via
+	 * our new primary interface
+	 */
+	atomic_set(&bat_priv->hna_local_changed, 1);
+}
+
+static bool hardif_is_iface_up(struct batman_if *batman_if)
+{
+	if (batman_if->net_dev->flags & IFF_UP)
+		return true;
+
+	return false;
+}
+
+static void update_mac_addresses(struct batman_if *batman_if)
+{
+	memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig,
+	       batman_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender,
+	       batman_if->net_dev->dev_addr, ETH_ALEN);
+}
+
+static void check_known_mac_addr(struct net_device *net_dev)
+{
+	struct batman_if *batman_if;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if ((batman_if->if_status != IF_ACTIVE) &&
+		    (batman_if->if_status != IF_TO_BE_ACTIVATED))
+			continue;
+
+		if (batman_if->net_dev == net_dev)
+			continue;
+
+		if (!compare_orig(batman_if->net_dev->dev_addr,
+				  net_dev->dev_addr))
+			continue;
+
+		pr_warning("The newly added mac address (%pM) already exists "
+			   "on: %s\n", net_dev->dev_addr,
+			   batman_if->net_dev->name);
+		pr_warning("It is strongly recommended to keep mac addresses "
+			   "unique to avoid problems!\n");
+	}
+	rcu_read_unlock();
+}
+
+int hardif_min_mtu(struct net_device *soft_iface)
+{
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+	struct batman_if *batman_if;
+	/* allow big frames if all devices are capable to do so
+	 * (have MTU > 1500 + BAT_HEADER_LEN) */
+	int min_mtu = ETH_DATA_LEN;
+
+	if (atomic_read(&bat_priv->fragmentation))
+		goto out;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if ((batman_if->if_status != IF_ACTIVE) &&
+		    (batman_if->if_status != IF_TO_BE_ACTIVATED))
+			continue;
+
+		if (batman_if->soft_iface != soft_iface)
+			continue;
+
+		min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN,
+				min_mtu);
+	}
+	rcu_read_unlock();
+out:
+	return min_mtu;
+}
+
+/* adjusts the MTU if a new interface with a smaller MTU appeared. */
+void update_min_mtu(struct net_device *soft_iface)
+{
+	int min_mtu;
+
+	min_mtu = hardif_min_mtu(soft_iface);
+	if (soft_iface->mtu != min_mtu)
+		soft_iface->mtu = min_mtu;
+}
+
+static void hardif_activate_interface(struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv;
+
+	if (batman_if->if_status != IF_INACTIVE)
+		return;
+
+	bat_priv = netdev_priv(batman_if->soft_iface);
+
+	update_mac_addresses(batman_if);
+	batman_if->if_status = IF_TO_BE_ACTIVATED;
+
+	/**
+	 * the first active interface becomes our primary interface or
+	 * the next active interface after the old primay interface was removed
+	 */
+	if (!bat_priv->primary_if)
+		set_primary_if(bat_priv, batman_if);
+
+	bat_info(batman_if->soft_iface, "Interface activated: %s\n",
+		 batman_if->net_dev->name);
+
+	update_min_mtu(batman_if->soft_iface);
+	return;
+}
+
+static void hardif_deactivate_interface(struct batman_if *batman_if)
+{
+	if ((batman_if->if_status != IF_ACTIVE) &&
+	   (batman_if->if_status != IF_TO_BE_ACTIVATED))
+		return;
+
+	batman_if->if_status = IF_INACTIVE;
+
+	bat_info(batman_if->soft_iface, "Interface deactivated: %s\n",
+		 batman_if->net_dev->name);
+
+	update_min_mtu(batman_if->soft_iface);
+}
+
+int hardif_enable_interface(struct batman_if *batman_if, char *iface_name)
+{
+	struct bat_priv *bat_priv;
+	struct batman_packet *batman_packet;
+
+	if (batman_if->if_status != IF_NOT_IN_USE)
+		goto out;
+
+	batman_if->soft_iface = dev_get_by_name(&init_net, iface_name);
+
+	if (!batman_if->soft_iface) {
+		batman_if->soft_iface = softif_create(iface_name);
+
+		if (!batman_if->soft_iface)
+			goto err;
+
+		/* dev_get_by_name() increases the reference counter for us */
+		dev_hold(batman_if->soft_iface);
+	}
+
+	bat_priv = netdev_priv(batman_if->soft_iface);
+	batman_if->packet_len = BAT_PACKET_LEN;
+	batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC);
+
+	if (!batman_if->packet_buff) {
+		bat_err(batman_if->soft_iface, "Can't add interface packet "
+			"(%s): out of memory\n", batman_if->net_dev->name);
+		goto err;
+	}
+
+	batman_packet = (struct batman_packet *)(batman_if->packet_buff);
+	batman_packet->packet_type = BAT_PACKET;
+	batman_packet->version = COMPAT_VERSION;
+	batman_packet->flags = 0;
+	batman_packet->ttl = 2;
+	batman_packet->tq = TQ_MAX_VALUE;
+	batman_packet->num_hna = 0;
+
+	batman_if->if_num = bat_priv->num_ifaces;
+	bat_priv->num_ifaces++;
+	batman_if->if_status = IF_INACTIVE;
+	orig_hash_add_if(batman_if, bat_priv->num_ifaces);
+
+	batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN);
+	batman_if->batman_adv_ptype.func = batman_skb_recv;
+	batman_if->batman_adv_ptype.dev = batman_if->net_dev;
+	kref_get(&batman_if->refcount);
+	dev_add_pack(&batman_if->batman_adv_ptype);
+
+	atomic_set(&batman_if->seqno, 1);
+	atomic_set(&batman_if->frag_seqno, 1);
+	bat_info(batman_if->soft_iface, "Adding interface: %s\n",
+		 batman_if->net_dev->name);
+
+	if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu <
+		ETH_DATA_LEN + BAT_HEADER_LEN)
+		bat_info(batman_if->soft_iface,
+			"The MTU of interface %s is too small (%i) to handle "
+			"the transport of batman-adv packets. Packets going "
+			"over this interface will be fragmented on layer2 "
+			"which could impact the performance. Setting the MTU "
+			"to %zi would solve the problem.\n",
+			batman_if->net_dev->name, batman_if->net_dev->mtu,
+			ETH_DATA_LEN + BAT_HEADER_LEN);
+
+	if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu <
+		ETH_DATA_LEN + BAT_HEADER_LEN)
+		bat_info(batman_if->soft_iface,
+			"The MTU of interface %s is too small (%i) to handle "
+			"the transport of batman-adv packets. If you experience"
+			" problems getting traffic through try increasing the "
+			"MTU to %zi.\n",
+			batman_if->net_dev->name, batman_if->net_dev->mtu,
+			ETH_DATA_LEN + BAT_HEADER_LEN);
+
+	if (hardif_is_iface_up(batman_if))
+		hardif_activate_interface(batman_if);
+	else
+		bat_err(batman_if->soft_iface, "Not using interface %s "
+			"(retrying later): interface not active\n",
+			batman_if->net_dev->name);
+
+	/* begin scheduling originator messages on that interface */
+	schedule_own_packet(batman_if);
+
+out:
+	return 0;
+
+err:
+	return -ENOMEM;
+}
+
+void hardif_disable_interface(struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+
+	if (batman_if->if_status == IF_ACTIVE)
+		hardif_deactivate_interface(batman_if);
+
+	if (batman_if->if_status != IF_INACTIVE)
+		return;
+
+	bat_info(batman_if->soft_iface, "Removing interface: %s\n",
+		 batman_if->net_dev->name);
+	dev_remove_pack(&batman_if->batman_adv_ptype);
+	kref_put(&batman_if->refcount, hardif_free_ref);
+
+	bat_priv->num_ifaces--;
+	orig_hash_del_if(batman_if, bat_priv->num_ifaces);
+
+	if (batman_if == bat_priv->primary_if) {
+		struct batman_if *new_if;
+
+		new_if = get_active_batman_if(batman_if->soft_iface);
+		set_primary_if(bat_priv, new_if);
+
+		if (new_if)
+			kref_put(&new_if->refcount, hardif_free_ref);
+	}
+
+	kfree(batman_if->packet_buff);
+	batman_if->packet_buff = NULL;
+	batman_if->if_status = IF_NOT_IN_USE;
+
+	/* delete all references to this batman_if */
+	purge_orig_ref(bat_priv);
+	purge_outstanding_packets(bat_priv, batman_if);
+	dev_put(batman_if->soft_iface);
+
+	/* nobody uses this interface anymore */
+	if (!bat_priv->num_ifaces)
+		softif_destroy(batman_if->soft_iface);
+
+	batman_if->soft_iface = NULL;
+}
+
+static struct batman_if *hardif_add_interface(struct net_device *net_dev)
+{
+	struct batman_if *batman_if;
+	int ret;
+
+	ret = is_valid_iface(net_dev);
+	if (ret != 1)
+		goto out;
+
+	dev_hold(net_dev);
+
+	batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC);
+	if (!batman_if) {
+		pr_err("Can't add interface (%s): out of memory\n",
+		       net_dev->name);
+		goto release_dev;
+	}
+
+	ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev);
+	if (ret)
+		goto free_if;
+
+	batman_if->if_num = -1;
+	batman_if->net_dev = net_dev;
+	batman_if->soft_iface = NULL;
+	batman_if->if_status = IF_NOT_IN_USE;
+	INIT_LIST_HEAD(&batman_if->list);
+	kref_init(&batman_if->refcount);
+
+	check_known_mac_addr(batman_if->net_dev);
+
+	spin_lock(&if_list_lock);
+	list_add_tail_rcu(&batman_if->list, &if_list);
+	spin_unlock(&if_list_lock);
+
+	/* extra reference for return */
+	kref_get(&batman_if->refcount);
+	return batman_if;
+
+free_if:
+	kfree(batman_if);
+release_dev:
+	dev_put(net_dev);
+out:
+	return NULL;
+}
+
+static void hardif_remove_interface(struct batman_if *batman_if)
+{
+	/* first deactivate interface */
+	if (batman_if->if_status != IF_NOT_IN_USE)
+		hardif_disable_interface(batman_if);
+
+	if (batman_if->if_status != IF_NOT_IN_USE)
+		return;
+
+	batman_if->if_status = IF_TO_BE_REMOVED;
+	sysfs_del_hardif(&batman_if->hardif_obj);
+	call_rcu(&batman_if->rcu, hardif_free_rcu);
+}
+
+void hardif_remove_interfaces(void)
+{
+	struct batman_if *batman_if, *batman_if_tmp;
+	struct list_head if_queue;
+
+	INIT_LIST_HEAD(&if_queue);
+
+	spin_lock(&if_list_lock);
+	list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) {
+		list_del_rcu(&batman_if->list);
+		list_add_tail(&batman_if->list, &if_queue);
+	}
+	spin_unlock(&if_list_lock);
+
+	rtnl_lock();
+	list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) {
+		hardif_remove_interface(batman_if);
+	}
+	rtnl_unlock();
+}
+
+static int hard_if_event(struct notifier_block *this,
+			 unsigned long event, void *ptr)
+{
+	struct net_device *net_dev = (struct net_device *)ptr;
+	struct batman_if *batman_if = get_batman_if_by_netdev(net_dev);
+	struct bat_priv *bat_priv;
+
+	if (!batman_if && event == NETDEV_REGISTER)
+		batman_if = hardif_add_interface(net_dev);
+
+	if (!batman_if)
+		goto out;
+
+	switch (event) {
+	case NETDEV_UP:
+		hardif_activate_interface(batman_if);
+		break;
+	case NETDEV_GOING_DOWN:
+	case NETDEV_DOWN:
+		hardif_deactivate_interface(batman_if);
+		break;
+	case NETDEV_UNREGISTER:
+		spin_lock(&if_list_lock);
+		list_del_rcu(&batman_if->list);
+		spin_unlock(&if_list_lock);
+
+		hardif_remove_interface(batman_if);
+		break;
+	case NETDEV_CHANGEMTU:
+		if (batman_if->soft_iface)
+			update_min_mtu(batman_if->soft_iface);
+		break;
+	case NETDEV_CHANGEADDR:
+		if (batman_if->if_status == IF_NOT_IN_USE)
+			goto hardif_put;
+
+		check_known_mac_addr(batman_if->net_dev);
+		update_mac_addresses(batman_if);
+
+		bat_priv = netdev_priv(batman_if->soft_iface);
+		if (batman_if == bat_priv->primary_if)
+			update_primary_addr(bat_priv);
+		break;
+	default:
+		break;
+	};
+
+hardif_put:
+	kref_put(&batman_if->refcount, hardif_free_ref);
+out:
+	return NOTIFY_DONE;
+}
+
+/* receive a packet with the batman ethertype coming on a hard
+ * interface */
+int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
+	struct packet_type *ptype, struct net_device *orig_dev)
+{
+	struct bat_priv *bat_priv;
+	struct batman_packet *batman_packet;
+	struct batman_if *batman_if;
+	int ret;
+
+	batman_if = container_of(ptype, struct batman_if, batman_adv_ptype);
+	skb = skb_share_check(skb, GFP_ATOMIC);
+
+	/* skb was released by skb_share_check() */
+	if (!skb)
+		goto err_out;
+
+	/* packet should hold at least type and version */
+	if (unlikely(!pskb_may_pull(skb, 2)))
+		goto err_free;
+
+	/* expect a valid ethernet header here. */
+	if (unlikely(skb->mac_len != sizeof(struct ethhdr)
+				|| !skb_mac_header(skb)))
+		goto err_free;
+
+	if (!batman_if->soft_iface)
+		goto err_free;
+
+	bat_priv = netdev_priv(batman_if->soft_iface);
+
+	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
+		goto err_free;
+
+	/* discard frames on not active interfaces */
+	if (batman_if->if_status != IF_ACTIVE)
+		goto err_free;
+
+	batman_packet = (struct batman_packet *)skb->data;
+
+	if (batman_packet->version != COMPAT_VERSION) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: incompatible batman version (%i)\n",
+			batman_packet->version);
+		goto err_free;
+	}
+
+	/* all receive handlers return whether they received or reused
+	 * the supplied skb. if not, we have to free the skb. */
+
+	switch (batman_packet->packet_type) {
+		/* batman originator packet */
+	case BAT_PACKET:
+		ret = recv_bat_packet(skb, batman_if);
+		break;
+
+		/* batman icmp packet */
+	case BAT_ICMP:
+		ret = recv_icmp_packet(skb, batman_if);
+		break;
+
+		/* unicast packet */
+	case BAT_UNICAST:
+		ret = recv_unicast_packet(skb, batman_if);
+		break;
+
+		/* fragmented unicast packet */
+	case BAT_UNICAST_FRAG:
+		ret = recv_ucast_frag_packet(skb, batman_if);
+		break;
+
+		/* broadcast packet */
+	case BAT_BCAST:
+		ret = recv_bcast_packet(skb, batman_if);
+		break;
+
+		/* vis packet */
+	case BAT_VIS:
+		ret = recv_vis_packet(skb, batman_if);
+		break;
+	default:
+		ret = NET_RX_DROP;
+	}
+
+	if (ret == NET_RX_DROP)
+		kfree_skb(skb);
+
+	/* return NET_RX_SUCCESS in any case as we
+	 * most probably dropped the packet for
+	 * routing-logical reasons. */
+
+	return NET_RX_SUCCESS;
+
+err_free:
+	kfree_skb(skb);
+err_out:
+	return NET_RX_DROP;
+}
+
+struct notifier_block hard_if_notifier = {
+	.notifier_call = hard_if_event,
+};
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
new file mode 100644
index 000000000000..30ec3b8db459
--- /dev/null
+++ b/net/batman-adv/hard-interface.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_
+#define _NET_BATMAN_ADV_HARD_INTERFACE_H_
+
+#define IF_NOT_IN_USE 0
+#define IF_TO_BE_REMOVED 1
+#define IF_INACTIVE 2
+#define IF_ACTIVE 3
+#define IF_TO_BE_ACTIVATED 4
+#define IF_I_WANT_YOU 5
+
+extern struct notifier_block hard_if_notifier;
+
+struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev);
+int hardif_enable_interface(struct batman_if *batman_if, char *iface_name);
+void hardif_disable_interface(struct batman_if *batman_if);
+void hardif_remove_interfaces(void);
+int batman_skb_recv(struct sk_buff *skb,
+				struct net_device *dev,
+				struct packet_type *ptype,
+				struct net_device *orig_dev);
+int hardif_min_mtu(struct net_device *soft_iface);
+void update_min_mtu(struct net_device *soft_iface);
+
+static inline void hardif_free_ref(struct kref *refcount)
+{
+	struct batman_if *batman_if;
+
+	batman_if = container_of(refcount, struct batman_if, refcount);
+	kfree(batman_if);
+}
+
+#endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
new file mode 100644
index 000000000000..26e623eb9def
--- /dev/null
+++ b/net/batman-adv/hash.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich, Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "hash.h"
+
+/* clears the hash */
+static void hash_init(struct hashtable_t *hash)
+{
+	int i;
+
+	for (i = 0 ; i < hash->size; i++)
+		INIT_HLIST_HEAD(&hash->table[i]);
+}
+
+/* free only the hashtable and the hash itself. */
+void hash_destroy(struct hashtable_t *hash)
+{
+	kfree(hash->table);
+	kfree(hash);
+}
+
+/* allocates and clears the hash */
+struct hashtable_t *hash_new(int size)
+{
+	struct hashtable_t *hash;
+
+	hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC);
+
+	if (!hash)
+		return NULL;
+
+	hash->size = size;
+	hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC);
+
+	if (!hash->table) {
+		kfree(hash);
+		return NULL;
+	}
+
+	hash_init(hash);
+
+	return hash;
+}
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
new file mode 100644
index 000000000000..09216ade16f1
--- /dev/null
+++ b/net/batman-adv/hash.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich, Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_HASH_H_
+#define _NET_BATMAN_ADV_HASH_H_
+
+#include <linux/list.h>
+
+/* callback to a compare function.  should
+ * compare 2 element datas for their keys,
+ * return 0 if same and not 0 if not
+ * same */
+typedef int (*hashdata_compare_cb)(void *, void *);
+
+/* the hashfunction, should return an index
+ * based on the key in the data of the first
+ * argument and the size the second */
+typedef int (*hashdata_choose_cb)(void *, int);
+typedef void (*hashdata_free_cb)(void *, void *);
+
+struct element_t {
+	void *data;		/* pointer to the data */
+	struct hlist_node hlist;	/* bucket list pointer */
+};
+
+struct hashtable_t {
+	struct hlist_head *table;   /* the hashtable itself, with the buckets */
+	int size;		    /* size of hashtable */
+};
+
+/* allocates and clears the hash */
+struct hashtable_t *hash_new(int size);
+
+/* remove element if you already found the element you want to delete and don't
+ * need the overhead to find it again with hash_remove().  But usually, you
+ * don't want to use this function, as it fiddles with hash-internals. */
+void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem);
+
+/* free only the hashtable and the hash itself. */
+void hash_destroy(struct hashtable_t *hash);
+
+/* remove the hash structure. if hashdata_free_cb != NULL, this function will be
+ * called to remove the elements inside of the hash.  if you don't remove the
+ * elements, memory might be leaked. */
+static inline void hash_delete(struct hashtable_t *hash,
+			       hashdata_free_cb free_cb, void *arg)
+{
+	struct hlist_head *head;
+	struct hlist_node *walk, *safe;
+	struct element_t *bucket;
+	int i;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_safe(walk, safe, head) {
+			bucket = hlist_entry(walk, struct element_t, hlist);
+			if (free_cb)
+				free_cb(bucket->data, arg);
+
+			hlist_del(walk);
+			kfree(bucket);
+		}
+	}
+
+	hash_destroy(hash);
+}
+
+/* adds data to the hashtable. returns 0 on success, -1 on error */
+static inline int hash_add(struct hashtable_t *hash,
+			   hashdata_compare_cb compare,
+			   hashdata_choose_cb choose, void *data)
+{
+	int index;
+	struct hlist_head *head;
+	struct hlist_node *walk, *safe;
+	struct element_t *bucket;
+
+	if (!hash)
+		return -1;
+
+	index = choose(data, hash->size);
+	head = &hash->table[index];
+
+	hlist_for_each_safe(walk, safe, head) {
+		bucket = hlist_entry(walk, struct element_t, hlist);
+		if (compare(bucket->data, data))
+			return -1;
+	}
+
+	/* no duplicate found in list, add new element */
+	bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC);
+
+	if (!bucket)
+		return -1;
+
+	bucket->data = data;
+	hlist_add_head(&bucket->hlist, head);
+
+	return 0;
+}
+
+/* removes data from hash, if found. returns pointer do data on success, so you
+ * can remove the used structure yourself, or NULL on error .  data could be the
+ * structure you use with just the key filled, we just need the key for
+ * comparing. */
+static inline void *hash_remove(struct hashtable_t *hash,
+				hashdata_compare_cb compare,
+				hashdata_choose_cb choose, void *data)
+{
+	size_t index;
+	struct hlist_node *walk;
+	struct element_t *bucket;
+	struct hlist_head *head;
+	void *data_save;
+
+	index = choose(data, hash->size);
+	head = &hash->table[index];
+
+	hlist_for_each_entry(bucket, walk, head, hlist) {
+		if (compare(bucket->data, data)) {
+			data_save = bucket->data;
+			hlist_del(walk);
+			kfree(bucket);
+			return data_save;
+		}
+	}
+
+	return NULL;
+}
+
+/* finds data, based on the key in keydata. returns the found data on success,
+ * or NULL on error */
+static inline void *hash_find(struct hashtable_t *hash,
+			      hashdata_compare_cb compare,
+			      hashdata_choose_cb choose, void *keydata)
+{
+	int index;
+	struct hlist_head *head;
+	struct hlist_node *walk;
+	struct element_t *bucket;
+
+	if (!hash)
+		return NULL;
+
+	index = choose(keydata , hash->size);
+	head = &hash->table[index];
+
+	hlist_for_each(walk, head) {
+		bucket = hlist_entry(walk, struct element_t, hlist);
+		if (compare(bucket->data, keydata))
+			return bucket->data;
+	}
+
+	return NULL;
+}
+
+#endif /* _NET_BATMAN_ADV_HASH_H_ */
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
new file mode 100644
index 000000000000..ecf6d7ffab2e
--- /dev/null
+++ b/net/batman-adv/icmp_socket.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include "icmp_socket.h"
+#include "send.h"
+#include "types.h"
+#include "hash.h"
+#include "originator.h"
+#include "hard-interface.h"
+
+static struct socket_client *socket_client_hash[256];
+
+static void bat_socket_add_packet(struct socket_client *socket_client,
+				  struct icmp_packet_rr *icmp_packet,
+				  size_t icmp_len);
+
+void bat_socket_init(void)
+{
+	memset(socket_client_hash, 0, sizeof(socket_client_hash));
+}
+
+static int bat_socket_open(struct inode *inode, struct file *file)
+{
+	unsigned int i;
+	struct socket_client *socket_client;
+
+	nonseekable_open(inode, file);
+
+	socket_client = kmalloc(sizeof(struct socket_client), GFP_KERNEL);
+
+	if (!socket_client)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(socket_client_hash); i++) {
+		if (!socket_client_hash[i]) {
+			socket_client_hash[i] = socket_client;
+			break;
+		}
+	}
+
+	if (i == ARRAY_SIZE(socket_client_hash)) {
+		pr_err("Error - can't add another packet client: "
+		       "maximum number of clients reached\n");
+		kfree(socket_client);
+		return -EXFULL;
+	}
+
+	INIT_LIST_HEAD(&socket_client->queue_list);
+	socket_client->queue_len = 0;
+	socket_client->index = i;
+	socket_client->bat_priv = inode->i_private;
+	spin_lock_init(&socket_client->lock);
+	init_waitqueue_head(&socket_client->queue_wait);
+
+	file->private_data = socket_client;
+
+	inc_module_count();
+	return 0;
+}
+
+static int bat_socket_release(struct inode *inode, struct file *file)
+{
+	struct socket_client *socket_client = file->private_data;
+	struct socket_packet *socket_packet;
+	struct list_head *list_pos, *list_pos_tmp;
+
+	spin_lock_bh(&socket_client->lock);
+
+	/* for all packets in the queue ... */
+	list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) {
+		socket_packet = list_entry(list_pos,
+					   struct socket_packet, list);
+
+		list_del(list_pos);
+		kfree(socket_packet);
+	}
+
+	socket_client_hash[socket_client->index] = NULL;
+	spin_unlock_bh(&socket_client->lock);
+
+	kfree(socket_client);
+	dec_module_count();
+
+	return 0;
+}
+
+static ssize_t bat_socket_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct socket_client *socket_client = file->private_data;
+	struct socket_packet *socket_packet;
+	size_t packet_len;
+	int error;
+
+	if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+		return -EAGAIN;
+
+	if ((!buf) || (count < sizeof(struct icmp_packet)))
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	error = wait_event_interruptible(socket_client->queue_wait,
+					 socket_client->queue_len);
+
+	if (error)
+		return error;
+
+	spin_lock_bh(&socket_client->lock);
+
+	socket_packet = list_first_entry(&socket_client->queue_list,
+					 struct socket_packet, list);
+	list_del(&socket_packet->list);
+	socket_client->queue_len--;
+
+	spin_unlock_bh(&socket_client->lock);
+
+	error = __copy_to_user(buf, &socket_packet->icmp_packet,
+			       socket_packet->icmp_len);
+
+	packet_len = socket_packet->icmp_len;
+	kfree(socket_packet);
+
+	if (error)
+		return -EFAULT;
+
+	return packet_len;
+}
+
+static ssize_t bat_socket_write(struct file *file, const char __user *buff,
+				size_t len, loff_t *off)
+{
+	struct socket_client *socket_client = file->private_data;
+	struct bat_priv *bat_priv = socket_client->bat_priv;
+	struct sk_buff *skb;
+	struct icmp_packet_rr *icmp_packet;
+
+	struct orig_node *orig_node;
+	struct batman_if *batman_if;
+	size_t packet_len = sizeof(struct icmp_packet);
+	uint8_t dstaddr[ETH_ALEN];
+
+	if (len < sizeof(struct icmp_packet)) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Error - can't send packet from char device: "
+			"invalid packet size\n");
+		return -EINVAL;
+	}
+
+	if (!bat_priv->primary_if)
+		return -EFAULT;
+
+	if (len >= sizeof(struct icmp_packet_rr))
+		packet_len = sizeof(struct icmp_packet_rr);
+
+	skb = dev_alloc_skb(packet_len + sizeof(struct ethhdr));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_reserve(skb, sizeof(struct ethhdr));
+	icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len);
+
+	if (!access_ok(VERIFY_READ, buff, packet_len)) {
+		len = -EFAULT;
+		goto free_skb;
+	}
+
+	if (__copy_from_user(icmp_packet, buff, packet_len)) {
+		len = -EFAULT;
+		goto free_skb;
+	}
+
+	if (icmp_packet->packet_type != BAT_ICMP) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Error - can't send packet from char device: "
+			"got bogus packet type (expected: BAT_ICMP)\n");
+		len = -EINVAL;
+		goto free_skb;
+	}
+
+	if (icmp_packet->msg_type != ECHO_REQUEST) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Error - can't send packet from char device: "
+			"got bogus message type (expected: ECHO_REQUEST)\n");
+		len = -EINVAL;
+		goto free_skb;
+	}
+
+	icmp_packet->uid = socket_client->index;
+
+	if (icmp_packet->version != COMPAT_VERSION) {
+		icmp_packet->msg_type = PARAMETER_PROBLEM;
+		icmp_packet->ttl = COMPAT_VERSION;
+		bat_socket_add_packet(socket_client, icmp_packet, packet_len);
+		goto free_skb;
+	}
+
+	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
+		goto dst_unreach;
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
+						   compare_orig, choose_orig,
+						   icmp_packet->dst));
+
+	if (!orig_node)
+		goto unlock;
+
+	if (!orig_node->router)
+		goto unlock;
+
+	batman_if = orig_node->router->if_incoming;
+	memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	if (!batman_if)
+		goto dst_unreach;
+
+	if (batman_if->if_status != IF_ACTIVE)
+		goto dst_unreach;
+
+	memcpy(icmp_packet->orig,
+	       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+
+	if (packet_len == sizeof(struct icmp_packet_rr))
+		memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN);
+
+
+	send_skb_packet(skb, batman_if, dstaddr);
+
+	goto out;
+
+unlock:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+dst_unreach:
+	icmp_packet->msg_type = DESTINATION_UNREACHABLE;
+	bat_socket_add_packet(socket_client, icmp_packet, packet_len);
+free_skb:
+	kfree_skb(skb);
+out:
+	return len;
+}
+
+static unsigned int bat_socket_poll(struct file *file, poll_table *wait)
+{
+	struct socket_client *socket_client = file->private_data;
+
+	poll_wait(file, &socket_client->queue_wait, wait);
+
+	if (socket_client->queue_len > 0)
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static const struct file_operations fops = {
+	.owner = THIS_MODULE,
+	.open = bat_socket_open,
+	.release = bat_socket_release,
+	.read = bat_socket_read,
+	.write = bat_socket_write,
+	.poll = bat_socket_poll,
+	.llseek = no_llseek,
+};
+
+int bat_socket_setup(struct bat_priv *bat_priv)
+{
+	struct dentry *d;
+
+	if (!bat_priv->debug_dir)
+		goto err;
+
+	d = debugfs_create_file(ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR,
+				bat_priv->debug_dir, bat_priv, &fops);
+	if (d)
+		goto err;
+
+	return 0;
+
+err:
+	return 1;
+}
+
+static void bat_socket_add_packet(struct socket_client *socket_client,
+				  struct icmp_packet_rr *icmp_packet,
+				  size_t icmp_len)
+{
+	struct socket_packet *socket_packet;
+
+	socket_packet = kmalloc(sizeof(struct socket_packet), GFP_ATOMIC);
+
+	if (!socket_packet)
+		return;
+
+	INIT_LIST_HEAD(&socket_packet->list);
+	memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len);
+	socket_packet->icmp_len = icmp_len;
+
+	spin_lock_bh(&socket_client->lock);
+
+	/* while waiting for the lock the socket_client could have been
+	 * deleted */
+	if (!socket_client_hash[icmp_packet->uid]) {
+		spin_unlock_bh(&socket_client->lock);
+		kfree(socket_packet);
+		return;
+	}
+
+	list_add_tail(&socket_packet->list, &socket_client->queue_list);
+	socket_client->queue_len++;
+
+	if (socket_client->queue_len > 100) {
+		socket_packet = list_first_entry(&socket_client->queue_list,
+						 struct socket_packet, list);
+
+		list_del(&socket_packet->list);
+		kfree(socket_packet);
+		socket_client->queue_len--;
+	}
+
+	spin_unlock_bh(&socket_client->lock);
+
+	wake_up(&socket_client->queue_wait);
+}
+
+void bat_socket_receive_packet(struct icmp_packet_rr *icmp_packet,
+			       size_t icmp_len)
+{
+	struct socket_client *hash = socket_client_hash[icmp_packet->uid];
+
+	if (hash)
+		bat_socket_add_packet(hash, icmp_packet, icmp_len);
+}
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
new file mode 100644
index 000000000000..bf9b348cde27
--- /dev/null
+++ b/net/batman-adv/icmp_socket.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_
+#define _NET_BATMAN_ADV_ICMP_SOCKET_H_
+
+#include "types.h"
+
+#define ICMP_SOCKET "socket"
+
+void bat_socket_init(void);
+int bat_socket_setup(struct bat_priv *bat_priv);
+void bat_socket_receive_packet(struct icmp_packet_rr *icmp_packet,
+			       size_t icmp_len);
+
+#endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
new file mode 100644
index 000000000000..b827f6a158cb
--- /dev/null
+++ b/net/batman-adv/main.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "bat_sysfs.h"
+#include "bat_debugfs.h"
+#include "routing.h"
+#include "send.h"
+#include "originator.h"
+#include "soft-interface.h"
+#include "icmp_socket.h"
+#include "translation-table.h"
+#include "hard-interface.h"
+#include "gateway_client.h"
+#include "types.h"
+#include "vis.h"
+#include "hash.h"
+
+struct list_head if_list;
+
+unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+struct workqueue_struct *bat_event_workqueue;
+
+static int __init batman_init(void)
+{
+	INIT_LIST_HEAD(&if_list);
+
+	/* the name should not be longer than 10 chars - see
+	 * http://lwn.net/Articles/23634/ */
+	bat_event_workqueue = create_singlethread_workqueue("bat_events");
+
+	if (!bat_event_workqueue)
+		return -ENOMEM;
+
+	bat_socket_init();
+	debugfs_init();
+
+	register_netdevice_notifier(&hard_if_notifier);
+
+	pr_info("B.A.T.M.A.N. advanced %s%s (compatibility version %i) "
+		"loaded\n", SOURCE_VERSION, REVISION_VERSION_STR,
+		COMPAT_VERSION);
+
+	return 0;
+}
+
+static void __exit batman_exit(void)
+{
+	debugfs_destroy();
+	unregister_netdevice_notifier(&hard_if_notifier);
+	hardif_remove_interfaces();
+
+	flush_workqueue(bat_event_workqueue);
+	destroy_workqueue(bat_event_workqueue);
+	bat_event_workqueue = NULL;
+
+	rcu_barrier();
+}
+
+int mesh_init(struct net_device *soft_iface)
+{
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+
+	spin_lock_init(&bat_priv->orig_hash_lock);
+	spin_lock_init(&bat_priv->forw_bat_list_lock);
+	spin_lock_init(&bat_priv->forw_bcast_list_lock);
+	spin_lock_init(&bat_priv->hna_lhash_lock);
+	spin_lock_init(&bat_priv->hna_ghash_lock);
+	spin_lock_init(&bat_priv->gw_list_lock);
+	spin_lock_init(&bat_priv->vis_hash_lock);
+	spin_lock_init(&bat_priv->vis_list_lock);
+	spin_lock_init(&bat_priv->softif_neigh_lock);
+
+	INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
+	INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
+	INIT_HLIST_HEAD(&bat_priv->gw_list);
+	INIT_HLIST_HEAD(&bat_priv->softif_neigh_list);
+
+	if (originator_init(bat_priv) < 1)
+		goto err;
+
+	if (hna_local_init(bat_priv) < 1)
+		goto err;
+
+	if (hna_global_init(bat_priv) < 1)
+		goto err;
+
+	hna_local_add(soft_iface, soft_iface->dev_addr);
+
+	if (vis_init(bat_priv) < 1)
+		goto err;
+
+	atomic_set(&bat_priv->mesh_state, MESH_ACTIVE);
+	goto end;
+
+err:
+	pr_err("Unable to allocate memory for mesh information structures: "
+	       "out of mem ?\n");
+	mesh_free(soft_iface);
+	return -1;
+
+end:
+	return 0;
+}
+
+void mesh_free(struct net_device *soft_iface)
+{
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+
+	atomic_set(&bat_priv->mesh_state, MESH_DEACTIVATING);
+
+	purge_outstanding_packets(bat_priv, NULL);
+
+	vis_quit(bat_priv);
+
+	gw_node_purge(bat_priv);
+	originator_free(bat_priv);
+
+	hna_local_free(bat_priv);
+	hna_global_free(bat_priv);
+
+	softif_neigh_purge(bat_priv);
+
+	atomic_set(&bat_priv->mesh_state, MESH_INACTIVE);
+}
+
+void inc_module_count(void)
+{
+	try_module_get(THIS_MODULE);
+}
+
+void dec_module_count(void)
+{
+	module_put(THIS_MODULE);
+}
+
+int is_my_mac(uint8_t *addr)
+{
+	struct batman_if *batman_if;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->if_status != IF_ACTIVE)
+			continue;
+
+		if (compare_orig(batman_if->net_dev->dev_addr, addr)) {
+			rcu_read_unlock();
+			return 1;
+		}
+	}
+	rcu_read_unlock();
+	return 0;
+
+}
+
+module_init(batman_init);
+module_exit(batman_exit);
+
+MODULE_LICENSE("GPL");
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_SUPPORTED_DEVICE(DRIVER_DEVICE);
+#ifdef REVISION_VERSION
+MODULE_VERSION(SOURCE_VERSION "-" REVISION_VERSION);
+#else
+MODULE_VERSION(SOURCE_VERSION);
+#endif
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
new file mode 100644
index 000000000000..d4d9926c2201
--- /dev/null
+++ b/net/batman-adv/main.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_MAIN_H_
+#define _NET_BATMAN_ADV_MAIN_H_
+
+/* Kernel Programming */
+#define LINUX
+
+#define DRIVER_AUTHOR "Marek Lindner <lindner_marek@yahoo.de>, " \
+		      "Simon Wunderlich <siwu@hrz.tu-chemnitz.de>"
+#define DRIVER_DESC   "B.A.T.M.A.N. advanced"
+#define DRIVER_DEVICE "batman-adv"
+
+#define SOURCE_VERSION "next"
+
+
+/* B.A.T.M.A.N. parameters */
+
+#define TQ_MAX_VALUE 255
+#define JITTER 20
+#define TTL 50			  /* Time To Live of broadcast messages */
+
+#define PURGE_TIMEOUT 200	/* purge originators after time in seconds if no
+				   * valid packet comes in -> TODO: check
+				   * influence on TQ_LOCAL_WINDOW_SIZE */
+#define LOCAL_HNA_TIMEOUT 3600 /* in seconds */
+
+#define TQ_LOCAL_WINDOW_SIZE 64	  /* sliding packet range of received originator
+				   * messages in squence numbers (should be a
+				   * multiple of our word size) */
+#define TQ_GLOBAL_WINDOW_SIZE 5
+#define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1
+#define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1
+#define TQ_TOTAL_BIDRECT_LIMIT 1
+
+#define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE)
+
+#define PACKBUFF_SIZE 2000
+#define LOG_BUF_LEN 8192	  /* has to be a power of 2 */
+
+#define VIS_INTERVAL 5000	/* 5 seconds */
+
+/* how much worse secondary interfaces may be to
+ * to be considered as bonding candidates */
+
+#define BONDING_TQ_THRESHOLD	50
+
+#define MAX_AGGREGATION_BYTES 512 /* should not be bigger than 512 bytes or
+				   * change the size of
+				   * forw_packet->direct_link_flags */
+#define MAX_AGGREGATION_MS 100
+
+#define SOFTIF_NEIGH_TIMEOUT 180000 /* 3 minutes */
+
+#define RESET_PROTECTION_MS 30000
+#define EXPECTED_SEQNO_RANGE	65536
+/* don't reset again within 30 seconds */
+
+#define MESH_INACTIVE 0
+#define MESH_ACTIVE 1
+#define MESH_DEACTIVATING 2
+
+#define BCAST_QUEUE_LEN		256
+#define BATMAN_QUEUE_LEN	256
+
+/*
+ * Debug Messages
+ */
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* Append 'batman-adv: ' before
+					     * kernel messages */
+
+#define DBG_BATMAN 1	/* all messages related to routing / flooding /
+			 * broadcasting / etc */
+#define DBG_ROUTES 2	/* route or hna added / changed / deleted */
+#define DBG_ALL 3
+
+#define LOG_BUF_LEN 8192          /* has to be a power of 2 */
+
+
+/*
+ *  Vis
+ */
+
+/* #define VIS_SUBCLUSTERS_DISABLED */
+
+/*
+ * Kernel headers
+ */
+
+#include <linux/mutex.h>	/* mutex */
+#include <linux/module.h>	/* needed by all modules */
+#include <linux/netdevice.h>	/* netdevice */
+#include <linux/etherdevice.h>  /* ethernet address classifaction */
+#include <linux/if_ether.h>	/* ethernet header */
+#include <linux/poll.h>		/* poll_table */
+#include <linux/kthread.h>	/* kernel threads */
+#include <linux/pkt_sched.h>	/* schedule types */
+#include <linux/workqueue.h>	/* workqueue */
+#include <linux/slab.h>
+#include <net/sock.h>		/* struct sock */
+#include <linux/jiffies.h>
+#include <linux/seq_file.h>
+#include "types.h"
+
+#ifndef REVISION_VERSION
+#define REVISION_VERSION_STR ""
+#else
+#define REVISION_VERSION_STR " "REVISION_VERSION
+#endif
+
+extern struct list_head if_list;
+
+extern unsigned char broadcast_addr[];
+extern struct workqueue_struct *bat_event_workqueue;
+
+int mesh_init(struct net_device *soft_iface);
+void mesh_free(struct net_device *soft_iface);
+void inc_module_count(void);
+void dec_module_count(void);
+int is_my_mac(uint8_t *addr);
+
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+int debug_log(struct bat_priv *bat_priv, char *fmt, ...);
+
+#define bat_dbg(type, bat_priv, fmt, arg...)			\
+	do {							\
+		if (atomic_read(&bat_priv->log_level) & type)	\
+			debug_log(bat_priv, fmt, ## arg);	\
+	}							\
+	while (0)
+#else /* !CONFIG_BATMAN_ADV_DEBUG */
+static inline void bat_dbg(char type __attribute__((unused)),
+			   struct bat_priv *bat_priv __attribute__((unused)),
+			   char *fmt __attribute__((unused)), ...)
+{
+}
+#endif
+
+#define bat_warning(net_dev, fmt, arg...)				\
+	do {								\
+		struct net_device *_netdev = (net_dev);                 \
+		struct bat_priv *_batpriv = netdev_priv(_netdev);       \
+		bat_dbg(DBG_ALL, _batpriv, fmt, ## arg);		\
+		pr_warning("%s: " fmt, _netdev->name, ## arg);		\
+	} while (0)
+#define bat_info(net_dev, fmt, arg...)					\
+	do {								\
+		struct net_device *_netdev = (net_dev);                 \
+		struct bat_priv *_batpriv = netdev_priv(_netdev);       \
+		bat_dbg(DBG_ALL, _batpriv, fmt, ## arg);		\
+		pr_info("%s: " fmt, _netdev->name, ## arg);		\
+	} while (0)
+#define bat_err(net_dev, fmt, arg...)					\
+	do {								\
+		struct net_device *_netdev = (net_dev);                 \
+		struct bat_priv *_batpriv = netdev_priv(_netdev);       \
+		bat_dbg(DBG_ALL, _batpriv, fmt, ## arg);		\
+		pr_err("%s: " fmt, _netdev->name, ## arg);		\
+	} while (0)
+
+#endif /* _NET_BATMAN_ADV_MAIN_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
new file mode 100644
index 000000000000..6b7fb6b7e6f9
--- /dev/null
+++ b/net/batman-adv/originator.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+/* increase the reference counter for this originator */
+
+#include "main.h"
+#include "originator.h"
+#include "hash.h"
+#include "translation-table.h"
+#include "routing.h"
+#include "gateway_client.h"
+#include "hard-interface.h"
+#include "unicast.h"
+#include "soft-interface.h"
+
+static void purge_orig(struct work_struct *work);
+
+static void start_purge_timer(struct bat_priv *bat_priv)
+{
+	INIT_DELAYED_WORK(&bat_priv->orig_work, purge_orig);
+	queue_delayed_work(bat_event_workqueue, &bat_priv->orig_work, 1 * HZ);
+}
+
+int originator_init(struct bat_priv *bat_priv)
+{
+	if (bat_priv->orig_hash)
+		return 1;
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	bat_priv->orig_hash = hash_new(1024);
+
+	if (!bat_priv->orig_hash)
+		goto err;
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	start_purge_timer(bat_priv);
+	return 1;
+
+err:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	return 0;
+}
+
+struct neigh_node *
+create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
+		uint8_t *neigh, struct batman_if *if_incoming)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct neigh_node *neigh_node;
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Creating new last-hop neighbor of originator\n");
+
+	neigh_node = kzalloc(sizeof(struct neigh_node), GFP_ATOMIC);
+	if (!neigh_node)
+		return NULL;
+
+	INIT_LIST_HEAD(&neigh_node->list);
+
+	memcpy(neigh_node->addr, neigh, ETH_ALEN);
+	neigh_node->orig_node = orig_neigh_node;
+	neigh_node->if_incoming = if_incoming;
+
+	list_add_tail(&neigh_node->list, &orig_node->neigh_list);
+	return neigh_node;
+}
+
+static void free_orig_node(void *data, void *arg)
+{
+	struct list_head *list_pos, *list_pos_tmp;
+	struct neigh_node *neigh_node;
+	struct orig_node *orig_node = (struct orig_node *)data;
+	struct bat_priv *bat_priv = (struct bat_priv *)arg;
+
+	/* for all neighbors towards this originator ... */
+	list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) {
+		neigh_node = list_entry(list_pos, struct neigh_node, list);
+
+		list_del(list_pos);
+		kfree(neigh_node);
+	}
+
+	frag_list_free(&orig_node->frag_list);
+	hna_global_del_orig(bat_priv, orig_node, "originator timed out");
+
+	kfree(orig_node->bcast_own);
+	kfree(orig_node->bcast_own_sum);
+	kfree(orig_node);
+}
+
+void originator_free(struct bat_priv *bat_priv)
+{
+	if (!bat_priv->orig_hash)
+		return;
+
+	cancel_delayed_work_sync(&bat_priv->orig_work);
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv);
+	bat_priv->orig_hash = NULL;
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+}
+
+/* this function finds or creates an originator entry for the given
+ * address if it does not exits */
+struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr)
+{
+	struct orig_node *orig_node;
+	int size;
+	int hash_added;
+
+	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
+						   compare_orig, choose_orig,
+						   addr));
+
+	if (orig_node)
+		return orig_node;
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Creating new originator: %pM\n", addr);
+
+	orig_node = kzalloc(sizeof(struct orig_node), GFP_ATOMIC);
+	if (!orig_node)
+		return NULL;
+
+	INIT_LIST_HEAD(&orig_node->neigh_list);
+
+	memcpy(orig_node->orig, addr, ETH_ALEN);
+	orig_node->router = NULL;
+	orig_node->hna_buff = NULL;
+	orig_node->bcast_seqno_reset = jiffies - 1
+					- msecs_to_jiffies(RESET_PROTECTION_MS);
+	orig_node->batman_seqno_reset = jiffies - 1
+					- msecs_to_jiffies(RESET_PROTECTION_MS);
+
+	size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS;
+
+	orig_node->bcast_own = kzalloc(size, GFP_ATOMIC);
+	if (!orig_node->bcast_own)
+		goto free_orig_node;
+
+	size = bat_priv->num_ifaces * sizeof(uint8_t);
+	orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC);
+
+	INIT_LIST_HEAD(&orig_node->frag_list);
+	orig_node->last_frag_packet = 0;
+
+	if (!orig_node->bcast_own_sum)
+		goto free_bcast_own;
+
+	hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig,
+			      orig_node);
+	if (hash_added < 0)
+		goto free_bcast_own_sum;
+
+	return orig_node;
+free_bcast_own_sum:
+	kfree(orig_node->bcast_own_sum);
+free_bcast_own:
+	kfree(orig_node->bcast_own);
+free_orig_node:
+	kfree(orig_node);
+	return NULL;
+}
+
+static bool purge_orig_neighbors(struct bat_priv *bat_priv,
+				 struct orig_node *orig_node,
+				 struct neigh_node **best_neigh_node)
+{
+	struct list_head *list_pos, *list_pos_tmp;
+	struct neigh_node *neigh_node;
+	bool neigh_purged = false;
+
+	*best_neigh_node = NULL;
+
+	/* for all neighbors towards this originator ... */
+	list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) {
+		neigh_node = list_entry(list_pos, struct neigh_node, list);
+
+		if ((time_after(jiffies,
+			neigh_node->last_valid + PURGE_TIMEOUT * HZ)) ||
+		    (neigh_node->if_incoming->if_status == IF_INACTIVE) ||
+		    (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) {
+
+			if (neigh_node->if_incoming->if_status ==
+							IF_TO_BE_REMOVED)
+				bat_dbg(DBG_BATMAN, bat_priv,
+					"neighbor purge: originator %pM, "
+					"neighbor: %pM, iface: %s\n",
+					orig_node->orig, neigh_node->addr,
+					neigh_node->if_incoming->net_dev->name);
+			else
+				bat_dbg(DBG_BATMAN, bat_priv,
+					"neighbor timeout: originator %pM, "
+					"neighbor: %pM, last_valid: %lu\n",
+					orig_node->orig, neigh_node->addr,
+					(neigh_node->last_valid / HZ));
+
+			neigh_purged = true;
+			list_del(list_pos);
+			kfree(neigh_node);
+		} else {
+			if ((!*best_neigh_node) ||
+			    (neigh_node->tq_avg > (*best_neigh_node)->tq_avg))
+				*best_neigh_node = neigh_node;
+		}
+	}
+	return neigh_purged;
+}
+
+static bool purge_orig_node(struct bat_priv *bat_priv,
+			    struct orig_node *orig_node)
+{
+	struct neigh_node *best_neigh_node;
+
+	if (time_after(jiffies,
+		orig_node->last_valid + 2 * PURGE_TIMEOUT * HZ)) {
+
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Originator timeout: originator %pM, last_valid %lu\n",
+			orig_node->orig, (orig_node->last_valid / HZ));
+		return true;
+	} else {
+		if (purge_orig_neighbors(bat_priv, orig_node,
+							&best_neigh_node)) {
+			update_routes(bat_priv, orig_node,
+				      best_neigh_node,
+				      orig_node->hna_buff,
+				      orig_node->hna_buff_len);
+			/* update bonding candidates, we could have lost
+			 * some candidates. */
+			update_bonding_candidates(bat_priv, orig_node);
+		}
+	}
+
+	return false;
+}
+
+static void _purge_orig(struct bat_priv *bat_priv)
+{
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk, *safe;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	int i;
+
+	if (!hash)
+		return;
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	/* for all origins... */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
+			orig_node = bucket->data;
+
+			if (purge_orig_node(bat_priv, orig_node)) {
+				if (orig_node->gw_flags)
+					gw_node_delete(bat_priv, orig_node);
+				hlist_del(walk);
+				kfree(bucket);
+				free_orig_node(orig_node, bat_priv);
+			}
+
+			if (time_after(jiffies, orig_node->last_frag_packet +
+						msecs_to_jiffies(FRAG_TIMEOUT)))
+				frag_list_free(&orig_node->frag_list);
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	gw_node_purge(bat_priv);
+	gw_election(bat_priv);
+
+	softif_neigh_purge(bat_priv);
+}
+
+static void purge_orig(struct work_struct *work)
+{
+	struct delayed_work *delayed_work =
+		container_of(work, struct delayed_work, work);
+	struct bat_priv *bat_priv =
+		container_of(delayed_work, struct bat_priv, orig_work);
+
+	_purge_orig(bat_priv);
+	start_purge_timer(bat_priv);
+}
+
+void purge_orig_ref(struct bat_priv *bat_priv)
+{
+	_purge_orig(bat_priv);
+}
+
+int orig_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	struct neigh_node *neigh_node;
+	int batman_count = 0;
+	int last_seen_secs;
+	int last_seen_msecs;
+	int i;
+
+	if ((!bat_priv->primary_if) ||
+	    (bat_priv->primary_if->if_status != IF_ACTIVE)) {
+		if (!bat_priv->primary_if)
+			return seq_printf(seq, "BATMAN mesh %s disabled - "
+				     "please specify interfaces to enable it\n",
+				     net_dev->name);
+
+		return seq_printf(seq, "BATMAN mesh %s "
+				  "disabled - primary interface not active\n",
+				  net_dev->name);
+	}
+
+	seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n",
+		   SOURCE_VERSION, REVISION_VERSION_STR,
+		   bat_priv->primary_if->net_dev->name,
+		   bat_priv->primary_if->net_dev->dev_addr, net_dev->name);
+	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
+		   "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop",
+		   "outgoingIF", "Potential nexthops");
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+
+			if (!orig_node->router)
+				continue;
+
+			if (orig_node->router->tq_avg == 0)
+				continue;
+
+			last_seen_secs = jiffies_to_msecs(jiffies -
+						orig_node->last_valid) / 1000;
+			last_seen_msecs = jiffies_to_msecs(jiffies -
+						orig_node->last_valid) % 1000;
+
+			neigh_node = orig_node->router;
+			seq_printf(seq, "%pM %4i.%03is   (%3i) %pM [%10s]:",
+				   orig_node->orig, last_seen_secs,
+				   last_seen_msecs, neigh_node->tq_avg,
+				   neigh_node->addr,
+				   neigh_node->if_incoming->net_dev->name);
+
+			list_for_each_entry(neigh_node, &orig_node->neigh_list,
+					    list) {
+				seq_printf(seq, " %pM (%3i)", neigh_node->addr,
+						neigh_node->tq_avg);
+			}
+
+			seq_printf(seq, "\n");
+			batman_count++;
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	if ((batman_count == 0))
+		seq_printf(seq, "No batman nodes in range ...\n");
+
+	return 0;
+}
+
+static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
+{
+	void *data_ptr;
+
+	data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS,
+			   GFP_ATOMIC);
+	if (!data_ptr) {
+		pr_err("Can't resize orig: out of memory\n");
+		return -1;
+	}
+
+	memcpy(data_ptr, orig_node->bcast_own,
+	       (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS);
+	kfree(orig_node->bcast_own);
+	orig_node->bcast_own = data_ptr;
+
+	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr) {
+		pr_err("Can't resize orig: out of memory\n");
+		return -1;
+	}
+
+	memcpy(data_ptr, orig_node->bcast_own_sum,
+	       (max_if_num - 1) * sizeof(uint8_t));
+	kfree(orig_node->bcast_own_sum);
+	orig_node->bcast_own_sum = data_ptr;
+
+	return 0;
+}
+
+int orig_hash_add_if(struct batman_if *batman_if, int max_if_num)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	int i;
+
+	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
+	 * if_num */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+
+			if (orig_node_add_if(orig_node, max_if_num) == -1)
+				goto err;
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	return 0;
+
+err:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	return -ENOMEM;
+}
+
+static int orig_node_del_if(struct orig_node *orig_node,
+		     int max_if_num, int del_if_num)
+{
+	void *data_ptr = NULL;
+	int chunk_size;
+
+	/* last interface was removed */
+	if (max_if_num == 0)
+		goto free_bcast_own;
+
+	chunk_size = sizeof(unsigned long) * NUM_WORDS;
+	data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
+	if (!data_ptr) {
+		pr_err("Can't resize orig: out of memory\n");
+		return -1;
+	}
+
+	/* copy first part */
+	memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size);
+
+	/* copy second part */
+	memcpy(data_ptr + del_if_num * chunk_size,
+	       orig_node->bcast_own + ((del_if_num + 1) * chunk_size),
+	       (max_if_num - del_if_num) * chunk_size);
+
+free_bcast_own:
+	kfree(orig_node->bcast_own);
+	orig_node->bcast_own = data_ptr;
+
+	if (max_if_num == 0)
+		goto free_own_sum;
+
+	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr) {
+		pr_err("Can't resize orig: out of memory\n");
+		return -1;
+	}
+
+	memcpy(data_ptr, orig_node->bcast_own_sum,
+	       del_if_num * sizeof(uint8_t));
+
+	memcpy(data_ptr + del_if_num * sizeof(uint8_t),
+	       orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)),
+	       (max_if_num - del_if_num) * sizeof(uint8_t));
+
+free_own_sum:
+	kfree(orig_node->bcast_own_sum);
+	orig_node->bcast_own_sum = data_ptr;
+
+	return 0;
+}
+
+int orig_hash_del_if(struct batman_if *batman_if, int max_if_num)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct batman_if *batman_if_tmp;
+	struct orig_node *orig_node;
+	int i, ret;
+
+	/* resize all orig nodes because orig_node->bcast_own(_sum) depend on
+	 * if_num */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+
+			ret = orig_node_del_if(orig_node, max_if_num,
+					batman_if->if_num);
+
+			if (ret == -1)
+				goto err;
+		}
+	}
+
+	/* renumber remaining batman interfaces _inside_ of orig_hash_lock */
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if_tmp, &if_list, list) {
+		if (batman_if_tmp->if_status == IF_NOT_IN_USE)
+			continue;
+
+		if (batman_if == batman_if_tmp)
+			continue;
+
+		if (batman_if->soft_iface != batman_if_tmp->soft_iface)
+			continue;
+
+		if (batman_if_tmp->if_num > batman_if->if_num)
+			batman_if_tmp->if_num--;
+	}
+	rcu_read_unlock();
+
+	batman_if->if_num = -1;
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	return 0;
+
+err:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	return -ENOMEM;
+}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
new file mode 100644
index 000000000000..d474ceb2a4eb
--- /dev/null
+++ b/net/batman-adv/originator.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_ORIGINATOR_H_
+#define _NET_BATMAN_ADV_ORIGINATOR_H_
+
+int originator_init(struct bat_priv *bat_priv);
+void originator_free(struct bat_priv *bat_priv);
+void purge_orig_ref(struct bat_priv *bat_priv);
+struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr);
+struct neigh_node *
+create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node,
+		uint8_t *neigh, struct batman_if *if_incoming);
+int orig_seq_print_text(struct seq_file *seq, void *offset);
+int orig_hash_add_if(struct batman_if *batman_if, int max_if_num);
+int orig_hash_del_if(struct batman_if *batman_if, int max_if_num);
+
+
+/* returns 1 if they are the same originator */
+static inline int compare_orig(void *data1, void *data2)
+{
+	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
+}
+
+/* hashfunction to choose an entry in a hash table of given size */
+/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */
+static inline int choose_orig(void *data, int32_t size)
+{
+	unsigned char *key = data;
+	uint32_t hash = 0;
+	size_t i;
+
+	for (i = 0; i < 6; i++) {
+		hash += key[i];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash % size;
+}
+
+#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
new file mode 100644
index 000000000000..b49fdf70a6d5
--- /dev/null
+++ b/net/batman-adv/packet.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_PACKET_H_
+#define _NET_BATMAN_ADV_PACKET_H_
+
+#define ETH_P_BATMAN  0x4305	/* unofficial/not registered Ethertype */
+
+#define BAT_PACKET       0x01
+#define BAT_ICMP         0x02
+#define BAT_UNICAST      0x03
+#define BAT_BCAST        0x04
+#define BAT_VIS          0x05
+#define BAT_UNICAST_FRAG 0x06
+
+/* this file is included by batctl which needs these defines */
+#define COMPAT_VERSION 12
+#define DIRECTLINK 0x40
+#define VIS_SERVER 0x20
+#define PRIMARIES_FIRST_HOP 0x10
+
+/* ICMP message types */
+#define ECHO_REPLY 0
+#define DESTINATION_UNREACHABLE 3
+#define ECHO_REQUEST 8
+#define TTL_EXCEEDED 11
+#define PARAMETER_PROBLEM 12
+
+/* vis defines */
+#define VIS_TYPE_SERVER_SYNC		0
+#define VIS_TYPE_CLIENT_UPDATE		1
+
+/* fragmentation defines */
+#define UNI_FRAG_HEAD 0x01
+
+struct batman_packet {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  flags;    /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */
+	uint8_t  tq;
+	uint32_t seqno;
+	uint8_t  orig[6];
+	uint8_t  prev_sender[6];
+	uint8_t  ttl;
+	uint8_t  num_hna;
+	uint8_t  gw_flags;  /* flags related to gateway class */
+	uint8_t  align;
+} __attribute__((packed));
+
+#define BAT_PACKET_LEN sizeof(struct batman_packet)
+
+struct icmp_packet {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  msg_type; /* see ICMP message types above */
+	uint8_t  ttl;
+	uint8_t  dst[6];
+	uint8_t  orig[6];
+	uint16_t seqno;
+	uint8_t  uid;
+} __attribute__((packed));
+
+#define BAT_RR_LEN 16
+
+/* icmp_packet_rr must start with all fields from imcp_packet
+ * as this is assumed by code that handles ICMP packets */
+struct icmp_packet_rr {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  msg_type; /* see ICMP message types above */
+	uint8_t  ttl;
+	uint8_t  dst[6];
+	uint8_t  orig[6];
+	uint16_t seqno;
+	uint8_t  uid;
+	uint8_t  rr_cur;
+	uint8_t  rr[BAT_RR_LEN][ETH_ALEN];
+} __attribute__((packed));
+
+struct unicast_packet {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  dest[6];
+	uint8_t  ttl;
+} __attribute__((packed));
+
+struct unicast_frag_packet {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  dest[6];
+	uint8_t  ttl;
+	uint8_t  flags;
+	uint8_t  orig[6];
+	uint16_t seqno;
+} __attribute__((packed));
+
+struct bcast_packet {
+	uint8_t  packet_type;
+	uint8_t  version;  /* batman version field */
+	uint8_t  orig[6];
+	uint8_t  ttl;
+	uint32_t seqno;
+} __attribute__((packed));
+
+struct vis_packet {
+	uint8_t  packet_type;
+	uint8_t  version;        /* batman version field */
+	uint8_t  vis_type;	 /* which type of vis-participant sent this? */
+	uint8_t  entries;	 /* number of entries behind this struct */
+	uint32_t seqno;		 /* sequence number */
+	uint8_t  ttl;		 /* TTL */
+	uint8_t  vis_orig[6];	 /* originator that informs about its
+				  * neighbors */
+	uint8_t  target_orig[6]; /* who should receive this packet */
+	uint8_t  sender_orig[6]; /* who sent or rebroadcasted this packet */
+} __attribute__((packed));
+
+#endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c
new file mode 100644
index 000000000000..defd37c9be1f
--- /dev/null
+++ b/net/batman-adv/ring_buffer.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "ring_buffer.h"
+
+void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value)
+{
+	lq_recv[*lq_index] = value;
+	*lq_index = (*lq_index + 1) % TQ_GLOBAL_WINDOW_SIZE;
+}
+
+uint8_t ring_buffer_avg(uint8_t lq_recv[])
+{
+	uint8_t *ptr;
+	uint16_t count = 0, i = 0, sum = 0;
+
+	ptr = lq_recv;
+
+	while (i < TQ_GLOBAL_WINDOW_SIZE) {
+		if (*ptr != 0) {
+			count++;
+			sum += *ptr;
+		}
+
+		i++;
+		ptr++;
+	}
+
+	if (count == 0)
+		return 0;
+
+	return (uint8_t)(sum / count);
+}
diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h
new file mode 100644
index 000000000000..6b0cb9aaeba5
--- /dev/null
+++ b/net/batman-adv/ring_buffer.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_
+#define _NET_BATMAN_ADV_RING_BUFFER_H_
+
+void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value);
+uint8_t ring_buffer_avg(uint8_t lq_recv[]);
+
+#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
new file mode 100644
index 000000000000..8828eddd3f72
--- /dev/null
+++ b/net/batman-adv/routing.c
@@ -0,0 +1,1397 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "routing.h"
+#include "send.h"
+#include "hash.h"
+#include "soft-interface.h"
+#include "hard-interface.h"
+#include "icmp_socket.h"
+#include "translation-table.h"
+#include "originator.h"
+#include "types.h"
+#include "ring_buffer.h"
+#include "vis.h"
+#include "aggregation.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "unicast.h"
+
+void slide_own_bcast_window(struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	unsigned long *word;
+	int i;
+	size_t word_index;
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+			word_index = batman_if->if_num * NUM_WORDS;
+			word = &(orig_node->bcast_own[word_index]);
+
+			bit_get_packet(bat_priv, word, 1, 0);
+			orig_node->bcast_own_sum[batman_if->if_num] =
+				bit_packet_count(word);
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+}
+
+static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node,
+		       unsigned char *hna_buff, int hna_buff_len)
+{
+	if ((hna_buff_len != orig_node->hna_buff_len) ||
+	    ((hna_buff_len > 0) &&
+	     (orig_node->hna_buff_len > 0) &&
+	     (memcmp(orig_node->hna_buff, hna_buff, hna_buff_len) != 0))) {
+
+		if (orig_node->hna_buff_len > 0)
+			hna_global_del_orig(bat_priv, orig_node,
+					    "originator changed hna");
+
+		if ((hna_buff_len > 0) && (hna_buff))
+			hna_global_add_orig(bat_priv, orig_node,
+					    hna_buff, hna_buff_len);
+	}
+}
+
+static void update_route(struct bat_priv *bat_priv,
+			 struct orig_node *orig_node,
+			 struct neigh_node *neigh_node,
+			 unsigned char *hna_buff, int hna_buff_len)
+{
+	/* route deleted */
+	if ((orig_node->router) && (!neigh_node)) {
+
+		bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n",
+			orig_node->orig);
+		hna_global_del_orig(bat_priv, orig_node,
+				    "originator timed out");
+
+		/* route added */
+	} else if ((!orig_node->router) && (neigh_node)) {
+
+		bat_dbg(DBG_ROUTES, bat_priv,
+			"Adding route towards: %pM (via %pM)\n",
+			orig_node->orig, neigh_node->addr);
+		hna_global_add_orig(bat_priv, orig_node,
+				    hna_buff, hna_buff_len);
+
+		/* route changed */
+	} else {
+		bat_dbg(DBG_ROUTES, bat_priv,
+			"Changing route towards: %pM "
+			"(now via %pM - was via %pM)\n",
+			orig_node->orig, neigh_node->addr,
+			orig_node->router->addr);
+	}
+
+	orig_node->router = neigh_node;
+}
+
+
+void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
+		   struct neigh_node *neigh_node, unsigned char *hna_buff,
+		   int hna_buff_len)
+{
+
+	if (!orig_node)
+		return;
+
+	if (orig_node->router != neigh_node)
+		update_route(bat_priv, orig_node, neigh_node,
+			     hna_buff, hna_buff_len);
+	/* may be just HNA changed */
+	else
+		update_HNA(bat_priv, orig_node, hna_buff, hna_buff_len);
+}
+
+static int is_bidirectional_neigh(struct orig_node *orig_node,
+				struct orig_node *orig_neigh_node,
+				struct batman_packet *batman_packet,
+				struct batman_if *if_incoming)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+	unsigned char total_count;
+
+	if (orig_node == orig_neigh_node) {
+		list_for_each_entry(tmp_neigh_node,
+				    &orig_node->neigh_list,
+				    list) {
+
+			if (compare_orig(tmp_neigh_node->addr,
+					 orig_neigh_node->orig) &&
+			    (tmp_neigh_node->if_incoming == if_incoming))
+				neigh_node = tmp_neigh_node;
+		}
+
+		if (!neigh_node)
+			neigh_node = create_neighbor(orig_node,
+						     orig_neigh_node,
+						     orig_neigh_node->orig,
+						     if_incoming);
+		/* create_neighbor failed, return 0 */
+		if (!neigh_node)
+			return 0;
+
+		neigh_node->last_valid = jiffies;
+	} else {
+		/* find packet count of corresponding one hop neighbor */
+		list_for_each_entry(tmp_neigh_node,
+				    &orig_neigh_node->neigh_list, list) {
+
+			if (compare_orig(tmp_neigh_node->addr,
+					 orig_neigh_node->orig) &&
+			    (tmp_neigh_node->if_incoming == if_incoming))
+				neigh_node = tmp_neigh_node;
+		}
+
+		if (!neigh_node)
+			neigh_node = create_neighbor(orig_neigh_node,
+						     orig_neigh_node,
+						     orig_neigh_node->orig,
+						     if_incoming);
+		/* create_neighbor failed, return 0 */
+		if (!neigh_node)
+			return 0;
+	}
+
+	orig_node->last_valid = jiffies;
+
+	/* pay attention to not get a value bigger than 100 % */
+	total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] >
+		       neigh_node->real_packet_count ?
+		       neigh_node->real_packet_count :
+		       orig_neigh_node->bcast_own_sum[if_incoming->if_num]);
+
+	/* if we have too few packets (too less data) we set tq_own to zero */
+	/* if we receive too few packets it is not considered bidirectional */
+	if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) ||
+	    (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM))
+		orig_neigh_node->tq_own = 0;
+	else
+		/* neigh_node->real_packet_count is never zero as we
+		 * only purge old information when getting new
+		 * information */
+		orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) /
+			neigh_node->real_packet_count;
+
+	/*
+	 * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does
+	 * affect the nearly-symmetric links only a little, but
+	 * punishes asymmetric links more.  This will give a value
+	 * between 0 and TQ_MAX_VALUE
+	 */
+	orig_neigh_node->tq_asym_penalty =
+		TQ_MAX_VALUE -
+		(TQ_MAX_VALUE *
+		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) *
+		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) *
+		 (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) /
+		(TQ_LOCAL_WINDOW_SIZE *
+		 TQ_LOCAL_WINDOW_SIZE *
+		 TQ_LOCAL_WINDOW_SIZE);
+
+	batman_packet->tq = ((batman_packet->tq *
+			      orig_neigh_node->tq_own *
+			      orig_neigh_node->tq_asym_penalty) /
+			     (TQ_MAX_VALUE * TQ_MAX_VALUE));
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"bidirectional: "
+		"orig = %-15pM neigh = %-15pM => own_bcast = %2i, "
+		"real recv = %2i, local tq: %3i, asym_penalty: %3i, "
+		"total tq: %3i\n",
+		orig_node->orig, orig_neigh_node->orig, total_count,
+		neigh_node->real_packet_count, orig_neigh_node->tq_own,
+		orig_neigh_node->tq_asym_penalty, batman_packet->tq);
+
+	/* if link has the minimum required transmission quality
+	 * consider it bidirectional */
+	if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT)
+		return 1;
+
+	return 0;
+}
+
+static void update_orig(struct bat_priv *bat_priv,
+			struct orig_node *orig_node,
+			struct ethhdr *ethhdr,
+			struct batman_packet *batman_packet,
+			struct batman_if *if_incoming,
+			unsigned char *hna_buff, int hna_buff_len,
+			char is_duplicate)
+{
+	struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL;
+	int tmp_hna_buff_len;
+
+	bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): "
+		"Searching and updating originator entry of received packet\n");
+
+	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
+		if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) &&
+		    (tmp_neigh_node->if_incoming == if_incoming)) {
+			neigh_node = tmp_neigh_node;
+			continue;
+		}
+
+		if (is_duplicate)
+			continue;
+
+		ring_buffer_set(tmp_neigh_node->tq_recv,
+				&tmp_neigh_node->tq_index, 0);
+		tmp_neigh_node->tq_avg =
+			ring_buffer_avg(tmp_neigh_node->tq_recv);
+	}
+
+	if (!neigh_node) {
+		struct orig_node *orig_tmp;
+
+		orig_tmp = get_orig_node(bat_priv, ethhdr->h_source);
+		if (!orig_tmp)
+			return;
+
+		neigh_node = create_neighbor(orig_node, orig_tmp,
+					     ethhdr->h_source, if_incoming);
+		if (!neigh_node)
+			return;
+	} else
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Updating existing last-hop neighbor of originator\n");
+
+	orig_node->flags = batman_packet->flags;
+	neigh_node->last_valid = jiffies;
+
+	ring_buffer_set(neigh_node->tq_recv,
+			&neigh_node->tq_index,
+			batman_packet->tq);
+	neigh_node->tq_avg = ring_buffer_avg(neigh_node->tq_recv);
+
+	if (!is_duplicate) {
+		orig_node->last_ttl = batman_packet->ttl;
+		neigh_node->last_ttl = batman_packet->ttl;
+	}
+
+	tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ?
+			    batman_packet->num_hna * ETH_ALEN : hna_buff_len);
+
+	/* if this neighbor already is our next hop there is nothing
+	 * to change */
+	if (orig_node->router == neigh_node)
+		goto update_hna;
+
+	/* if this neighbor does not offer a better TQ we won't consider it */
+	if ((orig_node->router) &&
+	    (orig_node->router->tq_avg > neigh_node->tq_avg))
+		goto update_hna;
+
+	/* if the TQ is the same and the link not more symetric we
+	 * won't consider it either */
+	if ((orig_node->router) &&
+	     ((neigh_node->tq_avg == orig_node->router->tq_avg) &&
+	     (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num]
+	      >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num])))
+		goto update_hna;
+
+	update_routes(bat_priv, orig_node, neigh_node,
+		      hna_buff, tmp_hna_buff_len);
+	goto update_gw;
+
+update_hna:
+	update_routes(bat_priv, orig_node, orig_node->router,
+		      hna_buff, tmp_hna_buff_len);
+
+update_gw:
+	if (orig_node->gw_flags != batman_packet->gw_flags)
+		gw_node_update(bat_priv, orig_node, batman_packet->gw_flags);
+
+	orig_node->gw_flags = batman_packet->gw_flags;
+
+	/* restart gateway selection if fast or late switching was enabled */
+	if ((orig_node->gw_flags) &&
+	    (atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) &&
+	    (atomic_read(&bat_priv->gw_sel_class) > 2))
+		gw_check_election(bat_priv, orig_node);
+}
+
+/* checks whether the host restarted and is in the protection time.
+ * returns:
+ *  0 if the packet is to be accepted
+ *  1 if the packet is to be ignored.
+ */
+static int window_protected(struct bat_priv *bat_priv,
+			    int32_t seq_num_diff,
+			    unsigned long *last_reset)
+{
+	if ((seq_num_diff <= -TQ_LOCAL_WINDOW_SIZE)
+		|| (seq_num_diff >= EXPECTED_SEQNO_RANGE)) {
+		if (time_after(jiffies, *last_reset +
+			msecs_to_jiffies(RESET_PROTECTION_MS))) {
+
+			*last_reset = jiffies;
+			bat_dbg(DBG_BATMAN, bat_priv,
+				"old packet received, start protection\n");
+
+			return 0;
+		} else
+			return 1;
+	}
+	return 0;
+}
+
+/* processes a batman packet for all interfaces, adjusts the sequence number and
+ * finds out whether it is a duplicate.
+ * returns:
+ *   1 the packet is a duplicate
+ *   0 the packet has not yet been received
+ *  -1 the packet is old and has been received while the seqno window
+ *     was protected. Caller should drop it.
+ */
+static char count_real_packets(struct ethhdr *ethhdr,
+			       struct batman_packet *batman_packet,
+			       struct batman_if *if_incoming)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct orig_node *orig_node;
+	struct neigh_node *tmp_neigh_node;
+	char is_duplicate = 0;
+	int32_t seq_diff;
+	int need_update = 0;
+	int set_mark;
+
+	orig_node = get_orig_node(bat_priv, batman_packet->orig);
+	if (!orig_node)
+		return 0;
+
+	seq_diff = batman_packet->seqno - orig_node->last_real_seqno;
+
+	/* signalize caller that the packet is to be dropped. */
+	if (window_protected(bat_priv, seq_diff,
+			     &orig_node->batman_seqno_reset))
+		return -1;
+
+	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
+
+		is_duplicate |= get_bit_status(tmp_neigh_node->real_bits,
+					       orig_node->last_real_seqno,
+					       batman_packet->seqno);
+
+		if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) &&
+		    (tmp_neigh_node->if_incoming == if_incoming))
+			set_mark = 1;
+		else
+			set_mark = 0;
+
+		/* if the window moved, set the update flag. */
+		need_update |= bit_get_packet(bat_priv,
+					      tmp_neigh_node->real_bits,
+					      seq_diff, set_mark);
+
+		tmp_neigh_node->real_packet_count =
+			bit_packet_count(tmp_neigh_node->real_bits);
+	}
+
+	if (need_update) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"updating last_seqno: old %d, new %d\n",
+			orig_node->last_real_seqno, batman_packet->seqno);
+		orig_node->last_real_seqno = batman_packet->seqno;
+	}
+
+	return is_duplicate;
+}
+
+/* copy primary address for bonding */
+static void mark_bonding_address(struct bat_priv *bat_priv,
+				 struct orig_node *orig_node,
+				 struct orig_node *orig_neigh_node,
+				 struct batman_packet *batman_packet)
+
+{
+	if (batman_packet->flags & PRIMARIES_FIRST_HOP)
+		memcpy(orig_neigh_node->primary_addr,
+		       orig_node->orig, ETH_ALEN);
+
+	return;
+}
+
+/* mark possible bond.candidates in the neighbor list */
+void update_bonding_candidates(struct bat_priv *bat_priv,
+			       struct orig_node *orig_node)
+{
+	int candidates;
+	int interference_candidate;
+	int best_tq;
+	struct neigh_node *tmp_neigh_node, *tmp_neigh_node2;
+	struct neigh_node *first_candidate, *last_candidate;
+
+	/* update the candidates for this originator */
+	if (!orig_node->router) {
+		orig_node->bond.candidates = 0;
+		return;
+	}
+
+	best_tq = orig_node->router->tq_avg;
+
+	/* update bond.candidates */
+
+	candidates = 0;
+
+	/* mark other nodes which also received "PRIMARIES FIRST HOP" packets
+	 * as "bonding partner" */
+
+	/* first, zero the list */
+	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
+		tmp_neigh_node->next_bond_candidate = NULL;
+	}
+
+	first_candidate = NULL;
+	last_candidate = NULL;
+	list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) {
+
+		/* only consider if it has the same primary address ...  */
+		if (memcmp(orig_node->orig,
+				tmp_neigh_node->orig_node->primary_addr,
+				ETH_ALEN) != 0)
+			continue;
+
+		/* ... and is good enough to be considered */
+		if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD)
+			continue;
+
+		/* check if we have another candidate with the same
+		 * mac address or interface. If we do, we won't
+		 * select this candidate because of possible interference. */
+
+		interference_candidate = 0;
+		list_for_each_entry(tmp_neigh_node2,
+				&orig_node->neigh_list, list) {
+
+			if (tmp_neigh_node2 == tmp_neigh_node)
+				continue;
+
+			/* we only care if the other candidate is even
+			 * considered as candidate. */
+			if (!tmp_neigh_node2->next_bond_candidate)
+				continue;
+
+
+			if ((tmp_neigh_node->if_incoming ==
+				tmp_neigh_node2->if_incoming)
+				|| (memcmp(tmp_neigh_node->addr,
+				tmp_neigh_node2->addr, ETH_ALEN) == 0)) {
+
+				interference_candidate = 1;
+				break;
+			}
+		}
+		/* don't care further if it is an interference candidate */
+		if (interference_candidate)
+			continue;
+
+		if (!first_candidate) {
+			first_candidate = tmp_neigh_node;
+			tmp_neigh_node->next_bond_candidate = first_candidate;
+		} else
+			tmp_neigh_node->next_bond_candidate = last_candidate;
+
+		last_candidate = tmp_neigh_node;
+
+		candidates++;
+	}
+
+	if (candidates > 0) {
+		first_candidate->next_bond_candidate = last_candidate;
+		orig_node->bond.selected = first_candidate;
+	}
+
+	orig_node->bond.candidates = candidates;
+}
+
+void receive_bat_packet(struct ethhdr *ethhdr,
+				struct batman_packet *batman_packet,
+				unsigned char *hna_buff, int hna_buff_len,
+				struct batman_if *if_incoming)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	struct batman_if *batman_if;
+	struct orig_node *orig_neigh_node, *orig_node;
+	char has_directlink_flag;
+	char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0;
+	char is_broadcast = 0, is_bidirectional, is_single_hop_neigh;
+	char is_duplicate;
+	uint32_t if_incoming_seqno;
+
+	/* Silently drop when the batman packet is actually not a
+	 * correct packet.
+	 *
+	 * This might happen if a packet is padded (e.g. Ethernet has a
+	 * minimum frame length of 64 byte) and the aggregation interprets
+	 * it as an additional length.
+	 *
+	 * TODO: A more sane solution would be to have a bit in the
+	 * batman_packet to detect whether the packet is the last
+	 * packet in an aggregation.  Here we expect that the padding
+	 * is always zero (or not 0x01)
+	 */
+	if (batman_packet->packet_type != BAT_PACKET)
+		return;
+
+	/* could be changed by schedule_own_packet() */
+	if_incoming_seqno = atomic_read(&if_incoming->seqno);
+
+	has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0);
+
+	is_single_hop_neigh = (compare_orig(ethhdr->h_source,
+					    batman_packet->orig) ? 1 : 0);
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Received BATMAN packet via NB: %pM, IF: %s [%pM] "
+		"(from OG: %pM, via prev OG: %pM, seqno %d, tq %d, "
+		"TTL %d, V %d, IDF %d)\n",
+		ethhdr->h_source, if_incoming->net_dev->name,
+		if_incoming->net_dev->dev_addr, batman_packet->orig,
+		batman_packet->prev_sender, batman_packet->seqno,
+		batman_packet->tq, batman_packet->ttl, batman_packet->version,
+		has_directlink_flag);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->if_status != IF_ACTIVE)
+			continue;
+
+		if (batman_if->soft_iface != if_incoming->soft_iface)
+			continue;
+
+		if (compare_orig(ethhdr->h_source,
+				 batman_if->net_dev->dev_addr))
+			is_my_addr = 1;
+
+		if (compare_orig(batman_packet->orig,
+				 batman_if->net_dev->dev_addr))
+			is_my_orig = 1;
+
+		if (compare_orig(batman_packet->prev_sender,
+				 batman_if->net_dev->dev_addr))
+			is_my_oldorig = 1;
+
+		if (compare_orig(ethhdr->h_source, broadcast_addr))
+			is_broadcast = 1;
+	}
+	rcu_read_unlock();
+
+	if (batman_packet->version != COMPAT_VERSION) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: incompatible batman version (%i)\n",
+			batman_packet->version);
+		return;
+	}
+
+	if (is_my_addr) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: received my own broadcast (sender: %pM"
+			")\n",
+			ethhdr->h_source);
+		return;
+	}
+
+	if (is_broadcast) {
+		bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
+		"ignoring all packets with broadcast source addr (sender: %pM"
+		")\n", ethhdr->h_source);
+		return;
+	}
+
+	if (is_my_orig) {
+		unsigned long *word;
+		int offset;
+
+		orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source);
+
+		if (!orig_neigh_node)
+			return;
+
+		/* neighbor has to indicate direct link and it has to
+		 * come via the corresponding interface */
+		/* if received seqno equals last send seqno save new
+		 * seqno for bidirectional check */
+		if (has_directlink_flag &&
+		    compare_orig(if_incoming->net_dev->dev_addr,
+				 batman_packet->orig) &&
+		    (batman_packet->seqno - if_incoming_seqno + 2 == 0)) {
+			offset = if_incoming->if_num * NUM_WORDS;
+			word = &(orig_neigh_node->bcast_own[offset]);
+			bit_mark(word, 0);
+			orig_neigh_node->bcast_own_sum[if_incoming->if_num] =
+				bit_packet_count(word);
+		}
+
+		bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: "
+			"originator packet from myself (via neighbor)\n");
+		return;
+	}
+
+	if (is_my_oldorig) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: ignoring all rebroadcast echos (sender: "
+			"%pM)\n", ethhdr->h_source);
+		return;
+	}
+
+	orig_node = get_orig_node(bat_priv, batman_packet->orig);
+	if (!orig_node)
+		return;
+
+	is_duplicate = count_real_packets(ethhdr, batman_packet, if_incoming);
+
+	if (is_duplicate == -1) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: packet within seqno protection time "
+			"(sender: %pM)\n", ethhdr->h_source);
+		return;
+	}
+
+	if (batman_packet->tq == 0) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: originator packet with tq equal 0\n");
+		return;
+	}
+
+	/* avoid temporary routing loops */
+	if ((orig_node->router) &&
+	    (orig_node->router->orig_node->router) &&
+	    (compare_orig(orig_node->router->addr,
+			  batman_packet->prev_sender)) &&
+	    !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) &&
+	    (compare_orig(orig_node->router->addr,
+			  orig_node->router->orig_node->router->addr))) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: ignoring all rebroadcast packets that "
+			"may make me loop (sender: %pM)\n", ethhdr->h_source);
+		return;
+	}
+
+	/* if sender is a direct neighbor the sender mac equals
+	 * originator mac */
+	orig_neigh_node = (is_single_hop_neigh ?
+			   orig_node :
+			   get_orig_node(bat_priv, ethhdr->h_source));
+	if (!orig_neigh_node)
+		return;
+
+	/* drop packet if sender is not a direct neighbor and if we
+	 * don't route towards it */
+	if (!is_single_hop_neigh && (!orig_neigh_node->router)) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: OGM via unknown neighbor!\n");
+		return;
+	}
+
+	is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node,
+						batman_packet, if_incoming);
+
+	/* update ranking if it is not a duplicate or has the same
+	 * seqno and similar ttl as the non-duplicate */
+	if (is_bidirectional &&
+	    (!is_duplicate ||
+	     ((orig_node->last_real_seqno == batman_packet->seqno) &&
+	      (orig_node->last_ttl - 3 <= batman_packet->ttl))))
+		update_orig(bat_priv, orig_node, ethhdr, batman_packet,
+			    if_incoming, hna_buff, hna_buff_len, is_duplicate);
+
+	mark_bonding_address(bat_priv, orig_node,
+			     orig_neigh_node, batman_packet);
+	update_bonding_candidates(bat_priv, orig_node);
+
+	/* is single hop (direct) neighbor */
+	if (is_single_hop_neigh) {
+
+		/* mark direct link on incoming interface */
+		schedule_forward_packet(orig_node, ethhdr, batman_packet,
+					1, hna_buff_len, if_incoming);
+
+		bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: "
+			"rebroadcast neighbor packet with direct link flag\n");
+		return;
+	}
+
+	/* multihop originator */
+	if (!is_bidirectional) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: not received via bidirectional link\n");
+		return;
+	}
+
+	if (is_duplicate) {
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"Drop packet: duplicate packet received\n");
+		return;
+	}
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Forwarding packet: rebroadcast originator packet\n");
+	schedule_forward_packet(orig_node, ethhdr, batman_packet,
+				0, hna_buff_len, if_incoming);
+}
+
+int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	struct ethhdr *ethhdr;
+
+	/* drop packet if it has not necessary minimum size */
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct batman_packet))))
+		return NET_RX_DROP;
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* packet with broadcast indication but unicast recipient */
+	if (!is_broadcast_ether_addr(ethhdr->h_dest))
+		return NET_RX_DROP;
+
+	/* packet with broadcast sender address */
+	if (is_broadcast_ether_addr(ethhdr->h_source))
+		return NET_RX_DROP;
+
+	/* create a copy of the skb, if needed, to modify it. */
+	if (skb_cow(skb, 0) < 0)
+		return NET_RX_DROP;
+
+	/* keep skb linear */
+	if (skb_linearize(skb) < 0)
+		return NET_RX_DROP;
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	receive_aggr_bat_packet(ethhdr,
+				skb->data,
+				skb_headlen(skb),
+				batman_if);
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	kfree_skb(skb);
+	return NET_RX_SUCCESS;
+}
+
+static int recv_my_icmp_packet(struct bat_priv *bat_priv,
+			       struct sk_buff *skb, size_t icmp_len)
+{
+	struct orig_node *orig_node;
+	struct icmp_packet_rr *icmp_packet;
+	struct ethhdr *ethhdr;
+	struct batman_if *batman_if;
+	int ret;
+	uint8_t dstaddr[ETH_ALEN];
+
+	icmp_packet = (struct icmp_packet_rr *)skb->data;
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* add data to device queue */
+	if (icmp_packet->msg_type != ECHO_REQUEST) {
+		bat_socket_receive_packet(icmp_packet, icmp_len);
+		return NET_RX_DROP;
+	}
+
+	if (!bat_priv->primary_if)
+		return NET_RX_DROP;
+
+	/* answer echo request (ping) */
+	/* get routing information */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
+						   compare_orig, choose_orig,
+						   icmp_packet->orig));
+	ret = NET_RX_DROP;
+
+	if ((orig_node) && (orig_node->router)) {
+
+		/* don't lock while sending the packets ... we therefore
+		 * copy the required data before sending */
+		batman_if = orig_node->router->if_incoming;
+		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+		/* create a copy of the skb, if needed, to modify it. */
+		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+			return NET_RX_DROP;
+
+		icmp_packet = (struct icmp_packet_rr *)skb->data;
+		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+		memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
+		memcpy(icmp_packet->orig,
+		       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+		icmp_packet->msg_type = ECHO_REPLY;
+		icmp_packet->ttl = TTL;
+
+		send_skb_packet(skb, batman_if, dstaddr);
+		ret = NET_RX_SUCCESS;
+
+	} else
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	return ret;
+}
+
+static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
+				  struct sk_buff *skb, size_t icmp_len)
+{
+	struct orig_node *orig_node;
+	struct icmp_packet *icmp_packet;
+	struct ethhdr *ethhdr;
+	struct batman_if *batman_if;
+	int ret;
+	uint8_t dstaddr[ETH_ALEN];
+
+	icmp_packet = (struct icmp_packet *)skb->data;
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* send TTL exceeded if packet is an echo request (traceroute) */
+	if (icmp_packet->msg_type != ECHO_REQUEST) {
+		pr_debug("Warning - can't forward icmp packet from %pM to "
+			 "%pM: ttl exceeded\n", icmp_packet->orig,
+			 icmp_packet->dst);
+		return NET_RX_DROP;
+	}
+
+	if (!bat_priv->primary_if)
+		return NET_RX_DROP;
+
+	/* get routing information */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)
+		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
+			       icmp_packet->orig));
+	ret = NET_RX_DROP;
+
+	if ((orig_node) && (orig_node->router)) {
+
+		/* don't lock while sending the packets ... we therefore
+		 * copy the required data before sending */
+		batman_if = orig_node->router->if_incoming;
+		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+		/* create a copy of the skb, if needed, to modify it. */
+		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+			return NET_RX_DROP;
+
+		icmp_packet = (struct icmp_packet *) skb->data;
+		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+		memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN);
+		memcpy(icmp_packet->orig,
+		       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+		icmp_packet->msg_type = TTL_EXCEEDED;
+		icmp_packet->ttl = TTL;
+
+		send_skb_packet(skb, batman_if, dstaddr);
+		ret = NET_RX_SUCCESS;
+
+	} else
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	return ret;
+}
+
+
+int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	struct icmp_packet_rr *icmp_packet;
+	struct ethhdr *ethhdr;
+	struct orig_node *orig_node;
+	struct batman_if *batman_if;
+	int hdr_size = sizeof(struct icmp_packet);
+	int ret;
+	uint8_t dstaddr[ETH_ALEN];
+
+	/**
+	 * we truncate all incoming icmp packets if they don't match our size
+	 */
+	if (skb->len >= sizeof(struct icmp_packet_rr))
+		hdr_size = sizeof(struct icmp_packet_rr);
+
+	/* drop packet if it has not necessary minimum size */
+	if (unlikely(!pskb_may_pull(skb, hdr_size)))
+		return NET_RX_DROP;
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* packet with unicast indication but broadcast recipient */
+	if (is_broadcast_ether_addr(ethhdr->h_dest))
+		return NET_RX_DROP;
+
+	/* packet with broadcast sender address */
+	if (is_broadcast_ether_addr(ethhdr->h_source))
+		return NET_RX_DROP;
+
+	/* not for me */
+	if (!is_my_mac(ethhdr->h_dest))
+		return NET_RX_DROP;
+
+	icmp_packet = (struct icmp_packet_rr *)skb->data;
+
+	/* add record route information if not full */
+	if ((hdr_size == sizeof(struct icmp_packet_rr)) &&
+	    (icmp_packet->rr_cur < BAT_RR_LEN)) {
+		memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]),
+			ethhdr->h_dest, ETH_ALEN);
+		icmp_packet->rr_cur++;
+	}
+
+	/* packet for me */
+	if (is_my_mac(icmp_packet->dst))
+		return recv_my_icmp_packet(bat_priv, skb, hdr_size);
+
+	/* TTL exceeded */
+	if (icmp_packet->ttl < 2)
+		return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size);
+
+	ret = NET_RX_DROP;
+
+	/* get routing information */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)
+		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
+			       icmp_packet->dst));
+
+	if ((orig_node) && (orig_node->router)) {
+
+		/* don't lock while sending the packets ... we therefore
+		 * copy the required data before sending */
+		batman_if = orig_node->router->if_incoming;
+		memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+		/* create a copy of the skb, if needed, to modify it. */
+		if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+			return NET_RX_DROP;
+
+		icmp_packet = (struct icmp_packet_rr *)skb->data;
+		ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+		/* decrement ttl */
+		icmp_packet->ttl--;
+
+		/* route it */
+		send_skb_packet(skb, batman_if, dstaddr);
+		ret = NET_RX_SUCCESS;
+
+	} else
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	return ret;
+}
+
+/* find a suitable router for this originator, and use
+ * bonding if possible. */
+struct neigh_node *find_router(struct bat_priv *bat_priv,
+			       struct orig_node *orig_node,
+			       struct batman_if *recv_if)
+{
+	struct orig_node *primary_orig_node;
+	struct orig_node *router_orig;
+	struct neigh_node *router, *first_candidate, *best_router;
+	static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
+	int bonding_enabled;
+
+	if (!orig_node)
+		return NULL;
+
+	if (!orig_node->router)
+		return NULL;
+
+	/* without bonding, the first node should
+	 * always choose the default router. */
+
+	bonding_enabled = atomic_read(&bat_priv->bonding);
+
+	if ((!recv_if) && (!bonding_enabled))
+		return orig_node->router;
+
+	router_orig = orig_node->router->orig_node;
+
+	/* if we have something in the primary_addr, we can search
+	 * for a potential bonding candidate. */
+	if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0)
+		return orig_node->router;
+
+	/* find the orig_node which has the primary interface. might
+	 * even be the same as our router_orig in many cases */
+
+	if (memcmp(router_orig->primary_addr,
+				router_orig->orig, ETH_ALEN) == 0) {
+		primary_orig_node = router_orig;
+	} else {
+		primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig,
+					       choose_orig,
+					       router_orig->primary_addr);
+
+		if (!primary_orig_node)
+			return orig_node->router;
+	}
+
+	/* with less than 2 candidates, we can't do any
+	 * bonding and prefer the original router. */
+
+	if (primary_orig_node->bond.candidates < 2)
+		return orig_node->router;
+
+
+	/* all nodes between should choose a candidate which
+	 * is is not on the interface where the packet came
+	 * in. */
+	first_candidate = primary_orig_node->bond.selected;
+	router = first_candidate;
+
+	if (bonding_enabled) {
+		/* in the bonding case, send the packets in a round
+		 * robin fashion over the remaining interfaces. */
+		do {
+			/* recv_if == NULL on the first node. */
+			if (router->if_incoming != recv_if)
+				break;
+
+			router = router->next_bond_candidate;
+		} while (router != first_candidate);
+
+		primary_orig_node->bond.selected = router->next_bond_candidate;
+
+	} else {
+		/* if bonding is disabled, use the best of the
+		 * remaining candidates which are not using
+		 * this interface. */
+		best_router = first_candidate;
+
+		do {
+			/* recv_if == NULL on the first node. */
+			if ((router->if_incoming != recv_if) &&
+				(router->tq_avg > best_router->tq_avg))
+					best_router = router;
+
+			router = router->next_bond_candidate;
+		} while (router != first_candidate);
+
+		router = best_router;
+	}
+
+	return router;
+}
+
+static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
+{
+	struct ethhdr *ethhdr;
+
+	/* drop packet if it has not necessary minimum size */
+	if (unlikely(!pskb_may_pull(skb, hdr_size)))
+		return -1;
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* packet with unicast indication but broadcast recipient */
+	if (is_broadcast_ether_addr(ethhdr->h_dest))
+		return -1;
+
+	/* packet with broadcast sender address */
+	if (is_broadcast_ether_addr(ethhdr->h_source))
+		return -1;
+
+	/* not for me */
+	if (!is_my_mac(ethhdr->h_dest))
+		return -1;
+
+	return 0;
+}
+
+int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
+			 int hdr_size)
+{
+	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	struct orig_node *orig_node;
+	struct neigh_node *router;
+	struct batman_if *batman_if;
+	uint8_t dstaddr[ETH_ALEN];
+	struct unicast_packet *unicast_packet;
+	struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	int ret;
+	struct sk_buff *new_skb;
+
+	unicast_packet = (struct unicast_packet *)skb->data;
+
+	/* TTL exceeded */
+	if (unicast_packet->ttl < 2) {
+		pr_debug("Warning - can't forward unicast packet from %pM to "
+			 "%pM: ttl exceeded\n", ethhdr->h_source,
+			 unicast_packet->dest);
+		return NET_RX_DROP;
+	}
+
+	/* get routing information */
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)
+		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
+			       unicast_packet->dest));
+
+	router = find_router(bat_priv, orig_node, recv_if);
+
+	if (!router) {
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+		return NET_RX_DROP;
+	}
+
+	/* don't lock while sending the packets ... we therefore
+	 * copy the required data before sending */
+
+	batman_if = router->if_incoming;
+	memcpy(dstaddr, router->addr, ETH_ALEN);
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	/* create a copy of the skb, if needed, to modify it. */
+	if (skb_cow(skb, sizeof(struct ethhdr)) < 0)
+		return NET_RX_DROP;
+
+	unicast_packet = (struct unicast_packet *)skb->data;
+
+	if (unicast_packet->packet_type == BAT_UNICAST &&
+	    atomic_read(&bat_priv->fragmentation) &&
+	    skb->len > batman_if->net_dev->mtu)
+		return frag_send_skb(skb, bat_priv, batman_if,
+				     dstaddr);
+
+	if (unicast_packet->packet_type == BAT_UNICAST_FRAG &&
+	    2 * skb->len - hdr_size <= batman_if->net_dev->mtu) {
+
+		ret = frag_reassemble_skb(skb, bat_priv, &new_skb);
+
+		if (ret == NET_RX_DROP)
+			return NET_RX_DROP;
+
+		/* packet was buffered for late merge */
+		if (!new_skb)
+			return NET_RX_SUCCESS;
+
+		skb = new_skb;
+		unicast_packet = (struct unicast_packet *)skb->data;
+	}
+
+	/* decrement ttl */
+	unicast_packet->ttl--;
+
+	/* route it */
+	send_skb_packet(skb, batman_if, dstaddr);
+
+	return NET_RX_SUCCESS;
+}
+
+int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if)
+{
+	struct unicast_packet *unicast_packet;
+	int hdr_size = sizeof(struct unicast_packet);
+
+	if (check_unicast_packet(skb, hdr_size) < 0)
+		return NET_RX_DROP;
+
+	unicast_packet = (struct unicast_packet *)skb->data;
+
+	/* packet for me */
+	if (is_my_mac(unicast_packet->dest)) {
+		interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
+		return NET_RX_SUCCESS;
+	}
+
+	return route_unicast_packet(skb, recv_if, hdr_size);
+}
+
+int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	struct unicast_frag_packet *unicast_packet;
+	int hdr_size = sizeof(struct unicast_frag_packet);
+	struct sk_buff *new_skb = NULL;
+	int ret;
+
+	if (check_unicast_packet(skb, hdr_size) < 0)
+		return NET_RX_DROP;
+
+	unicast_packet = (struct unicast_frag_packet *)skb->data;
+
+	/* packet for me */
+	if (is_my_mac(unicast_packet->dest)) {
+
+		ret = frag_reassemble_skb(skb, bat_priv, &new_skb);
+
+		if (ret == NET_RX_DROP)
+			return NET_RX_DROP;
+
+		/* packet was buffered for late merge */
+		if (!new_skb)
+			return NET_RX_SUCCESS;
+
+		interface_rx(recv_if->soft_iface, new_skb, recv_if,
+			     sizeof(struct unicast_packet));
+		return NET_RX_SUCCESS;
+	}
+
+	return route_unicast_packet(skb, recv_if, hdr_size);
+}
+
+
+int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	struct orig_node *orig_node;
+	struct bcast_packet *bcast_packet;
+	struct ethhdr *ethhdr;
+	int hdr_size = sizeof(struct bcast_packet);
+	int32_t seq_diff;
+
+	/* drop packet if it has not necessary minimum size */
+	if (unlikely(!pskb_may_pull(skb, hdr_size)))
+		return NET_RX_DROP;
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* packet with broadcast indication but unicast recipient */
+	if (!is_broadcast_ether_addr(ethhdr->h_dest))
+		return NET_RX_DROP;
+
+	/* packet with broadcast sender address */
+	if (is_broadcast_ether_addr(ethhdr->h_source))
+		return NET_RX_DROP;
+
+	/* ignore broadcasts sent by myself */
+	if (is_my_mac(ethhdr->h_source))
+		return NET_RX_DROP;
+
+	bcast_packet = (struct bcast_packet *)skb->data;
+
+	/* ignore broadcasts originated by myself */
+	if (is_my_mac(bcast_packet->orig))
+		return NET_RX_DROP;
+
+	if (bcast_packet->ttl < 2)
+		return NET_RX_DROP;
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)
+		     hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
+			       bcast_packet->orig));
+
+	if (!orig_node) {
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+		return NET_RX_DROP;
+	}
+
+	/* check whether the packet is a duplicate */
+	if (get_bit_status(orig_node->bcast_bits,
+			   orig_node->last_bcast_seqno,
+			   ntohl(bcast_packet->seqno))) {
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+		return NET_RX_DROP;
+	}
+
+	seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno;
+
+	/* check whether the packet is old and the host just restarted. */
+	if (window_protected(bat_priv, seq_diff,
+			     &orig_node->bcast_seqno_reset)) {
+		spin_unlock_bh(&bat_priv->orig_hash_lock);
+		return NET_RX_DROP;
+	}
+
+	/* mark broadcast in flood history, update window position
+	 * if required. */
+	if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1))
+		orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno);
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+	/* rebroadcast packet */
+	add_bcast_packet_to_list(bat_priv, skb);
+
+	/* broadcast for me */
+	interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
+
+	return NET_RX_SUCCESS;
+}
+
+int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if)
+{
+	struct vis_packet *vis_packet;
+	struct ethhdr *ethhdr;
+	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	int hdr_size = sizeof(struct vis_packet);
+
+	/* keep skb linear */
+	if (skb_linearize(skb) < 0)
+		return NET_RX_DROP;
+
+	if (unlikely(!pskb_may_pull(skb, hdr_size)))
+		return NET_RX_DROP;
+
+	vis_packet = (struct vis_packet *)skb->data;
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* not for me */
+	if (!is_my_mac(ethhdr->h_dest))
+		return NET_RX_DROP;
+
+	/* ignore own packets */
+	if (is_my_mac(vis_packet->vis_orig))
+		return NET_RX_DROP;
+
+	if (is_my_mac(vis_packet->sender_orig))
+		return NET_RX_DROP;
+
+	switch (vis_packet->vis_type) {
+	case VIS_TYPE_SERVER_SYNC:
+		receive_server_sync_packet(bat_priv, vis_packet,
+					   skb_headlen(skb));
+		break;
+
+	case VIS_TYPE_CLIENT_UPDATE:
+		receive_client_update_packet(bat_priv, vis_packet,
+					     skb_headlen(skb));
+		break;
+
+	default:	/* ignore unknown packet */
+		break;
+	}
+
+	/* We take a copy of the data in the packet, so we should
+	   always free the skbuf. */
+	return NET_RX_DROP;
+}
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
new file mode 100644
index 000000000000..f108f230bfdb
--- /dev/null
+++ b/net/batman-adv/routing.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_ROUTING_H_
+#define _NET_BATMAN_ADV_ROUTING_H_
+
+#include "types.h"
+
+void slide_own_bcast_window(struct batman_if *batman_if);
+void receive_bat_packet(struct ethhdr *ethhdr,
+				struct batman_packet *batman_packet,
+				unsigned char *hna_buff, int hna_buff_len,
+				struct batman_if *if_incoming);
+void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node,
+		   struct neigh_node *neigh_node, unsigned char *hna_buff,
+		   int hna_buff_len);
+int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if,
+			 int hdr_size);
+int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if);
+int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if);
+struct neigh_node *find_router(struct bat_priv *bat_priv,
+		struct orig_node *orig_node, struct batman_if *recv_if);
+void update_bonding_candidates(struct bat_priv *bat_priv,
+			       struct orig_node *orig_node);
+
+#endif /* _NET_BATMAN_ADV_ROUTING_H_ */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
new file mode 100644
index 000000000000..b89b9f7709ae
--- /dev/null
+++ b/net/batman-adv/send.c
@@ -0,0 +1,585 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "send.h"
+#include "routing.h"
+#include "translation-table.h"
+#include "soft-interface.h"
+#include "hard-interface.h"
+#include "types.h"
+#include "vis.h"
+#include "aggregation.h"
+#include "gateway_common.h"
+#include "originator.h"
+
+static void send_outstanding_bcast_packet(struct work_struct *work);
+
+/* apply hop penalty for a normal link */
+static uint8_t hop_penalty(const uint8_t tq, struct bat_priv *bat_priv)
+{
+	int hop_penalty = atomic_read(&bat_priv->hop_penalty);
+	return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE);
+}
+
+/* when do we schedule our own packet to be sent */
+static unsigned long own_send_time(struct bat_priv *bat_priv)
+{
+	return jiffies + msecs_to_jiffies(
+		   atomic_read(&bat_priv->orig_interval) -
+		   JITTER + (random32() % 2*JITTER));
+}
+
+/* when do we schedule a forwarded packet to be sent */
+static unsigned long forward_send_time(struct bat_priv *bat_priv)
+{
+	return jiffies + msecs_to_jiffies(random32() % (JITTER/2));
+}
+
+/* send out an already prepared packet to the given address via the
+ * specified batman interface */
+int send_skb_packet(struct sk_buff *skb,
+				struct batman_if *batman_if,
+				uint8_t *dst_addr)
+{
+	struct ethhdr *ethhdr;
+
+	if (batman_if->if_status != IF_ACTIVE)
+		goto send_skb_err;
+
+	if (unlikely(!batman_if->net_dev))
+		goto send_skb_err;
+
+	if (!(batman_if->net_dev->flags & IFF_UP)) {
+		pr_warning("Interface %s is not up - can't send packet via "
+			   "that interface!\n", batman_if->net_dev->name);
+		goto send_skb_err;
+	}
+
+	/* push to the ethernet header. */
+	if (my_skb_head_push(skb, sizeof(struct ethhdr)) < 0)
+		goto send_skb_err;
+
+	skb_reset_mac_header(skb);
+
+	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+	memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
+	ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
+
+	skb_set_network_header(skb, ETH_HLEN);
+	skb->priority = TC_PRIO_CONTROL;
+	skb->protocol = __constant_htons(ETH_P_BATMAN);
+
+	skb->dev = batman_if->net_dev;
+
+	/* dev_queue_xmit() returns a negative result on error.	 However on
+	 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
+	 * (which is > 0). This will not be treated as an error. */
+
+	return dev_queue_xmit(skb);
+send_skb_err:
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
+/* Send a packet to a given interface */
+static void send_packet_to_if(struct forw_packet *forw_packet,
+			      struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	char *fwd_str;
+	uint8_t packet_num;
+	int16_t buff_pos;
+	struct batman_packet *batman_packet;
+	struct sk_buff *skb;
+
+	if (batman_if->if_status != IF_ACTIVE)
+		return;
+
+	packet_num = 0;
+	buff_pos = 0;
+	batman_packet = (struct batman_packet *)forw_packet->skb->data;
+
+	/* adjust all flags and log packets */
+	while (aggregated_packet(buff_pos,
+				 forw_packet->packet_len,
+				 batman_packet->num_hna)) {
+
+		/* we might have aggregated direct link packets with an
+		 * ordinary base packet */
+		if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
+		    (forw_packet->if_incoming == batman_if))
+			batman_packet->flags |= DIRECTLINK;
+		else
+			batman_packet->flags &= ~DIRECTLINK;
+
+		fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
+							    "Sending own" :
+							    "Forwarding"));
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d,"
+			" IDF %s) on interface %s [%pM]\n",
+			fwd_str, (packet_num > 0 ? "aggregated " : ""),
+			batman_packet->orig, ntohl(batman_packet->seqno),
+			batman_packet->tq, batman_packet->ttl,
+			(batman_packet->flags & DIRECTLINK ?
+			 "on" : "off"),
+			batman_if->net_dev->name, batman_if->net_dev->dev_addr);
+
+		buff_pos += sizeof(struct batman_packet) +
+			(batman_packet->num_hna * ETH_ALEN);
+		packet_num++;
+		batman_packet = (struct batman_packet *)
+			(forw_packet->skb->data + buff_pos);
+	}
+
+	/* create clone because function is called more than once */
+	skb = skb_clone(forw_packet->skb, GFP_ATOMIC);
+	if (skb)
+		send_skb_packet(skb, batman_if, broadcast_addr);
+}
+
+/* send a batman packet */
+static void send_packet(struct forw_packet *forw_packet)
+{
+	struct batman_if *batman_if;
+	struct net_device *soft_iface;
+	struct bat_priv *bat_priv;
+	struct batman_packet *batman_packet =
+		(struct batman_packet *)(forw_packet->skb->data);
+	unsigned char directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0);
+
+	if (!forw_packet->if_incoming) {
+		pr_err("Error - can't forward packet: incoming iface not "
+		       "specified\n");
+		return;
+	}
+
+	soft_iface = forw_packet->if_incoming->soft_iface;
+	bat_priv = netdev_priv(soft_iface);
+
+	if (forw_packet->if_incoming->if_status != IF_ACTIVE)
+		return;
+
+	/* multihomed peer assumed */
+	/* non-primary OGMs are only broadcasted on their interface */
+	if ((directlink && (batman_packet->ttl == 1)) ||
+	    (forw_packet->own && (forw_packet->if_incoming->if_num > 0))) {
+
+		/* FIXME: what about aggregated packets ? */
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"%s packet (originator %pM, seqno %d, TTL %d) "
+			"on interface %s [%pM]\n",
+			(forw_packet->own ? "Sending own" : "Forwarding"),
+			batman_packet->orig, ntohl(batman_packet->seqno),
+			batman_packet->ttl,
+			forw_packet->if_incoming->net_dev->name,
+			forw_packet->if_incoming->net_dev->dev_addr);
+
+		/* skb is only used once and than forw_packet is free'd */
+		send_skb_packet(forw_packet->skb, forw_packet->if_incoming,
+				broadcast_addr);
+		forw_packet->skb = NULL;
+
+		return;
+	}
+
+	/* broadcast on every interface */
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->soft_iface != soft_iface)
+			continue;
+
+		send_packet_to_if(forw_packet, batman_if);
+	}
+	rcu_read_unlock();
+}
+
+static void rebuild_batman_packet(struct bat_priv *bat_priv,
+				  struct batman_if *batman_if)
+{
+	int new_len;
+	unsigned char *new_buff;
+	struct batman_packet *batman_packet;
+
+	new_len = sizeof(struct batman_packet) +
+			(bat_priv->num_local_hna * ETH_ALEN);
+	new_buff = kmalloc(new_len, GFP_ATOMIC);
+
+	/* keep old buffer if kmalloc should fail */
+	if (new_buff) {
+		memcpy(new_buff, batman_if->packet_buff,
+		       sizeof(struct batman_packet));
+		batman_packet = (struct batman_packet *)new_buff;
+
+		batman_packet->num_hna = hna_local_fill_buffer(bat_priv,
+				new_buff + sizeof(struct batman_packet),
+				new_len - sizeof(struct batman_packet));
+
+		kfree(batman_if->packet_buff);
+		batman_if->packet_buff = new_buff;
+		batman_if->packet_len = new_len;
+	}
+}
+
+void schedule_own_packet(struct batman_if *batman_if)
+{
+	struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface);
+	unsigned long send_time;
+	struct batman_packet *batman_packet;
+	int vis_server;
+
+	if ((batman_if->if_status == IF_NOT_IN_USE) ||
+	    (batman_if->if_status == IF_TO_BE_REMOVED))
+		return;
+
+	vis_server = atomic_read(&bat_priv->vis_mode);
+
+	/**
+	 * the interface gets activated here to avoid race conditions between
+	 * the moment of activating the interface in
+	 * hardif_activate_interface() where the originator mac is set and
+	 * outdated packets (especially uninitialized mac addresses) in the
+	 * packet queue
+	 */
+	if (batman_if->if_status == IF_TO_BE_ACTIVATED)
+		batman_if->if_status = IF_ACTIVE;
+
+	/* if local hna has changed and interface is a primary interface */
+	if ((atomic_read(&bat_priv->hna_local_changed)) &&
+	    (batman_if == bat_priv->primary_if))
+		rebuild_batman_packet(bat_priv, batman_if);
+
+	/**
+	 * NOTE: packet_buff might just have been re-allocated in
+	 * rebuild_batman_packet()
+	 */
+	batman_packet = (struct batman_packet *)batman_if->packet_buff;
+
+	/* change sequence number to network order */
+	batman_packet->seqno =
+		htonl((uint32_t)atomic_read(&batman_if->seqno));
+
+	if (vis_server == VIS_TYPE_SERVER_SYNC)
+		batman_packet->flags |= VIS_SERVER;
+	else
+		batman_packet->flags &= ~VIS_SERVER;
+
+	if ((batman_if == bat_priv->primary_if) &&
+	    (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER))
+		batman_packet->gw_flags =
+				(uint8_t)atomic_read(&bat_priv->gw_bandwidth);
+	else
+		batman_packet->gw_flags = 0;
+
+	atomic_inc(&batman_if->seqno);
+
+	slide_own_bcast_window(batman_if);
+	send_time = own_send_time(bat_priv);
+	add_bat_packet_to_list(bat_priv,
+			       batman_if->packet_buff,
+			       batman_if->packet_len,
+			       batman_if, 1, send_time);
+}
+
+void schedule_forward_packet(struct orig_node *orig_node,
+			     struct ethhdr *ethhdr,
+			     struct batman_packet *batman_packet,
+			     uint8_t directlink, int hna_buff_len,
+			     struct batman_if *if_incoming)
+{
+	struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
+	unsigned char in_tq, in_ttl, tq_avg = 0;
+	unsigned long send_time;
+
+	if (batman_packet->ttl <= 1) {
+		bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n");
+		return;
+	}
+
+	in_tq = batman_packet->tq;
+	in_ttl = batman_packet->ttl;
+
+	batman_packet->ttl--;
+	memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN);
+
+	/* rebroadcast tq of our best ranking neighbor to ensure the rebroadcast
+	 * of our best tq value */
+	if ((orig_node->router) && (orig_node->router->tq_avg != 0)) {
+
+		/* rebroadcast ogm of best ranking neighbor as is */
+		if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) {
+			batman_packet->tq = orig_node->router->tq_avg;
+
+			if (orig_node->router->last_ttl)
+				batman_packet->ttl = orig_node->router->last_ttl
+							- 1;
+		}
+
+		tq_avg = orig_node->router->tq_avg;
+	}
+
+	/* apply hop penalty */
+	batman_packet->tq = hop_penalty(batman_packet->tq, bat_priv);
+
+	bat_dbg(DBG_BATMAN, bat_priv,
+		"Forwarding packet: tq_orig: %i, tq_avg: %i, "
+		"tq_forw: %i, ttl_orig: %i, ttl_forw: %i\n",
+		in_tq, tq_avg, batman_packet->tq, in_ttl - 1,
+		batman_packet->ttl);
+
+	batman_packet->seqno = htonl(batman_packet->seqno);
+
+	/* switch of primaries first hop flag when forwarding */
+	batman_packet->flags &= ~PRIMARIES_FIRST_HOP;
+	if (directlink)
+		batman_packet->flags |= DIRECTLINK;
+	else
+		batman_packet->flags &= ~DIRECTLINK;
+
+	send_time = forward_send_time(bat_priv);
+	add_bat_packet_to_list(bat_priv,
+			       (unsigned char *)batman_packet,
+			       sizeof(struct batman_packet) + hna_buff_len,
+			       if_incoming, 0, send_time);
+}
+
+static void forw_packet_free(struct forw_packet *forw_packet)
+{
+	if (forw_packet->skb)
+		kfree_skb(forw_packet->skb);
+	kfree(forw_packet);
+}
+
+static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
+				      struct forw_packet *forw_packet,
+				      unsigned long send_time)
+{
+	INIT_HLIST_NODE(&forw_packet->list);
+
+	/* add new packet to packet list */
+	spin_lock_bh(&bat_priv->forw_bcast_list_lock);
+	hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list);
+	spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+
+	/* start timer for this packet */
+	INIT_DELAYED_WORK(&forw_packet->delayed_work,
+			  send_outstanding_bcast_packet);
+	queue_delayed_work(bat_event_workqueue, &forw_packet->delayed_work,
+			   send_time);
+}
+
+#define atomic_dec_not_zero(v)          atomic_add_unless((v), -1, 0)
+/* add a broadcast packet to the queue and setup timers. broadcast packets
+ * are sent multiple times to increase probability for beeing received.
+ *
+ * This function returns NETDEV_TX_OK on success and NETDEV_TX_BUSY on
+ * errors.
+ *
+ * The skb is not consumed, so the caller should make sure that the
+ * skb is freed. */
+int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb)
+{
+	struct forw_packet *forw_packet;
+	struct bcast_packet *bcast_packet;
+
+	if (!atomic_dec_not_zero(&bat_priv->bcast_queue_left)) {
+		bat_dbg(DBG_BATMAN, bat_priv, "bcast packet queue full\n");
+		goto out;
+	}
+
+	if (!bat_priv->primary_if)
+		goto out;
+
+	forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC);
+
+	if (!forw_packet)
+		goto out_and_inc;
+
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (!skb)
+		goto packet_free;
+
+	/* as we have a copy now, it is safe to decrease the TTL */
+	bcast_packet = (struct bcast_packet *)skb->data;
+	bcast_packet->ttl--;
+
+	skb_reset_mac_header(skb);
+
+	forw_packet->skb = skb;
+	forw_packet->if_incoming = bat_priv->primary_if;
+
+	/* how often did we send the bcast packet ? */
+	forw_packet->num_packets = 0;
+
+	_add_bcast_packet_to_list(bat_priv, forw_packet, 1);
+	return NETDEV_TX_OK;
+
+packet_free:
+	kfree(forw_packet);
+out_and_inc:
+	atomic_inc(&bat_priv->bcast_queue_left);
+out:
+	return NETDEV_TX_BUSY;
+}
+
+static void send_outstanding_bcast_packet(struct work_struct *work)
+{
+	struct batman_if *batman_if;
+	struct delayed_work *delayed_work =
+		container_of(work, struct delayed_work, work);
+	struct forw_packet *forw_packet =
+		container_of(delayed_work, struct forw_packet, delayed_work);
+	struct sk_buff *skb1;
+	struct net_device *soft_iface = forw_packet->if_incoming->soft_iface;
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+
+	spin_lock_bh(&bat_priv->forw_bcast_list_lock);
+	hlist_del(&forw_packet->list);
+	spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+
+	if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING)
+		goto out;
+
+	/* rebroadcast packet */
+	rcu_read_lock();
+	list_for_each_entry_rcu(batman_if, &if_list, list) {
+		if (batman_if->soft_iface != soft_iface)
+			continue;
+
+		/* send a copy of the saved skb */
+		skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
+		if (skb1)
+			send_skb_packet(skb1, batman_if, broadcast_addr);
+	}
+	rcu_read_unlock();
+
+	forw_packet->num_packets++;
+
+	/* if we still have some more bcasts to send */
+	if (forw_packet->num_packets < 3) {
+		_add_bcast_packet_to_list(bat_priv, forw_packet,
+					  ((5 * HZ) / 1000));
+		return;
+	}
+
+out:
+	forw_packet_free(forw_packet);
+	atomic_inc(&bat_priv->bcast_queue_left);
+}
+
+void send_outstanding_bat_packet(struct work_struct *work)
+{
+	struct delayed_work *delayed_work =
+		container_of(work, struct delayed_work, work);
+	struct forw_packet *forw_packet =
+		container_of(delayed_work, struct forw_packet, delayed_work);
+	struct bat_priv *bat_priv;
+
+	bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	hlist_del(&forw_packet->list);
+	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+	if (atomic_read(&bat_priv->mesh_state) == MESH_DEACTIVATING)
+		goto out;
+
+	send_packet(forw_packet);
+
+	/**
+	 * we have to have at least one packet in the queue
+	 * to determine the queues wake up time unless we are
+	 * shutting down
+	 */
+	if (forw_packet->own)
+		schedule_own_packet(forw_packet->if_incoming);
+
+out:
+	/* don't count own packet */
+	if (!forw_packet->own)
+		atomic_inc(&bat_priv->batman_queue_left);
+
+	forw_packet_free(forw_packet);
+}
+
+void purge_outstanding_packets(struct bat_priv *bat_priv,
+			       struct batman_if *batman_if)
+{
+	struct forw_packet *forw_packet;
+	struct hlist_node *tmp_node, *safe_tmp_node;
+
+	if (batman_if)
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"purge_outstanding_packets(): %s\n",
+			batman_if->net_dev->name);
+	else
+		bat_dbg(DBG_BATMAN, bat_priv,
+			"purge_outstanding_packets()\n");
+
+	/* free bcast list */
+	spin_lock_bh(&bat_priv->forw_bcast_list_lock);
+	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
+				  &bat_priv->forw_bcast_list, list) {
+
+		/**
+		 * if purge_outstanding_packets() was called with an argmument
+		 * we delete only packets belonging to the given interface
+		 */
+		if ((batman_if) &&
+		    (forw_packet->if_incoming != batman_if))
+			continue;
+
+		spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+
+		/**
+		 * send_outstanding_bcast_packet() will lock the list to
+		 * delete the item from the list
+		 */
+		cancel_delayed_work_sync(&forw_packet->delayed_work);
+		spin_lock_bh(&bat_priv->forw_bcast_list_lock);
+	}
+	spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+
+	/* free batman packet list */
+	spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	hlist_for_each_entry_safe(forw_packet, tmp_node, safe_tmp_node,
+				  &bat_priv->forw_bat_list, list) {
+
+		/**
+		 * if purge_outstanding_packets() was called with an argmument
+		 * we delete only packets belonging to the given interface
+		 */
+		if ((batman_if) &&
+		    (forw_packet->if_incoming != batman_if))
+			continue;
+
+		spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+		/**
+		 * send_outstanding_bat_packet() will lock the list to
+		 * delete the item from the list
+		 */
+		cancel_delayed_work_sync(&forw_packet->delayed_work);
+		spin_lock_bh(&bat_priv->forw_bat_list_lock);
+	}
+	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
new file mode 100644
index 000000000000..c4cefa8e4f85
--- /dev/null
+++ b/net/batman-adv/send.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_SEND_H_
+#define _NET_BATMAN_ADV_SEND_H_
+
+#include "types.h"
+
+int send_skb_packet(struct sk_buff *skb,
+				struct batman_if *batman_if,
+				uint8_t *dst_addr);
+void schedule_own_packet(struct batman_if *batman_if);
+void schedule_forward_packet(struct orig_node *orig_node,
+			     struct ethhdr *ethhdr,
+			     struct batman_packet *batman_packet,
+			     uint8_t directlink, int hna_buff_len,
+			     struct batman_if *if_outgoing);
+int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb);
+void send_outstanding_bat_packet(struct work_struct *work);
+void purge_outstanding_packets(struct bat_priv *bat_priv,
+			       struct batman_if *batman_if);
+
+#endif /* _NET_BATMAN_ADV_SEND_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
new file mode 100644
index 000000000000..e89ede192ed0
--- /dev/null
+++ b/net/batman-adv/soft-interface.c
@@ -0,0 +1,697 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "soft-interface.h"
+#include "hard-interface.h"
+#include "routing.h"
+#include "send.h"
+#include "bat_debugfs.h"
+#include "translation-table.h"
+#include "types.h"
+#include "hash.h"
+#include "gateway_common.h"
+#include "gateway_client.h"
+#include "send.h"
+#include "bat_sysfs.h"
+#include <linux/slab.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include "unicast.h"
+#include "routing.h"
+
+
+static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
+static void bat_get_drvinfo(struct net_device *dev,
+			    struct ethtool_drvinfo *info);
+static u32 bat_get_msglevel(struct net_device *dev);
+static void bat_set_msglevel(struct net_device *dev, u32 value);
+static u32 bat_get_link(struct net_device *dev);
+static u32 bat_get_rx_csum(struct net_device *dev);
+static int bat_set_rx_csum(struct net_device *dev, u32 data);
+
+static const struct ethtool_ops bat_ethtool_ops = {
+	.get_settings = bat_get_settings,
+	.get_drvinfo = bat_get_drvinfo,
+	.get_msglevel = bat_get_msglevel,
+	.set_msglevel = bat_set_msglevel,
+	.get_link = bat_get_link,
+	.get_rx_csum = bat_get_rx_csum,
+	.set_rx_csum = bat_set_rx_csum
+};
+
+int my_skb_head_push(struct sk_buff *skb, unsigned int len)
+{
+	int result;
+
+	/**
+	 * TODO: We must check if we can release all references to non-payload
+	 * data using skb_header_release in our skbs to allow skb_cow_header to
+	 * work optimally. This means that those skbs are not allowed to read
+	 * or write any data which is before the current position of skb->data
+	 * after that call and thus allow other skbs with the same data buffer
+	 * to write freely in that area.
+	 */
+	result = skb_cow_head(skb, len);
+	if (result < 0)
+		return result;
+
+	skb_push(skb, len);
+	return 0;
+}
+
+static void softif_neigh_free_ref(struct kref *refcount)
+{
+	struct softif_neigh *softif_neigh;
+
+	softif_neigh = container_of(refcount, struct softif_neigh, refcount);
+	kfree(softif_neigh);
+}
+
+static void softif_neigh_free_rcu(struct rcu_head *rcu)
+{
+	struct softif_neigh *softif_neigh;
+
+	softif_neigh = container_of(rcu, struct softif_neigh, rcu);
+	kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
+}
+
+void softif_neigh_purge(struct bat_priv *bat_priv)
+{
+	struct softif_neigh *softif_neigh, *softif_neigh_tmp;
+	struct hlist_node *node, *node_tmp;
+
+	spin_lock_bh(&bat_priv->softif_neigh_lock);
+
+	hlist_for_each_entry_safe(softif_neigh, node, node_tmp,
+				  &bat_priv->softif_neigh_list, list) {
+
+		if ((!time_after(jiffies, softif_neigh->last_seen +
+				msecs_to_jiffies(SOFTIF_NEIGH_TIMEOUT))) &&
+		    (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE))
+			continue;
+
+		hlist_del_rcu(&softif_neigh->list);
+
+		if (bat_priv->softif_neigh == softif_neigh) {
+			bat_dbg(DBG_ROUTES, bat_priv,
+				 "Current mesh exit point '%pM' vanished "
+				 "(vid: %d).\n",
+				 softif_neigh->addr, softif_neigh->vid);
+			softif_neigh_tmp = bat_priv->softif_neigh;
+			bat_priv->softif_neigh = NULL;
+			kref_put(&softif_neigh_tmp->refcount,
+				 softif_neigh_free_ref);
+		}
+
+		call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu);
+	}
+
+	spin_unlock_bh(&bat_priv->softif_neigh_lock);
+}
+
+static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv,
+					     uint8_t *addr, short vid)
+{
+	struct softif_neigh *softif_neigh;
+	struct hlist_node *node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(softif_neigh, node,
+				 &bat_priv->softif_neigh_list, list) {
+		if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0)
+			continue;
+
+		if (softif_neigh->vid != vid)
+			continue;
+
+		softif_neigh->last_seen = jiffies;
+		goto found;
+	}
+
+	softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC);
+	if (!softif_neigh)
+		goto out;
+
+	memcpy(softif_neigh->addr, addr, ETH_ALEN);
+	softif_neigh->vid = vid;
+	softif_neigh->last_seen = jiffies;
+	kref_init(&softif_neigh->refcount);
+
+	INIT_HLIST_NODE(&softif_neigh->list);
+	spin_lock_bh(&bat_priv->softif_neigh_lock);
+	hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list);
+	spin_unlock_bh(&bat_priv->softif_neigh_lock);
+
+found:
+	kref_get(&softif_neigh->refcount);
+out:
+	rcu_read_unlock();
+	return softif_neigh;
+}
+
+int softif_neigh_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct softif_neigh *softif_neigh;
+	struct hlist_node *node;
+	size_t buf_size, pos;
+	char *buff;
+
+	if (!bat_priv->primary_if) {
+		return seq_printf(seq, "BATMAN mesh %s disabled - "
+			       "please specify interfaces to enable it\n",
+			       net_dev->name);
+	}
+
+	seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name);
+
+	buf_size = 1;
+	/* Estimate length for: "   xx:xx:xx:xx:xx:xx\n" */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(softif_neigh, node,
+				 &bat_priv->softif_neigh_list, list)
+		buf_size += 30;
+	rcu_read_unlock();
+
+	buff = kmalloc(buf_size, GFP_ATOMIC);
+	if (!buff)
+		return -ENOMEM;
+
+	buff[0] = '\0';
+	pos = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(softif_neigh, node,
+				 &bat_priv->softif_neigh_list, list) {
+		pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n",
+				bat_priv->softif_neigh == softif_neigh
+				? "=>" : "  ", softif_neigh->addr,
+				softif_neigh->vid);
+	}
+	rcu_read_unlock();
+
+	seq_printf(seq, "%s", buff);
+	kfree(buff);
+	return 0;
+}
+
+static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev,
+			       short vid)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+	struct batman_packet *batman_packet;
+	struct softif_neigh *softif_neigh, *softif_neigh_tmp;
+
+	if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
+		batman_packet = (struct batman_packet *)
+					(skb->data + ETH_HLEN + VLAN_HLEN);
+	else
+		batman_packet = (struct batman_packet *)(skb->data + ETH_HLEN);
+
+	if (batman_packet->version != COMPAT_VERSION)
+		goto err;
+
+	if (batman_packet->packet_type != BAT_PACKET)
+		goto err;
+
+	if (!(batman_packet->flags & PRIMARIES_FIRST_HOP))
+		goto err;
+
+	if (is_my_mac(batman_packet->orig))
+		goto err;
+
+	softif_neigh = softif_neigh_get(bat_priv, batman_packet->orig, vid);
+
+	if (!softif_neigh)
+		goto err;
+
+	if (bat_priv->softif_neigh == softif_neigh)
+		goto out;
+
+	/* we got a neighbor but its mac is 'bigger' than ours  */
+	if (memcmp(bat_priv->primary_if->net_dev->dev_addr,
+		   softif_neigh->addr, ETH_ALEN) < 0)
+		goto out;
+
+	/* switch to new 'smallest neighbor' */
+	if ((bat_priv->softif_neigh) &&
+	    (memcmp(softif_neigh->addr, bat_priv->softif_neigh->addr,
+							ETH_ALEN) < 0)) {
+		bat_dbg(DBG_ROUTES, bat_priv,
+			"Changing mesh exit point from %pM (vid: %d) "
+			"to %pM (vid: %d).\n",
+			 bat_priv->softif_neigh->addr,
+			 bat_priv->softif_neigh->vid,
+			 softif_neigh->addr, softif_neigh->vid);
+		softif_neigh_tmp = bat_priv->softif_neigh;
+		bat_priv->softif_neigh = softif_neigh;
+		kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref);
+		/* we need to hold the additional reference */
+		goto err;
+	}
+
+	/* close own batX device and use softif_neigh as exit node */
+	if ((!bat_priv->softif_neigh) &&
+	    (memcmp(softif_neigh->addr,
+		    bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN) < 0)) {
+		bat_dbg(DBG_ROUTES, bat_priv,
+			"Setting mesh exit point to %pM (vid: %d).\n",
+			softif_neigh->addr, softif_neigh->vid);
+		bat_priv->softif_neigh = softif_neigh;
+		/* we need to hold the additional reference */
+		goto err;
+	}
+
+out:
+	kref_put(&softif_neigh->refcount, softif_neigh_free_ref);
+err:
+	kfree_skb(skb);
+	return;
+}
+
+static int interface_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	return 0;
+}
+
+static int interface_release(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static struct net_device_stats *interface_stats(struct net_device *dev)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	return &bat_priv->stats;
+}
+
+static int interface_set_mac_addr(struct net_device *dev, void *p)
+{
+	struct bat_priv *bat_priv = netdev_priv(dev);
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	/* only modify hna-table if it has been initialised before */
+	if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) {
+		hna_local_remove(bat_priv, dev->dev_addr,
+				 "mac address changed");
+		hna_local_add(dev, addr->sa_data);
+	}
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	return 0;
+}
+
+static int interface_change_mtu(struct net_device *dev, int new_mtu)
+{
+	/* check ranges */
+	if ((new_mtu < 68) || (new_mtu > hardif_min_mtu(dev)))
+		return -EINVAL;
+
+	dev->mtu = new_mtu;
+
+	return 0;
+}
+
+int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
+{
+	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+	struct bcast_packet *bcast_packet;
+	struct vlan_ethhdr *vhdr;
+	int data_len = skb->len, ret;
+	short vid = -1;
+	bool do_bcast = false;
+
+	if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
+		goto dropped;
+
+	soft_iface->trans_start = jiffies;
+
+	switch (ntohs(ethhdr->h_proto)) {
+	case ETH_P_8021Q:
+		vhdr = (struct vlan_ethhdr *)skb->data;
+		vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+
+		if (ntohs(vhdr->h_vlan_encapsulated_proto) != ETH_P_BATMAN)
+			break;
+
+		/* fall through */
+	case ETH_P_BATMAN:
+		softif_batman_recv(skb, soft_iface, vid);
+		goto end;
+	}
+
+	/**
+	 * if we have a another chosen mesh exit node in range
+	 * it will transport the packets to the mesh
+	 */
+	if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid))
+		goto dropped;
+
+	/* TODO: check this for locks */
+	hna_local_add(soft_iface, ethhdr->h_source);
+
+	if (is_multicast_ether_addr(ethhdr->h_dest)) {
+		ret = gw_is_target(bat_priv, skb);
+
+		if (ret < 0)
+			goto dropped;
+
+		if (ret == 0)
+			do_bcast = true;
+	}
+
+	/* ethernet packet should be broadcasted */
+	if (do_bcast) {
+		if (!bat_priv->primary_if)
+			goto dropped;
+
+		if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0)
+			goto dropped;
+
+		bcast_packet = (struct bcast_packet *)skb->data;
+		bcast_packet->version = COMPAT_VERSION;
+		bcast_packet->ttl = TTL;
+
+		/* batman packet type: broadcast */
+		bcast_packet->packet_type = BAT_BCAST;
+
+		/* hw address of first interface is the orig mac because only
+		 * this mac is known throughout the mesh */
+		memcpy(bcast_packet->orig,
+		       bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+
+		/* set broadcast sequence number */
+		bcast_packet->seqno =
+			htonl(atomic_inc_return(&bat_priv->bcast_seqno));
+
+		add_bcast_packet_to_list(bat_priv, skb);
+
+		/* a copy is stored in the bcast list, therefore removing
+		 * the original skb. */
+		kfree_skb(skb);
+
+	/* unicast packet */
+	} else {
+		ret = unicast_send_skb(skb, bat_priv);
+		if (ret != 0)
+			goto dropped_freed;
+	}
+
+	bat_priv->stats.tx_packets++;
+	bat_priv->stats.tx_bytes += data_len;
+	goto end;
+
+dropped:
+	kfree_skb(skb);
+dropped_freed:
+	bat_priv->stats.tx_dropped++;
+end:
+	return NETDEV_TX_OK;
+}
+
+void interface_rx(struct net_device *soft_iface,
+		  struct sk_buff *skb, struct batman_if *recv_if,
+		  int hdr_size)
+{
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+	struct unicast_packet *unicast_packet;
+	struct ethhdr *ethhdr;
+	struct vlan_ethhdr *vhdr;
+	short vid = -1;
+	int ret;
+
+	/* check if enough space is available for pulling, and pull */
+	if (!pskb_may_pull(skb, hdr_size))
+		goto dropped;
+
+	skb_pull_rcsum(skb, hdr_size);
+	skb_reset_mac_header(skb);
+
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	switch (ntohs(ethhdr->h_proto)) {
+	case ETH_P_8021Q:
+		vhdr = (struct vlan_ethhdr *)skb->data;
+		vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+
+		if (ntohs(vhdr->h_vlan_encapsulated_proto) != ETH_P_BATMAN)
+			break;
+
+		/* fall through */
+	case ETH_P_BATMAN:
+		goto dropped;
+	}
+
+	/**
+	 * if we have a another chosen mesh exit node in range
+	 * it will transport the packets to the non-mesh network
+	 */
+	if ((bat_priv->softif_neigh) && (bat_priv->softif_neigh->vid == vid)) {
+		skb_push(skb, hdr_size);
+		unicast_packet = (struct unicast_packet *)skb->data;
+
+		if ((unicast_packet->packet_type != BAT_UNICAST) &&
+		    (unicast_packet->packet_type != BAT_UNICAST_FRAG))
+			goto dropped;
+
+		skb_reset_mac_header(skb);
+
+		memcpy(unicast_packet->dest,
+		       bat_priv->softif_neigh->addr, ETH_ALEN);
+		ret = route_unicast_packet(skb, recv_if, hdr_size);
+		if (ret == NET_RX_DROP)
+			goto dropped;
+
+		goto out;
+	}
+
+	/* skb->dev & skb->pkt_type are set here */
+	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+		goto dropped;
+	skb->protocol = eth_type_trans(skb, soft_iface);
+
+	/* should not be neccesary anymore as we use skb_pull_rcsum()
+	 * TODO: please verify this and remove this TODO
+	 * -- Dec 21st 2009, Simon Wunderlich */
+
+/*	skb->ip_summed = CHECKSUM_UNNECESSARY;*/
+
+	bat_priv->stats.rx_packets++;
+	bat_priv->stats.rx_bytes += skb->len + sizeof(struct ethhdr);
+
+	soft_iface->last_rx = jiffies;
+
+	netif_rx(skb);
+	return;
+
+dropped:
+	kfree_skb(skb);
+out:
+	return;
+}
+
+#ifdef HAVE_NET_DEVICE_OPS
+static const struct net_device_ops bat_netdev_ops = {
+	.ndo_open = interface_open,
+	.ndo_stop = interface_release,
+	.ndo_get_stats = interface_stats,
+	.ndo_set_mac_address = interface_set_mac_addr,
+	.ndo_change_mtu = interface_change_mtu,
+	.ndo_start_xmit = interface_tx,
+	.ndo_validate_addr = eth_validate_addr
+};
+#endif
+
+static void interface_setup(struct net_device *dev)
+{
+	struct bat_priv *priv = netdev_priv(dev);
+	char dev_addr[ETH_ALEN];
+
+	ether_setup(dev);
+
+#ifdef HAVE_NET_DEVICE_OPS
+	dev->netdev_ops = &bat_netdev_ops;
+#else
+	dev->open = interface_open;
+	dev->stop = interface_release;
+	dev->get_stats = interface_stats;
+	dev->set_mac_address = interface_set_mac_addr;
+	dev->change_mtu = interface_change_mtu;
+	dev->hard_start_xmit = interface_tx;
+#endif
+	dev->destructor = free_netdev;
+
+	/**
+	 * can't call min_mtu, because the needed variables
+	 * have not been initialized yet
+	 */
+	dev->mtu = ETH_DATA_LEN;
+	dev->hard_header_len = BAT_HEADER_LEN; /* reserve more space in the
+						* skbuff for our header */
+
+	/* generate random address */
+	random_ether_addr(dev_addr);
+	memcpy(dev->dev_addr, dev_addr, ETH_ALEN);
+
+	SET_ETHTOOL_OPS(dev, &bat_ethtool_ops);
+
+	memset(priv, 0, sizeof(struct bat_priv));
+}
+
+struct net_device *softif_create(char *name)
+{
+	struct net_device *soft_iface;
+	struct bat_priv *bat_priv;
+	int ret;
+
+	soft_iface = alloc_netdev(sizeof(struct bat_priv) , name,
+				   interface_setup);
+
+	if (!soft_iface) {
+		pr_err("Unable to allocate the batman interface: %s\n", name);
+		goto out;
+	}
+
+	ret = register_netdev(soft_iface);
+	if (ret < 0) {
+		pr_err("Unable to register the batman interface '%s': %i\n",
+		       name, ret);
+		goto free_soft_iface;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	atomic_set(&bat_priv->aggregated_ogms, 1);
+	atomic_set(&bat_priv->bonding, 0);
+	atomic_set(&bat_priv->vis_mode, VIS_TYPE_CLIENT_UPDATE);
+	atomic_set(&bat_priv->gw_mode, GW_MODE_OFF);
+	atomic_set(&bat_priv->gw_sel_class, 20);
+	atomic_set(&bat_priv->gw_bandwidth, 41);
+	atomic_set(&bat_priv->orig_interval, 1000);
+	atomic_set(&bat_priv->hop_penalty, 10);
+	atomic_set(&bat_priv->log_level, 0);
+	atomic_set(&bat_priv->fragmentation, 1);
+	atomic_set(&bat_priv->bcast_queue_left, BCAST_QUEUE_LEN);
+	atomic_set(&bat_priv->batman_queue_left, BATMAN_QUEUE_LEN);
+
+	atomic_set(&bat_priv->mesh_state, MESH_INACTIVE);
+	atomic_set(&bat_priv->bcast_seqno, 1);
+	atomic_set(&bat_priv->hna_local_changed, 0);
+
+	bat_priv->primary_if = NULL;
+	bat_priv->num_ifaces = 0;
+	bat_priv->softif_neigh = NULL;
+
+	ret = sysfs_add_meshif(soft_iface);
+	if (ret < 0)
+		goto unreg_soft_iface;
+
+	ret = debugfs_add_meshif(soft_iface);
+	if (ret < 0)
+		goto unreg_sysfs;
+
+	ret = mesh_init(soft_iface);
+	if (ret < 0)
+		goto unreg_debugfs;
+
+	return soft_iface;
+
+unreg_debugfs:
+	debugfs_del_meshif(soft_iface);
+unreg_sysfs:
+	sysfs_del_meshif(soft_iface);
+unreg_soft_iface:
+	unregister_netdev(soft_iface);
+	return NULL;
+
+free_soft_iface:
+	free_netdev(soft_iface);
+out:
+	return NULL;
+}
+
+void softif_destroy(struct net_device *soft_iface)
+{
+	debugfs_del_meshif(soft_iface);
+	sysfs_del_meshif(soft_iface);
+	mesh_free(soft_iface);
+	unregister_netdevice(soft_iface);
+}
+
+/* ethtool */
+static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	cmd->supported = 0;
+	cmd->advertising = 0;
+	cmd->speed = SPEED_10;
+	cmd->duplex = DUPLEX_FULL;
+	cmd->port = PORT_TP;
+	cmd->phy_address = 0;
+	cmd->transceiver = XCVR_INTERNAL;
+	cmd->autoneg = AUTONEG_DISABLE;
+	cmd->maxtxpkt = 0;
+	cmd->maxrxpkt = 0;
+
+	return 0;
+}
+
+static void bat_get_drvinfo(struct net_device *dev,
+			    struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "B.A.T.M.A.N. advanced");
+	strcpy(info->version, SOURCE_VERSION);
+	strcpy(info->fw_version, "N/A");
+	strcpy(info->bus_info, "batman");
+}
+
+static u32 bat_get_msglevel(struct net_device *dev)
+{
+	return -EOPNOTSUPP;
+}
+
+static void bat_set_msglevel(struct net_device *dev, u32 value)
+{
+}
+
+static u32 bat_get_link(struct net_device *dev)
+{
+	return 1;
+}
+
+static u32 bat_get_rx_csum(struct net_device *dev)
+{
+	return 0;
+}
+
+static int bat_set_rx_csum(struct net_device *dev, u32 data)
+{
+	return -EOPNOTSUPP;
+}
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
new file mode 100644
index 000000000000..02b77334d10d
--- /dev/null
+++ b/net/batman-adv/soft-interface.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_SOFT_INTERFACE_H_
+#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_
+
+int my_skb_head_push(struct sk_buff *skb, unsigned int len);
+int softif_neigh_seq_print_text(struct seq_file *seq, void *offset);
+void softif_neigh_purge(struct bat_priv *bat_priv);
+int interface_tx(struct sk_buff *skb, struct net_device *soft_iface);
+void interface_rx(struct net_device *soft_iface,
+		  struct sk_buff *skb, struct batman_if *recv_if,
+		  int hdr_size);
+struct net_device *softif_create(char *name);
+void softif_destroy(struct net_device *soft_iface);
+
+#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
new file mode 100644
index 000000000000..a19e16c94da5
--- /dev/null
+++ b/net/batman-adv/translation-table.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "translation-table.h"
+#include "soft-interface.h"
+#include "types.h"
+#include "hash.h"
+#include "originator.h"
+
+static void hna_local_purge(struct work_struct *work);
+static void _hna_global_del_orig(struct bat_priv *bat_priv,
+				 struct hna_global_entry *hna_global_entry,
+				 char *message);
+
+static void hna_local_start_timer(struct bat_priv *bat_priv)
+{
+	INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge);
+	queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ);
+}
+
+int hna_local_init(struct bat_priv *bat_priv)
+{
+	if (bat_priv->hna_local_hash)
+		return 1;
+
+	bat_priv->hna_local_hash = hash_new(1024);
+
+	if (!bat_priv->hna_local_hash)
+		return 0;
+
+	atomic_set(&bat_priv->hna_local_changed, 0);
+	hna_local_start_timer(bat_priv);
+
+	return 1;
+}
+
+void hna_local_add(struct net_device *soft_iface, uint8_t *addr)
+{
+	struct bat_priv *bat_priv = netdev_priv(soft_iface);
+	struct hna_local_entry *hna_local_entry;
+	struct hna_global_entry *hna_global_entry;
+	int required_bytes;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+	hna_local_entry =
+		((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash,
+						     compare_orig, choose_orig,
+						     addr));
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+
+	if (hna_local_entry) {
+		hna_local_entry->last_seen = jiffies;
+		return;
+	}
+
+	/* only announce as many hosts as possible in the batman-packet and
+	   space in batman_packet->num_hna That also should give a limit to
+	   MAC-flooding. */
+	required_bytes = (bat_priv->num_local_hna + 1) * ETH_ALEN;
+	required_bytes += BAT_PACKET_LEN;
+
+	if ((required_bytes > ETH_DATA_LEN) ||
+	    (atomic_read(&bat_priv->aggregated_ogms) &&
+	     required_bytes > MAX_AGGREGATION_BYTES) ||
+	    (bat_priv->num_local_hna + 1 > 255)) {
+		bat_dbg(DBG_ROUTES, bat_priv,
+			"Can't add new local hna entry (%pM): "
+			"number of local hna entries exceeds packet size\n",
+			addr);
+		return;
+	}
+
+	bat_dbg(DBG_ROUTES, bat_priv,
+		"Creating new local hna entry: %pM\n", addr);
+
+	hna_local_entry = kmalloc(sizeof(struct hna_local_entry), GFP_ATOMIC);
+	if (!hna_local_entry)
+		return;
+
+	memcpy(hna_local_entry->addr, addr, ETH_ALEN);
+	hna_local_entry->last_seen = jiffies;
+
+	/* the batman interface mac address should never be purged */
+	if (compare_orig(addr, soft_iface->dev_addr))
+		hna_local_entry->never_purge = 1;
+	else
+		hna_local_entry->never_purge = 0;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+	hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig,
+		 hna_local_entry);
+	bat_priv->num_local_hna++;
+	atomic_set(&bat_priv->hna_local_changed, 1);
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+
+	/* remove address from global hash if present */
+	spin_lock_bh(&bat_priv->hna_ghash_lock);
+
+	hna_global_entry = ((struct hna_global_entry *)
+				hash_find(bat_priv->hna_global_hash,
+					  compare_orig, choose_orig, addr));
+
+	if (hna_global_entry)
+		_hna_global_del_orig(bat_priv, hna_global_entry,
+				     "local hna received");
+
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+}
+
+int hna_local_fill_buffer(struct bat_priv *bat_priv,
+			  unsigned char *buff, int buff_len)
+{
+	struct hashtable_t *hash = bat_priv->hna_local_hash;
+	struct hna_local_entry *hna_local_entry;
+	struct element_t *bucket;
+	int i;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	int count = 0;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+
+			if (buff_len < (count + 1) * ETH_ALEN)
+				break;
+
+			hna_local_entry = bucket->data;
+			memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr,
+			       ETH_ALEN);
+
+			count++;
+		}
+	}
+
+	/* if we did not get all new local hnas see you next time  ;-) */
+	if (count == bat_priv->num_local_hna)
+		atomic_set(&bat_priv->hna_local_changed, 0);
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+	return i;
+}
+
+int hna_local_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct hashtable_t *hash = bat_priv->hna_local_hash;
+	struct hna_local_entry *hna_local_entry;
+	int i;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	size_t buf_size, pos;
+	char *buff;
+
+	if (!bat_priv->primary_if) {
+		return seq_printf(seq, "BATMAN mesh %s disabled - "
+			       "please specify interfaces to enable it\n",
+			       net_dev->name);
+	}
+
+	seq_printf(seq, "Locally retrieved addresses (from %s) "
+		   "announced via HNA:\n",
+		   net_dev->name);
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+	buf_size = 1;
+	/* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each(walk, head)
+			buf_size += 21;
+	}
+
+	buff = kmalloc(buf_size, GFP_ATOMIC);
+	if (!buff) {
+		spin_unlock_bh(&bat_priv->hna_lhash_lock);
+		return -ENOMEM;
+	}
+	buff[0] = '\0';
+	pos = 0;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			hna_local_entry = bucket->data;
+
+			pos += snprintf(buff + pos, 22, " * %pM\n",
+					hna_local_entry->addr);
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+
+	seq_printf(seq, "%s", buff);
+	kfree(buff);
+	return 0;
+}
+
+static void _hna_local_del(void *data, void *arg)
+{
+	struct bat_priv *bat_priv = (struct bat_priv *)arg;
+
+	kfree(data);
+	bat_priv->num_local_hna--;
+	atomic_set(&bat_priv->hna_local_changed, 1);
+}
+
+static void hna_local_del(struct bat_priv *bat_priv,
+			  struct hna_local_entry *hna_local_entry,
+			  char *message)
+{
+	bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n",
+		hna_local_entry->addr, message);
+
+	hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig,
+		    hna_local_entry->addr);
+	_hna_local_del(hna_local_entry, bat_priv);
+}
+
+void hna_local_remove(struct bat_priv *bat_priv,
+		      uint8_t *addr, char *message)
+{
+	struct hna_local_entry *hna_local_entry;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+	hna_local_entry = (struct hna_local_entry *)
+		hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig,
+			  addr);
+
+	if (hna_local_entry)
+		hna_local_del(bat_priv, hna_local_entry, message);
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+}
+
+static void hna_local_purge(struct work_struct *work)
+{
+	struct delayed_work *delayed_work =
+		container_of(work, struct delayed_work, work);
+	struct bat_priv *bat_priv =
+		container_of(delayed_work, struct bat_priv, hna_work);
+	struct hashtable_t *hash = bat_priv->hna_local_hash;
+	struct hna_local_entry *hna_local_entry;
+	int i;
+	struct hlist_node *walk, *safe;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	unsigned long timeout;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
+			hna_local_entry = bucket->data;
+
+			timeout = hna_local_entry->last_seen;
+			timeout += LOCAL_HNA_TIMEOUT * HZ;
+
+			if ((!hna_local_entry->never_purge) &&
+			    time_after(jiffies, timeout))
+				hna_local_del(bat_priv, hna_local_entry,
+					"address timed out");
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+	hna_local_start_timer(bat_priv);
+}
+
+void hna_local_free(struct bat_priv *bat_priv)
+{
+	if (!bat_priv->hna_local_hash)
+		return;
+
+	cancel_delayed_work_sync(&bat_priv->hna_work);
+	hash_delete(bat_priv->hna_local_hash, _hna_local_del, bat_priv);
+	bat_priv->hna_local_hash = NULL;
+}
+
+int hna_global_init(struct bat_priv *bat_priv)
+{
+	if (bat_priv->hna_global_hash)
+		return 1;
+
+	bat_priv->hna_global_hash = hash_new(1024);
+
+	if (!bat_priv->hna_global_hash)
+		return 0;
+
+	return 1;
+}
+
+void hna_global_add_orig(struct bat_priv *bat_priv,
+			 struct orig_node *orig_node,
+			 unsigned char *hna_buff, int hna_buff_len)
+{
+	struct hna_global_entry *hna_global_entry;
+	struct hna_local_entry *hna_local_entry;
+	int hna_buff_count = 0;
+	unsigned char *hna_ptr;
+
+	while ((hna_buff_count + 1) * ETH_ALEN <= hna_buff_len) {
+		spin_lock_bh(&bat_priv->hna_ghash_lock);
+
+		hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
+		hna_global_entry = (struct hna_global_entry *)
+			hash_find(bat_priv->hna_global_hash, compare_orig,
+				  choose_orig, hna_ptr);
+
+		if (!hna_global_entry) {
+			spin_unlock_bh(&bat_priv->hna_ghash_lock);
+
+			hna_global_entry =
+				kmalloc(sizeof(struct hna_global_entry),
+					GFP_ATOMIC);
+
+			if (!hna_global_entry)
+				break;
+
+			memcpy(hna_global_entry->addr, hna_ptr, ETH_ALEN);
+
+			bat_dbg(DBG_ROUTES, bat_priv,
+				"Creating new global hna entry: "
+				"%pM (via %pM)\n",
+				hna_global_entry->addr, orig_node->orig);
+
+			spin_lock_bh(&bat_priv->hna_ghash_lock);
+			hash_add(bat_priv->hna_global_hash, compare_orig,
+				 choose_orig, hna_global_entry);
+
+		}
+
+		hna_global_entry->orig_node = orig_node;
+		spin_unlock_bh(&bat_priv->hna_ghash_lock);
+
+		/* remove address from local hash if present */
+		spin_lock_bh(&bat_priv->hna_lhash_lock);
+
+		hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN);
+		hna_local_entry = (struct hna_local_entry *)
+			hash_find(bat_priv->hna_local_hash, compare_orig,
+				  choose_orig, hna_ptr);
+
+		if (hna_local_entry)
+			hna_local_del(bat_priv, hna_local_entry,
+				      "global hna received");
+
+		spin_unlock_bh(&bat_priv->hna_lhash_lock);
+
+		hna_buff_count++;
+	}
+
+	/* initialize, and overwrite if malloc succeeds */
+	orig_node->hna_buff = NULL;
+	orig_node->hna_buff_len = 0;
+
+	if (hna_buff_len > 0) {
+		orig_node->hna_buff = kmalloc(hna_buff_len, GFP_ATOMIC);
+		if (orig_node->hna_buff) {
+			memcpy(orig_node->hna_buff, hna_buff, hna_buff_len);
+			orig_node->hna_buff_len = hna_buff_len;
+		}
+	}
+}
+
+int hna_global_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct hashtable_t *hash = bat_priv->hna_global_hash;
+	struct hna_global_entry *hna_global_entry;
+	int i;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	size_t buf_size, pos;
+	char *buff;
+
+	if (!bat_priv->primary_if) {
+		return seq_printf(seq, "BATMAN mesh %s disabled - "
+				  "please specify interfaces to enable it\n",
+				  net_dev->name);
+	}
+
+	seq_printf(seq, "Globally announced HNAs received via the mesh %s\n",
+		   net_dev->name);
+
+	spin_lock_bh(&bat_priv->hna_ghash_lock);
+
+	buf_size = 1;
+	/* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each(walk, head)
+			buf_size += 43;
+	}
+
+	buff = kmalloc(buf_size, GFP_ATOMIC);
+	if (!buff) {
+		spin_unlock_bh(&bat_priv->hna_ghash_lock);
+		return -ENOMEM;
+	}
+	buff[0] = '\0';
+	pos = 0;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			hna_global_entry = bucket->data;
+
+			pos += snprintf(buff + pos, 44,
+					" * %pM via %pM\n",
+					hna_global_entry->addr,
+					hna_global_entry->orig_node->orig);
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+
+	seq_printf(seq, "%s", buff);
+	kfree(buff);
+	return 0;
+}
+
+static void _hna_global_del_orig(struct bat_priv *bat_priv,
+				 struct hna_global_entry *hna_global_entry,
+				 char *message)
+{
+	bat_dbg(DBG_ROUTES, bat_priv,
+		"Deleting global hna entry %pM (via %pM): %s\n",
+		hna_global_entry->addr, hna_global_entry->orig_node->orig,
+		message);
+
+	hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig,
+		    hna_global_entry->addr);
+	kfree(hna_global_entry);
+}
+
+void hna_global_del_orig(struct bat_priv *bat_priv,
+			 struct orig_node *orig_node, char *message)
+{
+	struct hna_global_entry *hna_global_entry;
+	int hna_buff_count = 0;
+	unsigned char *hna_ptr;
+
+	if (orig_node->hna_buff_len == 0)
+		return;
+
+	spin_lock_bh(&bat_priv->hna_ghash_lock);
+
+	while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) {
+		hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN);
+		hna_global_entry = (struct hna_global_entry *)
+			hash_find(bat_priv->hna_global_hash, compare_orig,
+				  choose_orig, hna_ptr);
+
+		if ((hna_global_entry) &&
+		    (hna_global_entry->orig_node == orig_node))
+			_hna_global_del_orig(bat_priv, hna_global_entry,
+					     message);
+
+		hna_buff_count++;
+	}
+
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+
+	orig_node->hna_buff_len = 0;
+	kfree(orig_node->hna_buff);
+	orig_node->hna_buff = NULL;
+}
+
+static void hna_global_del(void *data, void *arg)
+{
+	kfree(data);
+}
+
+void hna_global_free(struct bat_priv *bat_priv)
+{
+	if (!bat_priv->hna_global_hash)
+		return;
+
+	hash_delete(bat_priv->hna_global_hash, hna_global_del, NULL);
+	bat_priv->hna_global_hash = NULL;
+}
+
+struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr)
+{
+	struct hna_global_entry *hna_global_entry;
+
+	spin_lock_bh(&bat_priv->hna_ghash_lock);
+	hna_global_entry = (struct hna_global_entry *)
+				hash_find(bat_priv->hna_global_hash,
+					  compare_orig, choose_orig, addr);
+	spin_unlock_bh(&bat_priv->hna_ghash_lock);
+
+	if (!hna_global_entry)
+		return NULL;
+
+	return hna_global_entry->orig_node;
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
new file mode 100644
index 000000000000..10c4c5c319b6
--- /dev/null
+++ b/net/batman-adv/translation-table.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
+#define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
+
+#include "types.h"
+
+int hna_local_init(struct bat_priv *bat_priv);
+void hna_local_add(struct net_device *soft_iface, uint8_t *addr);
+void hna_local_remove(struct bat_priv *bat_priv,
+		      uint8_t *addr, char *message);
+int hna_local_fill_buffer(struct bat_priv *bat_priv,
+			  unsigned char *buff, int buff_len);
+int hna_local_seq_print_text(struct seq_file *seq, void *offset);
+void hna_local_free(struct bat_priv *bat_priv);
+int hna_global_init(struct bat_priv *bat_priv);
+void hna_global_add_orig(struct bat_priv *bat_priv,
+			 struct orig_node *orig_node,
+			 unsigned char *hna_buff, int hna_buff_len);
+int hna_global_seq_print_text(struct seq_file *seq, void *offset);
+void hna_global_del_orig(struct bat_priv *bat_priv,
+			 struct orig_node *orig_node, char *message);
+void hna_global_free(struct bat_priv *bat_priv);
+struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr);
+
+#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
new file mode 100644
index 000000000000..97cb23dd3e69
--- /dev/null
+++ b/net/batman-adv/types.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors:
+ *
+ * Marek Lindner, Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+
+
+#ifndef _NET_BATMAN_ADV_TYPES_H_
+#define _NET_BATMAN_ADV_TYPES_H_
+
+#include "packet.h"
+#include "bitarray.h"
+
+#define BAT_HEADER_LEN (sizeof(struct ethhdr) + \
+	((sizeof(struct unicast_packet) > sizeof(struct bcast_packet) ? \
+	 sizeof(struct unicast_packet) : \
+	 sizeof(struct bcast_packet))))
+
+
+struct batman_if {
+	struct list_head list;
+	int16_t if_num;
+	char if_status;
+	struct net_device *net_dev;
+	atomic_t seqno;
+	atomic_t frag_seqno;
+	unsigned char *packet_buff;
+	int packet_len;
+	struct kobject *hardif_obj;
+	struct kref refcount;
+	struct packet_type batman_adv_ptype;
+	struct net_device *soft_iface;
+	struct rcu_head rcu;
+};
+
+/**
+ *	orig_node - structure for orig_list maintaining nodes of mesh
+ *	@primary_addr: hosts primary interface address
+ *	@last_valid: when last packet from this node was received
+ *	@bcast_seqno_reset: time when the broadcast seqno window was reset
+ *	@batman_seqno_reset: time when the batman seqno window was reset
+ *	@gw_flags: flags related to gateway class
+ *	@flags: for now only VIS_SERVER flag
+ *	@last_real_seqno: last and best known squence number
+ *	@last_ttl: ttl of last received packet
+ *	@last_bcast_seqno: last broadcast sequence number received by this host
+ *
+ *	@candidates: how many candidates are available
+ *	@selected: next bonding candidate
+ */
+struct orig_node {
+	uint8_t orig[ETH_ALEN];
+	uint8_t primary_addr[ETH_ALEN];
+	struct neigh_node *router;
+	unsigned long *bcast_own;
+	uint8_t *bcast_own_sum;
+	uint8_t tq_own;
+	int tq_asym_penalty;
+	unsigned long last_valid;
+	unsigned long bcast_seqno_reset;
+	unsigned long batman_seqno_reset;
+	uint8_t gw_flags;
+	uint8_t flags;
+	unsigned char *hna_buff;
+	int16_t hna_buff_len;
+	uint32_t last_real_seqno;
+	uint8_t last_ttl;
+	unsigned long bcast_bits[NUM_WORDS];
+	uint32_t last_bcast_seqno;
+	struct list_head neigh_list;
+	struct list_head frag_list;
+	unsigned long last_frag_packet;
+	struct {
+		uint8_t candidates;
+		struct neigh_node *selected;
+	} bond;
+};
+
+struct gw_node {
+	struct hlist_node list;
+	struct orig_node *orig_node;
+	unsigned long deleted;
+	struct kref refcount;
+	struct rcu_head rcu;
+};
+
+/**
+ *	neigh_node
+ *	@last_valid: when last packet via this neighbor was received
+ */
+struct neigh_node {
+	struct list_head list;
+	uint8_t addr[ETH_ALEN];
+	uint8_t real_packet_count;
+	uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE];
+	uint8_t tq_index;
+	uint8_t tq_avg;
+	uint8_t last_ttl;
+	struct neigh_node *next_bond_candidate;
+	unsigned long last_valid;
+	unsigned long real_bits[NUM_WORDS];
+	struct orig_node *orig_node;
+	struct batman_if *if_incoming;
+};
+
+
+struct bat_priv {
+	atomic_t mesh_state;
+	struct net_device_stats stats;
+	atomic_t aggregated_ogms;	/* boolean */
+	atomic_t bonding;		/* boolean */
+	atomic_t fragmentation;		/* boolean */
+	atomic_t vis_mode;		/* VIS_TYPE_* */
+	atomic_t gw_mode;		/* GW_MODE_* */
+	atomic_t gw_sel_class;		/* uint */
+	atomic_t gw_bandwidth;		/* gw bandwidth */
+	atomic_t orig_interval;		/* uint */
+	atomic_t hop_penalty;		/* uint */
+	atomic_t log_level;		/* uint */
+	atomic_t bcast_seqno;
+	atomic_t bcast_queue_left;
+	atomic_t batman_queue_left;
+	char num_ifaces;
+	struct hlist_head softif_neigh_list;
+	struct softif_neigh *softif_neigh;
+	struct debug_log *debug_log;
+	struct batman_if *primary_if;
+	struct kobject *mesh_obj;
+	struct dentry *debug_dir;
+	struct hlist_head forw_bat_list;
+	struct hlist_head forw_bcast_list;
+	struct hlist_head gw_list;
+	struct list_head vis_send_list;
+	struct hashtable_t *orig_hash;
+	struct hashtable_t *hna_local_hash;
+	struct hashtable_t *hna_global_hash;
+	struct hashtable_t *vis_hash;
+	spinlock_t orig_hash_lock; /* protects orig_hash */
+	spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
+	spinlock_t forw_bcast_list_lock; /* protects  */
+	spinlock_t hna_lhash_lock; /* protects hna_local_hash */
+	spinlock_t hna_ghash_lock; /* protects hna_global_hash */
+	spinlock_t gw_list_lock; /* protects gw_list */
+	spinlock_t vis_hash_lock; /* protects vis_hash */
+	spinlock_t vis_list_lock; /* protects vis_info::recv_list */
+	spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */
+	int16_t num_local_hna;
+	atomic_t hna_local_changed;
+	struct delayed_work hna_work;
+	struct delayed_work orig_work;
+	struct delayed_work vis_work;
+	struct gw_node *curr_gw;
+	struct vis_info *my_vis_info;
+};
+
+struct socket_client {
+	struct list_head queue_list;
+	unsigned int queue_len;
+	unsigned char index;
+	spinlock_t lock; /* protects queue_list, queue_len, index */
+	wait_queue_head_t queue_wait;
+	struct bat_priv *bat_priv;
+};
+
+struct socket_packet {
+	struct list_head list;
+	size_t icmp_len;
+	struct icmp_packet_rr icmp_packet;
+};
+
+struct hna_local_entry {
+	uint8_t addr[ETH_ALEN];
+	unsigned long last_seen;
+	char never_purge;
+};
+
+struct hna_global_entry {
+	uint8_t addr[ETH_ALEN];
+	struct orig_node *orig_node;
+};
+
+/**
+ *	forw_packet - structure for forw_list maintaining packets to be
+ *	              send/forwarded
+ */
+struct forw_packet {
+	struct hlist_node list;
+	unsigned long send_time;
+	uint8_t own;
+	struct sk_buff *skb;
+	uint16_t packet_len;
+	uint32_t direct_link_flags;
+	uint8_t num_packets;
+	struct delayed_work delayed_work;
+	struct batman_if *if_incoming;
+};
+
+/* While scanning for vis-entries of a particular vis-originator
+ * this list collects its interfaces to create a subgraph/cluster
+ * out of them later
+ */
+struct if_list_entry {
+	uint8_t addr[ETH_ALEN];
+	bool primary;
+	struct hlist_node list;
+};
+
+struct debug_log {
+	char log_buff[LOG_BUF_LEN];
+	unsigned long log_start;
+	unsigned long log_end;
+	spinlock_t lock; /* protects log_buff, log_start and log_end */
+	wait_queue_head_t queue_wait;
+};
+
+struct frag_packet_list_entry {
+	struct list_head list;
+	uint16_t seqno;
+	struct sk_buff *skb;
+};
+
+struct vis_info {
+	unsigned long       first_seen;
+	struct list_head    recv_list;
+			    /* list of server-neighbors we received a vis-packet
+			     * from.  we should not reply to them. */
+	struct list_head send_list;
+	struct kref refcount;
+	struct bat_priv *bat_priv;
+	/* this packet might be part of the vis send queue. */
+	struct sk_buff *skb_packet;
+	/* vis_info may follow here*/
+} __attribute__((packed));
+
+struct vis_info_entry {
+	uint8_t  src[ETH_ALEN];
+	uint8_t  dest[ETH_ALEN];
+	uint8_t  quality;	/* quality = 0 means HNA */
+} __attribute__((packed));
+
+struct recvlist_node {
+	struct list_head list;
+	uint8_t mac[ETH_ALEN];
+};
+
+struct softif_neigh {
+	struct hlist_node list;
+	uint8_t addr[ETH_ALEN];
+	unsigned long last_seen;
+	short vid;
+	struct kref refcount;
+	struct rcu_head rcu;
+};
+
+#endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
new file mode 100644
index 000000000000..dc2e28bed844
--- /dev/null
+++ b/net/batman-adv/unicast.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Andreas Langer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "unicast.h"
+#include "send.h"
+#include "soft-interface.h"
+#include "gateway_client.h"
+#include "originator.h"
+#include "hash.h"
+#include "translation-table.h"
+#include "routing.h"
+#include "hard-interface.h"
+
+
+static struct sk_buff *frag_merge_packet(struct list_head *head,
+					 struct frag_packet_list_entry *tfp,
+					 struct sk_buff *skb)
+{
+	struct unicast_frag_packet *up =
+		(struct unicast_frag_packet *)skb->data;
+	struct sk_buff *tmp_skb;
+	struct unicast_packet *unicast_packet;
+	int hdr_len = sizeof(struct unicast_packet),
+	    uni_diff = sizeof(struct unicast_frag_packet) - hdr_len;
+
+	/* set skb to the first part and tmp_skb to the second part */
+	if (up->flags & UNI_FRAG_HEAD) {
+		tmp_skb = tfp->skb;
+	} else {
+		tmp_skb = skb;
+		skb = tfp->skb;
+	}
+
+	skb_pull(tmp_skb, sizeof(struct unicast_frag_packet));
+	if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) {
+		/* free buffered skb, skb will be freed later */
+		kfree_skb(tfp->skb);
+		return NULL;
+	}
+
+	/* move free entry to end */
+	tfp->skb = NULL;
+	tfp->seqno = 0;
+	list_move_tail(&tfp->list, head);
+
+	memcpy(skb_put(skb, tmp_skb->len), tmp_skb->data, tmp_skb->len);
+	kfree_skb(tmp_skb);
+
+	memmove(skb->data + uni_diff, skb->data, hdr_len);
+	unicast_packet = (struct unicast_packet *) skb_pull(skb, uni_diff);
+	unicast_packet->packet_type = BAT_UNICAST;
+
+	return skb;
+}
+
+static void frag_create_entry(struct list_head *head, struct sk_buff *skb)
+{
+	struct frag_packet_list_entry *tfp;
+	struct unicast_frag_packet *up =
+		(struct unicast_frag_packet *)skb->data;
+
+	/* free and oldest packets stand at the end */
+	tfp = list_entry((head)->prev, typeof(*tfp), list);
+	kfree_skb(tfp->skb);
+
+	tfp->seqno = ntohs(up->seqno);
+	tfp->skb = skb;
+	list_move(&tfp->list, head);
+	return;
+}
+
+static int frag_create_buffer(struct list_head *head)
+{
+	int i;
+	struct frag_packet_list_entry *tfp;
+
+	for (i = 0; i < FRAG_BUFFER_SIZE; i++) {
+		tfp = kmalloc(sizeof(struct frag_packet_list_entry),
+			GFP_ATOMIC);
+		if (!tfp) {
+			frag_list_free(head);
+			return -ENOMEM;
+		}
+		tfp->skb = NULL;
+		tfp->seqno = 0;
+		INIT_LIST_HEAD(&tfp->list);
+		list_add(&tfp->list, head);
+	}
+
+	return 0;
+}
+
+static struct frag_packet_list_entry *frag_search_packet(struct list_head *head,
+						 struct unicast_frag_packet *up)
+{
+	struct frag_packet_list_entry *tfp;
+	struct unicast_frag_packet *tmp_up = NULL;
+	uint16_t search_seqno;
+
+	if (up->flags & UNI_FRAG_HEAD)
+		search_seqno = ntohs(up->seqno)+1;
+	else
+		search_seqno = ntohs(up->seqno)-1;
+
+	list_for_each_entry(tfp, head, list) {
+
+		if (!tfp->skb)
+			continue;
+
+		if (tfp->seqno == ntohs(up->seqno))
+			goto mov_tail;
+
+		tmp_up = (struct unicast_frag_packet *)tfp->skb->data;
+
+		if (tfp->seqno == search_seqno) {
+
+			if ((tmp_up->flags & UNI_FRAG_HEAD) !=
+			    (up->flags & UNI_FRAG_HEAD))
+				return tfp;
+			else
+				goto mov_tail;
+		}
+	}
+	return NULL;
+
+mov_tail:
+	list_move_tail(&tfp->list, head);
+	return NULL;
+}
+
+void frag_list_free(struct list_head *head)
+{
+	struct frag_packet_list_entry *pf, *tmp_pf;
+
+	if (!list_empty(head)) {
+
+		list_for_each_entry_safe(pf, tmp_pf, head, list) {
+			kfree_skb(pf->skb);
+			list_del(&pf->list);
+			kfree(pf);
+		}
+	}
+	return;
+}
+
+/* frag_reassemble_skb():
+ * returns NET_RX_DROP if the operation failed - skb is left intact
+ * returns NET_RX_SUCCESS if the fragment was buffered (skb_new will be NULL)
+ * or the skb could be reassembled (skb_new will point to the new packet and
+ * skb was freed)
+ */
+int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
+			struct sk_buff **new_skb)
+{
+	struct orig_node *orig_node;
+	struct frag_packet_list_entry *tmp_frag_entry;
+	int ret = NET_RX_DROP;
+	struct unicast_frag_packet *unicast_packet =
+		(struct unicast_frag_packet *)skb->data;
+
+	*new_skb = NULL;
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	orig_node = ((struct orig_node *)
+		    hash_find(bat_priv->orig_hash, compare_orig, choose_orig,
+			      unicast_packet->orig));
+
+	if (!orig_node) {
+		pr_debug("couldn't find originator in orig_hash\n");
+		goto out;
+	}
+
+	orig_node->last_frag_packet = jiffies;
+
+	if (list_empty(&orig_node->frag_list) &&
+	    frag_create_buffer(&orig_node->frag_list)) {
+		pr_debug("couldn't create frag buffer\n");
+		goto out;
+	}
+
+	tmp_frag_entry = frag_search_packet(&orig_node->frag_list,
+					    unicast_packet);
+
+	if (!tmp_frag_entry) {
+		frag_create_entry(&orig_node->frag_list, skb);
+		ret = NET_RX_SUCCESS;
+		goto out;
+	}
+
+	*new_skb = frag_merge_packet(&orig_node->frag_list, tmp_frag_entry,
+				     skb);
+	/* if not, merge failed */
+	if (*new_skb)
+		ret = NET_RX_SUCCESS;
+out:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	return ret;
+}
+
+int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
+		  struct batman_if *batman_if, uint8_t dstaddr[])
+{
+	struct unicast_packet tmp_uc, *unicast_packet;
+	struct sk_buff *frag_skb;
+	struct unicast_frag_packet *frag1, *frag2;
+	int uc_hdr_len = sizeof(struct unicast_packet);
+	int ucf_hdr_len = sizeof(struct unicast_frag_packet);
+	int data_len = skb->len;
+
+	if (!bat_priv->primary_if)
+		goto dropped;
+
+	unicast_packet = (struct unicast_packet *) skb->data;
+
+	memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
+	frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
+	skb_split(skb, frag_skb, data_len / 2);
+
+	if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
+	    my_skb_head_push(frag_skb, ucf_hdr_len) < 0)
+		goto drop_frag;
+
+	frag1 = (struct unicast_frag_packet *)skb->data;
+	frag2 = (struct unicast_frag_packet *)frag_skb->data;
+
+	memcpy(frag1, &tmp_uc, sizeof(struct unicast_packet));
+
+	frag1->ttl--;
+	frag1->version = COMPAT_VERSION;
+	frag1->packet_type = BAT_UNICAST_FRAG;
+
+	memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN);
+	memcpy(frag2, frag1, sizeof(struct unicast_frag_packet));
+
+	frag1->flags |= UNI_FRAG_HEAD;
+	frag2->flags &= ~UNI_FRAG_HEAD;
+
+	frag1->seqno = htons((uint16_t)atomic_inc_return(
+			     &batman_if->frag_seqno));
+	frag2->seqno = htons((uint16_t)atomic_inc_return(
+			     &batman_if->frag_seqno));
+
+	send_skb_packet(skb, batman_if, dstaddr);
+	send_skb_packet(frag_skb, batman_if, dstaddr);
+	return NET_RX_SUCCESS;
+
+drop_frag:
+	kfree_skb(frag_skb);
+dropped:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv)
+{
+	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+	struct unicast_packet *unicast_packet;
+	struct orig_node *orig_node;
+	struct batman_if *batman_if;
+	struct neigh_node *router;
+	int data_len = skb->len;
+	uint8_t dstaddr[6];
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+
+	/* get routing information */
+	if (is_multicast_ether_addr(ethhdr->h_dest))
+		orig_node = (struct orig_node *)gw_get_selected(bat_priv);
+	else
+		orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
+							   compare_orig,
+							   choose_orig,
+							   ethhdr->h_dest));
+
+	/* check for hna host */
+	if (!orig_node)
+		orig_node = transtable_search(bat_priv, ethhdr->h_dest);
+
+	router = find_router(bat_priv, orig_node, NULL);
+
+	if (!router)
+		goto unlock;
+
+	/* don't lock while sending the packets ... we therefore
+		* copy the required data before sending */
+
+	batman_if = router->if_incoming;
+	memcpy(dstaddr, router->addr, ETH_ALEN);
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	if (batman_if->if_status != IF_ACTIVE)
+		goto dropped;
+
+	if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0)
+		goto dropped;
+
+	unicast_packet = (struct unicast_packet *)skb->data;
+
+	unicast_packet->version = COMPAT_VERSION;
+	/* batman packet type: unicast */
+	unicast_packet->packet_type = BAT_UNICAST;
+	/* set unicast ttl */
+	unicast_packet->ttl = TTL;
+	/* copy the destination for faster routing */
+	memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
+
+	if (atomic_read(&bat_priv->fragmentation) &&
+	    data_len + sizeof(struct unicast_packet) >
+	    batman_if->net_dev->mtu) {
+		/* send frag skb decreases ttl */
+		unicast_packet->ttl++;
+		return frag_send_skb(skb, bat_priv, batman_if,
+				     dstaddr);
+	}
+	send_skb_packet(skb, batman_if, dstaddr);
+	return 0;
+
+unlock:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+dropped:
+	kfree_skb(skb);
+	return 1;
+}
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
new file mode 100644
index 000000000000..e32b7867a9a4
--- /dev/null
+++ b/net/batman-adv/unicast.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 B.A.T.M.A.N. contributors:
+ *
+ * Andreas Langer
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_UNICAST_H_
+#define _NET_BATMAN_ADV_UNICAST_H_
+
+#define FRAG_TIMEOUT 10000	/* purge frag list entrys after time in ms */
+#define FRAG_BUFFER_SIZE 6	/* number of list elements in buffer */
+
+int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
+			struct sk_buff **new_skb);
+void frag_list_free(struct list_head *head);
+int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv);
+int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv,
+		  struct batman_if *batman_if, uint8_t dstaddr[]);
+
+#endif /* _NET_BATMAN_ADV_UNICAST_H_ */
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
new file mode 100644
index 000000000000..cd4c4231fa48
--- /dev/null
+++ b/net/batman-adv/vis.c
@@ -0,0 +1,949 @@
+/*
+ * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#include "main.h"
+#include "send.h"
+#include "translation-table.h"
+#include "vis.h"
+#include "soft-interface.h"
+#include "hard-interface.h"
+#include "hash.h"
+#include "originator.h"
+
+#define MAX_VIS_PACKET_SIZE 1000
+
+/* Returns the smallest signed integer in two's complement with the sizeof x */
+#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u)))
+
+/* Checks if a sequence number x is a predecessor/successor of y.
+ * they handle overflows/underflows and can correctly check for a
+ * predecessor/successor unless the variable sequence number has grown by
+ * more then 2**(bitwidth(x)-1)-1.
+ * This means that for a uint8_t with the maximum value 255, it would think:
+ *  - when adding nothing - it is neither a predecessor nor a successor
+ *  - before adding more than 127 to the starting value - it is a predecessor,
+ *  - when adding 128 - it is neither a predecessor nor a successor,
+ *  - after adding more than 127 to the starting value - it is a successor */
+#define seq_before(x, y) ({typeof(x) _dummy = (x - y); \
+			_dummy > smallest_signed_int(_dummy); })
+#define seq_after(x, y) seq_before(y, x)
+
+static void start_vis_timer(struct bat_priv *bat_priv);
+
+/* free the info */
+static void free_info(struct kref *ref)
+{
+	struct vis_info *info = container_of(ref, struct vis_info, refcount);
+	struct bat_priv *bat_priv = info->bat_priv;
+	struct recvlist_node *entry, *tmp;
+
+	list_del_init(&info->send_list);
+	spin_lock_bh(&bat_priv->vis_list_lock);
+	list_for_each_entry_safe(entry, tmp, &info->recv_list, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+
+	spin_unlock_bh(&bat_priv->vis_list_lock);
+	kfree_skb(info->skb_packet);
+}
+
+/* Compare two vis packets, used by the hashing algorithm */
+static int vis_info_cmp(void *data1, void *data2)
+{
+	struct vis_info *d1, *d2;
+	struct vis_packet *p1, *p2;
+	d1 = data1;
+	d2 = data2;
+	p1 = (struct vis_packet *)d1->skb_packet->data;
+	p2 = (struct vis_packet *)d2->skb_packet->data;
+	return compare_orig(p1->vis_orig, p2->vis_orig);
+}
+
+/* hash function to choose an entry in a hash table of given size */
+/* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */
+static int vis_info_choose(void *data, int size)
+{
+	struct vis_info *vis_info = data;
+	struct vis_packet *packet;
+	unsigned char *key;
+	uint32_t hash = 0;
+	size_t i;
+
+	packet = (struct vis_packet *)vis_info->skb_packet->data;
+	key = packet->vis_orig;
+	for (i = 0; i < ETH_ALEN; i++) {
+		hash += key[i];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash % size;
+}
+
+/* insert interface to the list of interfaces of one originator, if it
+ * does not already exist in the list */
+static void vis_data_insert_interface(const uint8_t *interface,
+				      struct hlist_head *if_list,
+				      bool primary)
+{
+	struct if_list_entry *entry;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(entry, pos, if_list, list) {
+		if (compare_orig(entry->addr, (void *)interface))
+			return;
+	}
+
+	/* its a new address, add it to the list */
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return;
+	memcpy(entry->addr, interface, ETH_ALEN);
+	entry->primary = primary;
+	hlist_add_head(&entry->list, if_list);
+}
+
+static ssize_t vis_data_read_prim_sec(char *buff, struct hlist_head *if_list)
+{
+	struct if_list_entry *entry;
+	struct hlist_node *pos;
+	size_t len = 0;
+
+	hlist_for_each_entry(entry, pos, if_list, list) {
+		if (entry->primary)
+			len += sprintf(buff + len, "PRIMARY, ");
+		else
+			len += sprintf(buff + len,  "SEC %pM, ", entry->addr);
+	}
+
+	return len;
+}
+
+static size_t vis_data_count_prim_sec(struct hlist_head *if_list)
+{
+	struct if_list_entry *entry;
+	struct hlist_node *pos;
+	size_t count = 0;
+
+	hlist_for_each_entry(entry, pos, if_list, list) {
+		if (entry->primary)
+			count += 9;
+		else
+			count += 23;
+	}
+
+	return count;
+}
+
+/* read an entry  */
+static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry,
+				   uint8_t *src, bool primary)
+{
+	/* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */
+	if (primary && entry->quality == 0)
+		return sprintf(buff, "HNA %pM, ", entry->dest);
+	else if (compare_orig(entry->src, src))
+		return sprintf(buff, "TQ %pM %d, ", entry->dest,
+			       entry->quality);
+
+	return 0;
+}
+
+int vis_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct vis_info *info;
+	struct vis_packet *packet;
+	struct vis_info_entry *entries;
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct bat_priv *bat_priv = netdev_priv(net_dev);
+	struct hashtable_t *hash = bat_priv->vis_hash;
+	HLIST_HEAD(vis_if_list);
+	struct if_list_entry *entry;
+	struct hlist_node *pos, *n;
+	int i, j;
+	int vis_server = atomic_read(&bat_priv->vis_mode);
+	size_t buff_pos, buf_size;
+	char *buff;
+	int compare;
+
+	if ((!bat_priv->primary_if) ||
+	    (vis_server == VIS_TYPE_CLIENT_UPDATE))
+		return 0;
+
+	buf_size = 1;
+	/* Estimate length */
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			info = bucket->data;
+			packet = (struct vis_packet *)info->skb_packet->data;
+			entries = (struct vis_info_entry *)
+				((char *)packet + sizeof(struct vis_packet));
+
+			for (j = 0; j < packet->entries; j++) {
+				if (entries[j].quality == 0)
+					continue;
+				compare =
+				 compare_orig(entries[j].src, packet->vis_orig);
+				vis_data_insert_interface(entries[j].src,
+							  &vis_if_list,
+							  compare);
+			}
+
+			hlist_for_each_entry(entry, pos, &vis_if_list, list) {
+				buf_size += 18 + 26 * packet->entries;
+
+				/* add primary/secondary records */
+				if (compare_orig(entry->addr, packet->vis_orig))
+					buf_size +=
+					  vis_data_count_prim_sec(&vis_if_list);
+
+				buf_size += 1;
+			}
+
+			hlist_for_each_entry_safe(entry, pos, n, &vis_if_list,
+						  list) {
+				hlist_del(&entry->list);
+				kfree(entry);
+			}
+		}
+	}
+
+	buff = kmalloc(buf_size, GFP_ATOMIC);
+	if (!buff) {
+		spin_unlock_bh(&bat_priv->vis_hash_lock);
+		return -ENOMEM;
+	}
+	buff[0] = '\0';
+	buff_pos = 0;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			info = bucket->data;
+			packet = (struct vis_packet *)info->skb_packet->data;
+			entries = (struct vis_info_entry *)
+				((char *)packet + sizeof(struct vis_packet));
+
+			for (j = 0; j < packet->entries; j++) {
+				if (entries[j].quality == 0)
+					continue;
+				compare =
+				 compare_orig(entries[j].src, packet->vis_orig);
+				vis_data_insert_interface(entries[j].src,
+							  &vis_if_list,
+							  compare);
+			}
+
+			hlist_for_each_entry(entry, pos, &vis_if_list, list) {
+				buff_pos += sprintf(buff + buff_pos, "%pM,",
+						entry->addr);
+
+				for (i = 0; i < packet->entries; i++)
+					buff_pos += vis_data_read_entry(
+							buff + buff_pos,
+							&entries[i],
+							entry->addr,
+							entry->primary);
+
+				/* add primary/secondary records */
+				if (compare_orig(entry->addr, packet->vis_orig))
+					buff_pos +=
+					 vis_data_read_prim_sec(buff + buff_pos,
+								&vis_if_list);
+
+				buff_pos += sprintf(buff + buff_pos, "\n");
+			}
+
+			hlist_for_each_entry_safe(entry, pos, n, &vis_if_list,
+						  list) {
+				hlist_del(&entry->list);
+				kfree(entry);
+			}
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+
+	seq_printf(seq, "%s", buff);
+	kfree(buff);
+
+	return 0;
+}
+
+/* add the info packet to the send list, if it was not
+ * already linked in. */
+static void send_list_add(struct bat_priv *bat_priv, struct vis_info *info)
+{
+	if (list_empty(&info->send_list)) {
+		kref_get(&info->refcount);
+		list_add_tail(&info->send_list, &bat_priv->vis_send_list);
+	}
+}
+
+/* delete the info packet from the send list, if it was
+ * linked in. */
+static void send_list_del(struct vis_info *info)
+{
+	if (!list_empty(&info->send_list)) {
+		list_del_init(&info->send_list);
+		kref_put(&info->refcount, free_info);
+	}
+}
+
+/* tries to add one entry to the receive list. */
+static void recv_list_add(struct bat_priv *bat_priv,
+			  struct list_head *recv_list, char *mac)
+{
+	struct recvlist_node *entry;
+
+	entry = kmalloc(sizeof(struct recvlist_node), GFP_ATOMIC);
+	if (!entry)
+		return;
+
+	memcpy(entry->mac, mac, ETH_ALEN);
+	spin_lock_bh(&bat_priv->vis_list_lock);
+	list_add_tail(&entry->list, recv_list);
+	spin_unlock_bh(&bat_priv->vis_list_lock);
+}
+
+/* returns 1 if this mac is in the recv_list */
+static int recv_list_is_in(struct bat_priv *bat_priv,
+			   struct list_head *recv_list, char *mac)
+{
+	struct recvlist_node *entry;
+
+	spin_lock_bh(&bat_priv->vis_list_lock);
+	list_for_each_entry(entry, recv_list, list) {
+		if (memcmp(entry->mac, mac, ETH_ALEN) == 0) {
+			spin_unlock_bh(&bat_priv->vis_list_lock);
+			return 1;
+		}
+	}
+	spin_unlock_bh(&bat_priv->vis_list_lock);
+	return 0;
+}
+
+/* try to add the packet to the vis_hash. return NULL if invalid (e.g. too old,
+ * broken.. ).	vis hash must be locked outside.  is_new is set when the packet
+ * is newer than old entries in the hash. */
+static struct vis_info *add_packet(struct bat_priv *bat_priv,
+				   struct vis_packet *vis_packet,
+				   int vis_info_len, int *is_new,
+				   int make_broadcast)
+{
+	struct vis_info *info, *old_info;
+	struct vis_packet *search_packet, *old_packet;
+	struct vis_info search_elem;
+	struct vis_packet *packet;
+	int hash_added;
+
+	*is_new = 0;
+	/* sanity check */
+	if (!bat_priv->vis_hash)
+		return NULL;
+
+	/* see if the packet is already in vis_hash */
+	search_elem.skb_packet = dev_alloc_skb(sizeof(struct vis_packet));
+	if (!search_elem.skb_packet)
+		return NULL;
+	search_packet = (struct vis_packet *)skb_put(search_elem.skb_packet,
+						     sizeof(struct vis_packet));
+
+	memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN);
+	old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
+			     &search_elem);
+	kfree_skb(search_elem.skb_packet);
+
+	if (old_info) {
+		old_packet = (struct vis_packet *)old_info->skb_packet->data;
+		if (!seq_after(ntohl(vis_packet->seqno),
+			       ntohl(old_packet->seqno))) {
+			if (old_packet->seqno == vis_packet->seqno) {
+				recv_list_add(bat_priv, &old_info->recv_list,
+					      vis_packet->sender_orig);
+				return old_info;
+			} else {
+				/* newer packet is already in hash. */
+				return NULL;
+			}
+		}
+		/* remove old entry */
+		hash_remove(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
+			    old_info);
+		send_list_del(old_info);
+		kref_put(&old_info->refcount, free_info);
+	}
+
+	info = kmalloc(sizeof(struct vis_info), GFP_ATOMIC);
+	if (!info)
+		return NULL;
+
+	info->skb_packet = dev_alloc_skb(sizeof(struct vis_packet) +
+					 vis_info_len + sizeof(struct ethhdr));
+	if (!info->skb_packet) {
+		kfree(info);
+		return NULL;
+	}
+	skb_reserve(info->skb_packet, sizeof(struct ethhdr));
+	packet = (struct vis_packet *)skb_put(info->skb_packet,
+					      sizeof(struct vis_packet) +
+					      vis_info_len);
+
+	kref_init(&info->refcount);
+	INIT_LIST_HEAD(&info->send_list);
+	INIT_LIST_HEAD(&info->recv_list);
+	info->first_seen = jiffies;
+	info->bat_priv = bat_priv;
+	memcpy(packet, vis_packet, sizeof(struct vis_packet) + vis_info_len);
+
+	/* initialize and add new packet. */
+	*is_new = 1;
+
+	/* Make it a broadcast packet, if required */
+	if (make_broadcast)
+		memcpy(packet->target_orig, broadcast_addr, ETH_ALEN);
+
+	/* repair if entries is longer than packet. */
+	if (packet->entries * sizeof(struct vis_info_entry) > vis_info_len)
+		packet->entries = vis_info_len / sizeof(struct vis_info_entry);
+
+	recv_list_add(bat_priv, &info->recv_list, packet->sender_orig);
+
+	/* try to add it */
+	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
+			      info);
+	if (hash_added < 0) {
+		/* did not work (for some reason) */
+		kref_put(&old_info->refcount, free_info);
+		info = NULL;
+	}
+
+	return info;
+}
+
+/* handle the server sync packet, forward if needed. */
+void receive_server_sync_packet(struct bat_priv *bat_priv,
+				struct vis_packet *vis_packet,
+				int vis_info_len)
+{
+	struct vis_info *info;
+	int is_new, make_broadcast;
+	int vis_server = atomic_read(&bat_priv->vis_mode);
+
+	make_broadcast = (vis_server == VIS_TYPE_SERVER_SYNC);
+
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+	info = add_packet(bat_priv, vis_packet, vis_info_len,
+			  &is_new, make_broadcast);
+	if (!info)
+		goto end;
+
+	/* only if we are server ourselves and packet is newer than the one in
+	 * hash.*/
+	if (vis_server == VIS_TYPE_SERVER_SYNC && is_new)
+		send_list_add(bat_priv, info);
+end:
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+}
+
+/* handle an incoming client update packet and schedule forward if needed. */
+void receive_client_update_packet(struct bat_priv *bat_priv,
+				  struct vis_packet *vis_packet,
+				  int vis_info_len)
+{
+	struct vis_info *info;
+	struct vis_packet *packet;
+	int is_new;
+	int vis_server = atomic_read(&bat_priv->vis_mode);
+	int are_target = 0;
+
+	/* clients shall not broadcast. */
+	if (is_broadcast_ether_addr(vis_packet->target_orig))
+		return;
+
+	/* Are we the target for this VIS packet? */
+	if (vis_server == VIS_TYPE_SERVER_SYNC	&&
+	    is_my_mac(vis_packet->target_orig))
+		are_target = 1;
+
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+	info = add_packet(bat_priv, vis_packet, vis_info_len,
+			  &is_new, are_target);
+
+	if (!info)
+		goto end;
+	/* note that outdated packets will be dropped at this point. */
+
+	packet = (struct vis_packet *)info->skb_packet->data;
+
+	/* send only if we're the target server or ... */
+	if (are_target && is_new) {
+		packet->vis_type = VIS_TYPE_SERVER_SYNC;	/* upgrade! */
+		send_list_add(bat_priv, info);
+
+		/* ... we're not the recipient (and thus need to forward). */
+	} else if (!is_my_mac(packet->target_orig)) {
+		send_list_add(bat_priv, info);
+	}
+
+end:
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+}
+
+/* Walk the originators and find the VIS server with the best tq. Set the packet
+ * address to its address and return the best_tq.
+ *
+ * Must be called with the originator hash locked */
+static int find_best_vis_server(struct bat_priv *bat_priv,
+				struct vis_info *info)
+{
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	struct vis_packet *packet;
+	int best_tq = -1, i;
+
+	packet = (struct vis_packet *)info->skb_packet->data;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+			if ((orig_node) && (orig_node->router) &&
+			(orig_node->flags & VIS_SERVER) &&
+			(orig_node->router->tq_avg > best_tq)) {
+				best_tq = orig_node->router->tq_avg;
+				memcpy(packet->target_orig, orig_node->orig,
+				       ETH_ALEN);
+			}
+		}
+	}
+
+	return best_tq;
+}
+
+/* Return true if the vis packet is full. */
+static bool vis_packet_full(struct vis_info *info)
+{
+	struct vis_packet *packet;
+	packet = (struct vis_packet *)info->skb_packet->data;
+
+	if (MAX_VIS_PACKET_SIZE / sizeof(struct vis_info_entry)
+		< packet->entries + 1)
+		return true;
+	return false;
+}
+
+/* generates a packet of own vis data,
+ * returns 0 on success, -1 if no packet could be generated */
+static int generate_vis_packet(struct bat_priv *bat_priv)
+{
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	struct neigh_node *neigh_node;
+	struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info;
+	struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data;
+	struct vis_info_entry *entry;
+	struct hna_local_entry *hna_local_entry;
+	int best_tq = -1, i;
+
+	info->first_seen = jiffies;
+	packet->vis_type = atomic_read(&bat_priv->vis_mode);
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	memcpy(packet->target_orig, broadcast_addr, ETH_ALEN);
+	packet->ttl = TTL;
+	packet->seqno = htonl(ntohl(packet->seqno) + 1);
+	packet->entries = 0;
+	skb_trim(info->skb_packet, sizeof(struct vis_packet));
+
+	if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) {
+		best_tq = find_best_vis_server(bat_priv, info);
+
+		if (best_tq < 0) {
+			spin_unlock_bh(&bat_priv->orig_hash_lock);
+			return -1;
+		}
+	}
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+			neigh_node = orig_node->router;
+
+			if (!neigh_node)
+				continue;
+
+			if (!compare_orig(neigh_node->addr, orig_node->orig))
+				continue;
+
+			if (neigh_node->if_incoming->if_status != IF_ACTIVE)
+				continue;
+
+			if (neigh_node->tq_avg < 1)
+				continue;
+
+			/* fill one entry into buffer. */
+			entry = (struct vis_info_entry *)
+				      skb_put(info->skb_packet, sizeof(*entry));
+			memcpy(entry->src,
+			       neigh_node->if_incoming->net_dev->dev_addr,
+			       ETH_ALEN);
+			memcpy(entry->dest, orig_node->orig, ETH_ALEN);
+			entry->quality = neigh_node->tq_avg;
+			packet->entries++;
+
+			if (vis_packet_full(info)) {
+				spin_unlock_bh(&bat_priv->orig_hash_lock);
+				return 0;
+			}
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	hash = bat_priv->hna_local_hash;
+
+	spin_lock_bh(&bat_priv->hna_lhash_lock);
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			hna_local_entry = bucket->data;
+			entry = (struct vis_info_entry *)
+					skb_put(info->skb_packet,
+						sizeof(*entry));
+			memset(entry->src, 0, ETH_ALEN);
+			memcpy(entry->dest, hna_local_entry->addr, ETH_ALEN);
+			entry->quality = 0; /* 0 means HNA */
+			packet->entries++;
+
+			if (vis_packet_full(info)) {
+				spin_unlock_bh(&bat_priv->hna_lhash_lock);
+				return 0;
+			}
+		}
+	}
+
+	spin_unlock_bh(&bat_priv->hna_lhash_lock);
+	return 0;
+}
+
+/* free old vis packets. Must be called with this vis_hash_lock
+ * held */
+static void purge_vis_packets(struct bat_priv *bat_priv)
+{
+	int i;
+	struct hashtable_t *hash = bat_priv->vis_hash;
+	struct hlist_node *walk, *safe;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct vis_info *info;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) {
+			info = bucket->data;
+
+			/* never purge own data. */
+			if (info == bat_priv->my_vis_info)
+				continue;
+
+			if (time_after(jiffies,
+				       info->first_seen + VIS_TIMEOUT * HZ)) {
+				hlist_del(walk);
+				kfree(bucket);
+				send_list_del(info);
+				kref_put(&info->refcount, free_info);
+			}
+		}
+	}
+}
+
+static void broadcast_vis_packet(struct bat_priv *bat_priv,
+				 struct vis_info *info)
+{
+	struct hashtable_t *hash = bat_priv->orig_hash;
+	struct hlist_node *walk;
+	struct hlist_head *head;
+	struct element_t *bucket;
+	struct orig_node *orig_node;
+	struct vis_packet *packet;
+	struct sk_buff *skb;
+	struct batman_if *batman_if;
+	uint8_t dstaddr[ETH_ALEN];
+	int i;
+
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	packet = (struct vis_packet *)info->skb_packet->data;
+
+	/* send to all routers in range. */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		hlist_for_each_entry(bucket, walk, head, hlist) {
+			orig_node = bucket->data;
+
+			/* if it's a vis server and reachable, send it. */
+			if ((!orig_node) || (!orig_node->router))
+				continue;
+			if (!(orig_node->flags & VIS_SERVER))
+				continue;
+			/* don't send it if we already received the packet from
+			* this node. */
+			if (recv_list_is_in(bat_priv, &info->recv_list,
+					    orig_node->orig))
+				continue;
+
+			memcpy(packet->target_orig, orig_node->orig, ETH_ALEN);
+			batman_if = orig_node->router->if_incoming;
+			memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+			spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+			skb = skb_clone(info->skb_packet, GFP_ATOMIC);
+			if (skb)
+				send_skb_packet(skb, batman_if, dstaddr);
+
+			spin_lock_bh(&bat_priv->orig_hash_lock);
+		}
+
+	}
+
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+}
+
+static void unicast_vis_packet(struct bat_priv *bat_priv,
+			       struct vis_info *info)
+{
+	struct orig_node *orig_node;
+	struct sk_buff *skb;
+	struct vis_packet *packet;
+	struct batman_if *batman_if;
+	uint8_t dstaddr[ETH_ALEN];
+
+	spin_lock_bh(&bat_priv->orig_hash_lock);
+	packet = (struct vis_packet *)info->skb_packet->data;
+	orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash,
+						   compare_orig, choose_orig,
+						   packet->target_orig));
+
+	if ((!orig_node) || (!orig_node->router))
+		goto out;
+
+	/* don't lock while sending the packets ... we therefore
+	 * copy the required data before sending */
+	batman_if = orig_node->router->if_incoming;
+	memcpy(dstaddr, orig_node->router->addr, ETH_ALEN);
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+
+	skb = skb_clone(info->skb_packet, GFP_ATOMIC);
+	if (skb)
+		send_skb_packet(skb, batman_if, dstaddr);
+
+	return;
+
+out:
+	spin_unlock_bh(&bat_priv->orig_hash_lock);
+}
+
+/* only send one vis packet. called from send_vis_packets() */
+static void send_vis_packet(struct bat_priv *bat_priv, struct vis_info *info)
+{
+	struct vis_packet *packet;
+
+	packet = (struct vis_packet *)info->skb_packet->data;
+	if (packet->ttl < 2) {
+		pr_debug("Error - can't send vis packet: ttl exceeded\n");
+		return;
+	}
+
+	memcpy(packet->sender_orig, bat_priv->primary_if->net_dev->dev_addr,
+	       ETH_ALEN);
+	packet->ttl--;
+
+	if (is_broadcast_ether_addr(packet->target_orig))
+		broadcast_vis_packet(bat_priv, info);
+	else
+		unicast_vis_packet(bat_priv, info);
+	packet->ttl++; /* restore TTL */
+}
+
+/* called from timer; send (and maybe generate) vis packet. */
+static void send_vis_packets(struct work_struct *work)
+{
+	struct delayed_work *delayed_work =
+		container_of(work, struct delayed_work, work);
+	struct bat_priv *bat_priv =
+		container_of(delayed_work, struct bat_priv, vis_work);
+	struct vis_info *info, *temp;
+
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+	purge_vis_packets(bat_priv);
+
+	if (generate_vis_packet(bat_priv) == 0) {
+		/* schedule if generation was successful */
+		send_list_add(bat_priv, bat_priv->my_vis_info);
+	}
+
+	list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list,
+				 send_list) {
+
+		kref_get(&info->refcount);
+		spin_unlock_bh(&bat_priv->vis_hash_lock);
+
+		if (bat_priv->primary_if)
+			send_vis_packet(bat_priv, info);
+
+		spin_lock_bh(&bat_priv->vis_hash_lock);
+		send_list_del(info);
+		kref_put(&info->refcount, free_info);
+	}
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+	start_vis_timer(bat_priv);
+}
+
+/* init the vis server. this may only be called when if_list is already
+ * initialized (e.g. bat0 is initialized, interfaces have been added) */
+int vis_init(struct bat_priv *bat_priv)
+{
+	struct vis_packet *packet;
+	int hash_added;
+
+	if (bat_priv->vis_hash)
+		return 1;
+
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+
+	bat_priv->vis_hash = hash_new(256);
+	if (!bat_priv->vis_hash) {
+		pr_err("Can't initialize vis_hash\n");
+		goto err;
+	}
+
+	bat_priv->my_vis_info = kmalloc(MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
+	if (!bat_priv->my_vis_info) {
+		pr_err("Can't initialize vis packet\n");
+		goto err;
+	}
+
+	bat_priv->my_vis_info->skb_packet = dev_alloc_skb(
+						sizeof(struct vis_packet) +
+						MAX_VIS_PACKET_SIZE +
+						sizeof(struct ethhdr));
+	if (!bat_priv->my_vis_info->skb_packet)
+		goto free_info;
+
+	skb_reserve(bat_priv->my_vis_info->skb_packet, sizeof(struct ethhdr));
+	packet = (struct vis_packet *)skb_put(
+					bat_priv->my_vis_info->skb_packet,
+					sizeof(struct vis_packet));
+
+	/* prefill the vis info */
+	bat_priv->my_vis_info->first_seen = jiffies -
+						msecs_to_jiffies(VIS_INTERVAL);
+	INIT_LIST_HEAD(&bat_priv->my_vis_info->recv_list);
+	INIT_LIST_HEAD(&bat_priv->my_vis_info->send_list);
+	kref_init(&bat_priv->my_vis_info->refcount);
+	bat_priv->my_vis_info->bat_priv = bat_priv;
+	packet->version = COMPAT_VERSION;
+	packet->packet_type = BAT_VIS;
+	packet->ttl = TTL;
+	packet->seqno = 0;
+	packet->entries = 0;
+
+	INIT_LIST_HEAD(&bat_priv->vis_send_list);
+
+	hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose,
+			      bat_priv->my_vis_info);
+	if (hash_added < 0) {
+		pr_err("Can't add own vis packet into hash\n");
+		/* not in hash, need to remove it manually. */
+		kref_put(&bat_priv->my_vis_info->refcount, free_info);
+		goto err;
+	}
+
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+	start_vis_timer(bat_priv);
+	return 1;
+
+free_info:
+	kfree(bat_priv->my_vis_info);
+	bat_priv->my_vis_info = NULL;
+err:
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+	vis_quit(bat_priv);
+	return 0;
+}
+
+/* Decrease the reference count on a hash item info */
+static void free_info_ref(void *data, void *arg)
+{
+	struct vis_info *info = data;
+
+	send_list_del(info);
+	kref_put(&info->refcount, free_info);
+}
+
+/* shutdown vis-server */
+void vis_quit(struct bat_priv *bat_priv)
+{
+	if (!bat_priv->vis_hash)
+		return;
+
+	cancel_delayed_work_sync(&bat_priv->vis_work);
+
+	spin_lock_bh(&bat_priv->vis_hash_lock);
+	/* properly remove, kill timers ... */
+	hash_delete(bat_priv->vis_hash, free_info_ref, NULL);
+	bat_priv->vis_hash = NULL;
+	bat_priv->my_vis_info = NULL;
+	spin_unlock_bh(&bat_priv->vis_hash_lock);
+}
+
+/* schedule packets for (re)transmission */
+static void start_vis_timer(struct bat_priv *bat_priv)
+{
+	INIT_DELAYED_WORK(&bat_priv->vis_work, send_vis_packets);
+	queue_delayed_work(bat_event_workqueue, &bat_priv->vis_work,
+			   msecs_to_jiffies(VIS_INTERVAL));
+}
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
new file mode 100644
index 000000000000..2c3b33089a9b
--- /dev/null
+++ b/net/batman-adv/vis.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors:
+ *
+ * Simon Wunderlich, Marek Lindner
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ *
+ */
+
+#ifndef _NET_BATMAN_ADV_VIS_H_
+#define _NET_BATMAN_ADV_VIS_H_
+
+#define VIS_TIMEOUT		200	/* timeout of vis packets in seconds */
+
+int vis_seq_print_text(struct seq_file *seq, void *offset);
+void receive_server_sync_packet(struct bat_priv *bat_priv,
+				struct vis_packet *vis_packet,
+				int vis_info_len);
+void receive_client_update_packet(struct bat_priv *bat_priv,
+				  struct vis_packet *vis_packet,
+				  int vis_info_len);
+int vis_init(struct bat_priv *bat_priv);
+void vis_quit(struct bat_priv *bat_priv);
+
+#endif /* _NET_BATMAN_ADV_VIS_H_ */
-- 
cgit v1.2.3


From 443457242beb6716b43db4d62fe148eab5515505 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Mon, 13 Dec 2010 12:44:07 +0000
Subject: net: factorize sync-rcu call in unregister_netdevice_many

Add dev_close_many and dev_deactivate_many to factorize another
sync-rcu operation on the netdevice unregister path.

$ modprobe dummy numdummies=10000
$ ip link set dev dummy* up
$ time rmmod dummy

Without the patch           With the patch

real    0m 24.63s           real    0m 5.15s
user    0m 0.00s            user    0m 0.00s
sys     0m 6.05s            sys     0m 5.14s

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |   1 +
 net/core/dev.c            | 118 +++++++++++++++++++++++++++++-----------------
 net/sched/sch_generic.c   |  29 +++++++++---
 3 files changed, 99 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ea1f8a83160d..786cc396cb4a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -321,6 +321,7 @@ extern void dev_init_scheduler(struct net_device *dev);
 extern void dev_shutdown(struct net_device *dev);
 extern void dev_activate(struct net_device *dev);
 extern void dev_deactivate(struct net_device *dev);
+extern void dev_deactivate_many(struct list_head *head);
 extern struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 				     struct Qdisc *qdisc);
 extern void qdisc_reset(struct Qdisc *qdisc);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ac26d2b9722..794b20de5d44 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1222,52 +1222,90 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
+	struct net_device *dev;
 
 	ASSERT_RTNL();
 	might_sleep();
 
-	/*
-	 *	Tell people we are going down, so that they can
-	 *	prepare to death, when device is still operating.
-	 */
-	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/*
+		 *	Tell people we are going down, so that they can
+		 *	prepare to death, when device is still operating.
+		 */
+		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
-	clear_bit(__LINK_STATE_START, &dev->state);
+		clear_bit(__LINK_STATE_START, &dev->state);
 
-	/* Synchronize to scheduled poll. We cannot touch poll list,
-	 * it can be even on different cpu. So just clear netif_running().
-	 *
-	 * dev->stop() will invoke napi_disable() on all of it's
-	 * napi_struct instances on this device.
-	 */
-	smp_mb__after_clear_bit(); /* Commit netif_running(). */
+		/* Synchronize to scheduled poll. We cannot touch poll list, it
+		 * can be even on different cpu. So just clear netif_running().
+		 *
+		 * dev->stop() will invoke napi_disable() on all of it's
+		 * napi_struct instances on this device.
+		 */
+		smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	}
 
-	dev_deactivate(dev);
+	dev_deactivate_many(head);
 
-	/*
-	 *	Call the device specific close. This cannot fail.
-	 *	Only if device is UP
-	 *
-	 *	We allow it to be called even after a DETACH hot-plug
-	 *	event.
-	 */
-	if (ops->ndo_stop)
-		ops->ndo_stop(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		const struct net_device_ops *ops = dev->netdev_ops;
 
-	/*
-	 *	Device is now down.
-	 */
+		/*
+		 *	Call the device specific close. This cannot fail.
+		 *	Only if device is UP
+		 *
+		 *	We allow it to be called even after a DETACH hot-plug
+		 *	event.
+		 */
+		if (ops->ndo_stop)
+			ops->ndo_stop(dev);
+
+		/*
+		 *	Device is now down.
+		 */
+
+		dev->flags &= ~IFF_UP;
+
+		/*
+		 *	Shutdown NET_DMA
+		 */
+		net_dmaengine_put();
+	}
 
-	dev->flags &= ~IFF_UP;
+	return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+	struct net_device *dev, *tmp;
+	LIST_HEAD(tmp_list);
+
+	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+		if (!(dev->flags & IFF_UP))
+			list_move(&dev->unreg_list, &tmp_list);
+
+	__dev_close_many(head);
 
 	/*
-	 *	Shutdown NET_DMA
+	 * Tell people we are down
 	 */
-	net_dmaengine_put();
+	list_for_each_entry(dev, head, unreg_list) {
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
 
+	/* rollback_registered_many needs the complete original list */
+	list_splice(&tmp_list, head);
 	return 0;
 }
 
@@ -1282,16 +1320,10 @@ static int __dev_close(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
-	if (!(dev->flags & IFF_UP))
-		return 0;
-
-	__dev_close(dev);
+	LIST_HEAD(single);
 
-	/*
-	 * Tell people we are down
-	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-	call_netdevice_notifiers(NETDEV_DOWN, dev);
+	list_add(&dev->unreg_list, &single);
+	dev_close_many(&single);
 
 	return 0;
 }
@@ -4963,10 +4995,12 @@ static void rollback_registered_many(struct list_head *head)
 		}
 
 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	}
 
-		/* If device is running, close it first. */
-		dev_close(dev);
+	/* If device is running, close it first. */
+	dev_close_many(head);
 
+	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
 		unlist_netdevice(dev);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 0918834ee4a1..34dc598440a2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -810,20 +810,35 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 	return false;
 }
 
-void dev_deactivate(struct net_device *dev)
+void dev_deactivate_many(struct list_head *head)
 {
-	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
-	if (dev_ingress_queue(dev))
-		dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
+	struct net_device *dev;
 
-	dev_watchdog_down(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
+					 &noop_qdisc);
+		if (dev_ingress_queue(dev))
+			dev_deactivate_queue(dev, dev_ingress_queue(dev),
+					     &noop_qdisc);
+
+		dev_watchdog_down(dev);
+	}
 
 	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	while (some_qdisc_is_busy(dev))
-		yield();
+	list_for_each_entry(dev, head, unreg_list)
+		while (some_qdisc_is_busy(dev))
+			yield();
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	dev_deactivate_many(&single);
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
-- 
cgit v1.2.3


From 55508d601dab7df5cbcc7a63f4be8620eface204 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 14 Dec 2010 15:24:08 +0000
Subject: net: Use skb_checksum_start_offset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace skb->csum_start - skb_headroom(skb) with skb_checksum_start_offset().

Note for usb/smsc95xx: skb->data - skb->head == skb_headroom(skb).

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atlx/atl1.c    | 2 +-
 drivers/net/macvtap.c      | 3 +--
 drivers/net/tun.c          | 2 +-
 drivers/net/usb/smsc95xx.c | 7 +++----
 drivers/net/virtio_net.c   | 2 +-
 net/core/dev.c             | 6 +++---
 net/core/skbuff.c          | 2 +-
 net/ipv4/udp.c             | 2 +-
 net/packet/af_packet.c     | 3 +--
 9 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c
index 53363108994e..def8df83359c 100644
--- a/drivers/net/atlx/atl1.c
+++ b/drivers/net/atlx/atl1.c
@@ -2174,7 +2174,7 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 	u8 css, cso;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		css = (u8) (skb->csum_start - skb_headroom(skb));
+		css = skb_checksum_start_offset(skb);
 		cso = css + (u8) skb->csum_offset;
 		if (unlikely(css & 0x1)) {
 			/* L1 hardware requires an even number here */
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 42567279843e..21845affea13 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -504,8 +504,7 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		vnet_hdr->csum_start = skb->csum_start -
-					skb_headroom(skb);
+		vnet_hdr->csum_start = skb_checksum_start_offset(skb);
 		vnet_hdr->csum_offset = skb->csum_offset;
 	} /* else everything is zero */
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 55f3a3e667a9..7599c457abd1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -757,7 +757,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-			gso.csum_start = skb->csum_start - skb_headroom(skb);
+			gso.csum_start = skb_checksum_start_offset(skb);
 			gso.csum_offset = skb->csum_offset;
 		} /* else everything is zero */
 
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 65cb1abfbe57..bc86f4b6ecc2 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1163,9 +1163,8 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 static u32 smsc95xx_calc_csum_preamble(struct sk_buff *skb)
 {
-	int len = skb->data - skb->head;
-	u16 high_16 = (u16)(skb->csum_offset + skb->csum_start - len);
-	u16 low_16 = (u16)(skb->csum_start - len);
+	u16 low_16 = (u16)skb_checksum_start_offset(skb);
+	u16 high_16 = low_16 + skb->csum_offset;
 	return (high_16 << 16) | low_16;
 }
 
@@ -1193,7 +1192,7 @@ static struct sk_buff *smsc95xx_tx_fixup(struct usbnet *dev,
 		if (skb->len <= 45) {
 			/* workaround - hardware tx checksum does not work
 			 * properly with extremely small packets */
-			long csstart = skb->csum_start - skb_headroom(skb);
+			long csstart = skb_checksum_start_offset(skb);
 			__wsum calc = csum_partial(skb->data + csstart,
 				skb->len - csstart, 0);
 			*((__sum16 *)(skb->data + csstart
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b6d402806ae6..90a23e410d1b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -519,7 +519,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb);
+		hdr->hdr.csum_start = skb_checksum_start_offset(skb);
 		hdr->hdr.csum_offset = skb->csum_offset;
 	} else {
 		hdr->hdr.flags = 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index 794b20de5d44..92d414ac0e30 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1793,7 +1793,7 @@ int skb_checksum_help(struct sk_buff *skb)
 		goto out_set_summed;
 	}
 
-	offset = skb->csum_start - skb_headroom(skb);
+	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -2090,8 +2090,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			 * checksumming here.
 			 */
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
-				skb_set_transport_header(skb, skb->csum_start -
-					      skb_headroom(skb));
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
 				if (!dev_can_checksum(dev, skb) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8814a9a52f47..19d6c21220fd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1824,7 +1824,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb->csum_start - skb_headroom(skb);
+		csstart = skb_checksum_start_offset(skb);
 	else
 		csstart = skb_headlen(skb);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b37181da487c..1198adf45102 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2226,7 +2226,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
 	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
 	 * do checksum of UDP packets sent as multiple IP fragments.
 	 */
-	offset = skb->csum_start - skb_headroom(skb);
+	offset = skb_checksum_start_offset(skb);
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
 	offset += skb->csum_offset;
 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e79efaf06389..91cb1d71f018 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1650,8 +1650,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
 			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-			vnet_hdr.csum_start = skb->csum_start -
-							skb_headroom(skb);
+			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
 			vnet_hdr.csum_offset = skb->csum_offset;
 		} /* else everything is zero */
 
-- 
cgit v1.2.3


From bc3ef6605ea325e41b586a76aadc3f731c317504 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 16 Dec 2010 17:42:40 +0000
Subject: ipv6: fib6_ifdown cleanup

Remove (unnecessary) casts to make code cleaner.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d5c3b45d829e..373bd0416f69 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2009,11 +2009,11 @@ struct arg_dev_net {
 
 static int fib6_ifdown(struct rt6_info *rt, void *arg)
 {
-	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
-	struct net *net = ((struct arg_dev_net *)arg)->net;
+	const struct arg_dev_net *adn = arg;
+	const struct net_device *dev = adn->dev;
 
-	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
-	    rt != net->ipv6.ip6_null_entry) {
+	if ((rt->rt6i_dev == dev || dev == NULL) &&
+	    rt != adn->net->ipv6.ip6_null_entry) {
 		RT6_TRACE("deleted by ifdown %p\n", rt);
 		return -1;
 	}
-- 
cgit v1.2.3


From d1ed113f1669390da9898da3beddcc058d938587 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Thu, 16 Dec 2010 17:42:54 +0000
Subject: ipv6: remove duplicate neigh_ifdown

When device is being set to down, neigh_ifdown was being called
twice. Once from addrconf notifier and once from ndisc notifier.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 99d1888af363..5b189c97c2fc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2672,7 +2672,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	/* Flush routes if device is being removed or it is not loopback */
 	if (how || !(dev->flags & IFF_LOOPBACK))
 		rt6_ifdown(net, dev);
-	neigh_ifdown(&nd_tbl, dev);
 
 	idev = __in6_dev_get(dev);
 	if (idev == NULL)
-- 
cgit v1.2.3


From 71d9dec24dce548bf699815c976cf063ad9257e2 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 15 Dec 2010 19:57:25 +0000
Subject: net: increase skb->users instead of skb_clone()

In dev_queue_xmit_nit(), we have to clone skbs as we need to mangle skbs,
however, we don't need to clone skbs for all the packet_types.

Except for the first packet_type, we increase skb->users instead of
skb_clone().

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 92d414ac0e30..59877290bca7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1528,6 +1528,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
+{
+	atomic_inc(&skb->users);
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1536,6 +1544,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb);
 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
+	struct sk_buff *skb2 = NULL;
+	struct packet_type *pt_prev = NULL;
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
@@ -1552,7 +1562,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		if ((ptype->dev == dev || !ptype->dev) &&
 		    (ptype->af_packet_priv == NULL ||
 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+			if (pt_prev) {
+				deliver_skb(skb2, pt_prev, skb->dev);
+				pt_prev = ptype;
+				continue;
+			}
+
+			skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (!skb2)
 				break;
 
@@ -1574,9 +1590,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 			skb2->transport_header = skb2->network_header;
 			skb2->pkt_type = PACKET_OUTGOING;
-			ptype->func(skb2, skb->dev, ptype, skb->dev);
+			pt_prev = ptype;
 		}
 	}
+	if (pt_prev)
+		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
 	rcu_read_unlock();
 }
 
@@ -2820,14 +2838,6 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
-static inline int deliver_skb(struct sk_buff *skb,
-			      struct packet_type *pt_prev,
-			      struct net_device *orig_dev)
-{
-	atomic_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
-- 
cgit v1.2.3


From 4c306a9291a077879fc3e933326caac3bc319caa Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Sun, 19 Dec 2010 21:59:35 -0800
Subject: net: kill unused macros

These macros never be used, so remove them.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 1 -
 include/net/tcp.h                  | 2 --
 net/atm/mpc.c                      | 2 --
 net/core/neighbour.c               | 1 -
 net/core/netpoll.c                 | 1 -
 net/netlabel/netlabel_cipso_v4.h   | 1 -
 net/netlabel/netlabel_mgmt.h       | 1 -
 net/netlabel/netlabel_unlabeled.h  | 1 -
 8 files changed, 10 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 6c93a56cc958..6ac4e3b5007f 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -132,7 +132,6 @@ struct inet_connection_sock {
 #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
 #define ICSK_TIME_DACK		2	/* Delayed ack timer */
 #define ICSK_TIME_PROBE0	3	/* Zero window probe timer */
-#define ICSK_TIME_KEEPOPEN	4	/* Keepalive timer */
 
 static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2ab6c9c1c53a..b4480300cadf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1158,8 +1158,6 @@ struct tcp_md5sig_pool {
 	union tcp_md5sum_block	md5_blk;
 };
 
-#define TCP_MD5SIG_MAXKEYS	(~(u32)0)	/* really?! */
-
 /* - functions */
 extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
 			       struct sock *sk, struct request_sock *req,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 74bcc662c3dd..644cdf071642 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -64,8 +64,6 @@
 	do { if (0) printk(KERN_CONT format, ##args); } while (0)
 #endif
 
-#define MPOA_TAG_LEN 4
-
 /* mpc_daemon -> kernel */
 static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc);
 static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8cc8f9a79db9..60a902913429 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -41,7 +41,6 @@
 
 #define NEIGH_PRINTK(x...) printk(x)
 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
-#define NEIGH_PRINTK0 NEIGH_PRINTK
 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index ee38acb6d463..72d9b50109fc 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -35,7 +35,6 @@
 
 #define MAX_UDP_CHUNK 1460
 #define MAX_SKBS 32
-#define MAX_QUEUE_DEPTH (MAX_SKBS / 2)
 
 static struct sk_buff_head skb_pool;
 
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h
index c8a4079261f0..af7f3355103e 100644
--- a/net/netlabel/netlabel_cipso_v4.h
+++ b/net/netlabel/netlabel_cipso_v4.h
@@ -107,7 +107,6 @@ enum {
 	NLBL_CIPSOV4_C_LISTALL,
 	__NLBL_CIPSOV4_C_MAX,
 };
-#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1)
 
 /* NetLabel CIPSOv4 attributes */
 enum {
diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h
index 05d96431f819..0a25838bcf45 100644
--- a/net/netlabel/netlabel_mgmt.h
+++ b/net/netlabel/netlabel_mgmt.h
@@ -173,7 +173,6 @@ enum {
 	NLBL_MGMT_C_VERSION,
 	__NLBL_MGMT_C_MAX,
 };
-#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1)
 
 /* NetLabel Management attributes */
 enum {
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index 7aba63595137..0bc8dc3f9e3c 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -180,7 +180,6 @@ enum {
 	NLBL_UNLABEL_C_STATICLISTDEF,
 	__NLBL_UNLABEL_C_MAX,
 };
-#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
 
 /* NetLabel Unlabeled attributes */
 enum {
-- 
cgit v1.2.3


From 58231186c4532821cb815a3a3248ca02ce5f6f0d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 20 Dec 2010 10:30:06 -0800
Subject: pktgen: Remove unnecessary prefix from pr_<level>

Remove "pktgen: " prefix string from one pr_info.
pr_fmt adds it, so this is a duplicate.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 18fe20dacc60..a9e7fc4c461f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3553,8 +3553,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		break;
 	default: /* Drivers are not supposed to return other values! */
 		if (net_ratelimit())
-			pr_info("pktgen: %s xmit error: %d\n",
-				pkt_dev->odevname, ret);
+			pr_info("%s xmit error: %d\n", pkt_dev->odevname, ret);
 		pkt_dev->errors++;
 		/* fallthru */
 	case NETDEV_TX_LOCKED:
-- 
cgit v1.2.3


From 53320fe3bb1b1eef1aaff8dd47aae530ebeeb1e5 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 20 Dec 2010 10:32:03 -0800
Subject: batman-adv: Return hna count on local buffer fill

hna_local_fill_buffer must return the number of added hna entries and
not the last checked hash bucket.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/batman-adv/translation-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a19e16c94da5..a633b5a435e2 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -162,7 +162,7 @@ int hna_local_fill_buffer(struct bat_priv *bat_priv,
 		atomic_set(&bat_priv->hna_local_changed, 0);
 
 	spin_unlock_bh(&bat_priv->hna_lhash_lock);
-	return i;
+	return count;
 }
 
 int hna_local_seq_print_text(struct seq_file *seq, void *offset)
-- 
cgit v1.2.3


From 6561a3b12d62ed5317e6ac32182d87a03f62c8dc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 19 Dec 2010 21:11:20 -0800
Subject: ipv4: Flush per-ns routing cache more sanely.

Flush the routing cache only of entries that match the
network namespace in which the purge event occurred.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/route.h     |  2 +-
 net/ipv4/fib_frontend.c |  6 ++++-
 net/ipv4/route.c        | 64 +++++++++++++++++++------------------------------
 3 files changed, 30 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/include/net/route.h b/include/net/route.h
index 27002362944a..93e10c453f6b 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -114,7 +114,7 @@ extern int		ip_rt_init(void);
 extern void		ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
 				       __be32 src, struct net_device *dev);
 extern void		rt_cache_flush(struct net *net, int how);
-extern void		rt_cache_flush_batch(void);
+extern void		rt_cache_flush_batch(struct net *net);
 extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index d3a1112b9d9c..9f8bb68911e4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		rt_cache_flush(dev_net(dev), 0);
 		break;
 	case NETDEV_UNREGISTER_BATCH:
-		rt_cache_flush_batch();
+		/* The batch unregister is only called on the first
+		 * device in the list of devices being unregistered.
+		 * Therefore we should not pass dev_net(dev) in here.
+		 */
+		rt_cache_flush_batch(NULL);
 		break;
 	}
 	return NOTIFY_DONE;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ae520963540f..d8b4f4d0d66e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
 	unsigned int i;
 	struct rtable *rth, *next;
-	struct rtable * tail;
 
 	for (i = 0; i <= rt_hash_mask; i++) {
+		struct rtable __rcu **pprev;
+		struct rtable *list;
+
 		if (process_context && need_resched())
 			cond_resched();
 		rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -731,50 +733,32 @@ static void rt_do_flush(int process_context)
 			continue;
 
 		spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-		{
-		struct rtable __rcu **prev;
-		struct rtable *p;
 
-		rth = rcu_dereference_protected(rt_hash_table[i].chain,
+		list = NULL;
+		pprev = &rt_hash_table[i].chain;
+		rth = rcu_dereference_protected(*pprev,
 			lockdep_is_held(rt_hash_lock_addr(i)));
 
-		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail;
-		     tail = rcu_dereference_protected(tail->dst.rt_next,
-				lockdep_is_held(rt_hash_lock_addr(i))))
-			if (!rt_is_expired(tail))
-				break;
-		if (rth != tail)
-			rt_hash_table[i].chain = tail;
-
-		/* call rt_free on entries after the tail requiring flush */
-		prev = &rt_hash_table[i].chain;
-		for (p = rcu_dereference_protected(*prev,
+		while (rth) {
+			next = rcu_dereference_protected(rth->dst.rt_next,
 				lockdep_is_held(rt_hash_lock_addr(i)));
-		     p != NULL;
-		     p = next) {
-			next = rcu_dereference_protected(p->dst.rt_next,
-				lockdep_is_held(rt_hash_lock_addr(i)));
-			if (!rt_is_expired(p)) {
-				prev = &p->dst.rt_next;
+
+			if (!net ||
+			    net_eq(dev_net(rth->dst.dev), net)) {
+				rcu_assign_pointer(*pprev, next);
+				rcu_assign_pointer(rth->dst.rt_next, list);
+				list = rth;
 			} else {
-				*prev = next;
-				rt_free(p);
+				pprev = &rth->dst.rt_next;
 			}
+			rth = next;
 		}
-		}
-#else
-		rth = rcu_dereference_protected(rt_hash_table[i].chain,
-			lockdep_is_held(rt_hash_lock_addr(i)));
-		rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-		tail = NULL;
-#endif
+
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
-		for (; rth != tail; rth = next) {
-			next = rcu_dereference_protected(rth->dst.rt_next, 1);
-			rt_free(rth);
+		for (; list; list = next) {
+			next = rcu_dereference_protected(list->dst.rt_next, 1);
+			rt_free(list);
 		}
 	}
 }
@@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
 	rt_cache_invalidate(net);
 	if (delay >= 0)
-		rt_do_flush(!in_softirq());
+		rt_do_flush(net, !in_softirq());
 }
 
 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-	rt_do_flush(!in_softirq());
+	rt_do_flush(net, !in_softirq());
 }
 
 static void rt_emergency_hash_rebuild(struct net *net)
-- 
cgit v1.2.3


From 24bdd9f4c9af75b33b438d60381a67626de0128d Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:37:48 -0800
Subject: mac80211: Rename mesh_params to mesh_config to prepare for mesh_setup

Mesh parameters can be to setup a mesh or to configure it.
This patch renames the ambiguous name mesh_params to mesh_config
in preparation for mesh_setup.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 15 ++++++++++-----
 include/net/cfg80211.h  |  8 ++++----
 net/mac80211/cfg.c      |  8 ++++----
 net/wireless/nl80211.c  | 40 ++++++++++++++++++++--------------------
 4 files changed, 38 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 7483a89cee8f..11a1de67b618 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -172,10 +172,10 @@
  * 	to the specified ISO/IEC 3166-1 alpha2 country code. The core will
  * 	store this as a valid request and then query userspace for it.
  *
- * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the
+ * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the
  *	interface identified by %NL80211_ATTR_IFINDEX
  *
- * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the
+ * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the
  *      interface identified by %NL80211_ATTR_IFINDEX
  *
  * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The
@@ -448,8 +448,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_REG,
 	NL80211_CMD_REQ_SET_REG,
 
-	NL80211_CMD_GET_MESH_PARAMS,
-	NL80211_CMD_SET_MESH_PARAMS,
+	NL80211_CMD_GET_MESH_CONFIG,
+	NL80211_CMD_SET_MESH_CONFIG,
 
 	NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */,
 
@@ -538,6 +538,10 @@ enum nl80211_commands {
 #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
 #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
 
+/* source-level API compatibility */
+#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
+#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
+
 /**
  * enum nl80211_attrs - nl80211 netlink attributes
  *
@@ -922,7 +926,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_REG_ALPHA2,
 	NL80211_ATTR_REG_RULES,
 
-	NL80211_ATTR_MESH_PARAMS,
+	NL80211_ATTR_MESH_CONFIG,
 
 	NL80211_ATTR_BSS_BASIC_RATES,
 
@@ -1058,6 +1062,7 @@ enum nl80211_attrs {
 
 /* source-level API compatibility */
 #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION
+#define	NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
 
 /*
  * Allow user space programs to use #ifdef on new attributes by defining them
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6dc665a727c2..7283496c2d05 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1096,9 +1096,9 @@ struct cfg80211_pmksa {
  * @get_mpath: get a mesh path for the given parameters
  * @dump_mpath: dump mesh path callback -- resume dump at index @idx
  *
- * @get_mesh_params: Put the current mesh parameters into *params
+ * @get_mesh_config: Get the current mesh configuration
  *
- * @update_mesh_params: Update mesh parameters on a running mesh.
+ * @update_mesh_config: Update mesh parameters on a running mesh.
  *	The mask is a bitfield which tells us which parameters to
  *	set, and which to leave alone.
  *
@@ -1246,10 +1246,10 @@ struct cfg80211_ops {
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *dst, u8 *next_hop,
 			       struct mpath_info *pinfo);
-	int	(*get_mesh_params)(struct wiphy *wiphy,
+	int	(*get_mesh_config)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
-	int	(*update_mesh_params)(struct wiphy *wiphy,
+	int	(*update_mesh_config)(struct wiphy *wiphy,
 				      struct net_device *dev, u32 mask,
 				      const struct mesh_config *nconf);
 	int	(*join_mesh)(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ea06f92801e9..1c94a2ae22ee 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -984,7 +984,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_get_mesh_params(struct wiphy *wiphy,
+static int ieee80211_get_mesh_config(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf)
 {
@@ -1000,7 +1000,7 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
-static int ieee80211_update_mesh_params(struct wiphy *wiphy,
+static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 					struct net_device *dev, u32 mask,
 					const struct mesh_config *nconf)
 {
@@ -1787,8 +1787,8 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_mpath = ieee80211_change_mpath,
 	.get_mpath = ieee80211_get_mpath,
 	.dump_mpath = ieee80211_dump_mpath,
-	.update_mesh_params = ieee80211_update_mesh_params,
-	.get_mesh_params = ieee80211_get_mesh_params,
+	.update_mesh_config = ieee80211_update_mesh_config,
+	.get_mesh_config = ieee80211_get_mesh_config,
 	.join_mesh = ieee80211_join_mesh,
 	.leave_mesh = ieee80211_leave_mesh,
 #endif
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index aefce54d47e2..10be9350752e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -123,7 +123,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 					   .len = NL80211_MAX_SUPP_RATES },
 	[NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 },
 
-	[NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED },
+	[NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
 
 	[NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
 					 .len = NL80211_HT_CAPABILITY_LEN },
@@ -719,7 +719,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	CMD(add_beacon, NEW_BEACON);
 	CMD(add_station, NEW_STATION);
 	CMD(add_mpath, NEW_MPATH);
-	CMD(update_mesh_params, SET_MESH_PARAMS);
+	CMD(update_mesh_config, SET_MESH_CONFIG);
 	CMD(change_bss, SET_BSS);
 	CMD(auth, AUTHENTICATE);
 	CMD(assoc, ASSOCIATE);
@@ -2673,7 +2673,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	return r;
 }
 
-static int nl80211_get_mesh_params(struct sk_buff *skb,
+static int nl80211_get_mesh_config(struct sk_buff *skb,
 				   struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -2688,7 +2688,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
 		return -EOPNOTSUPP;
 
-	if (!rdev->ops->get_mesh_params)
+	if (!rdev->ops->get_mesh_config)
 		return -EOPNOTSUPP;
 
 	wdev_lock(wdev);
@@ -2696,7 +2696,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (!wdev->mesh_id_len)
 		memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
 	else
-		err = rdev->ops->get_mesh_params(&rdev->wiphy, dev,
+		err = rdev->ops->get_mesh_config(&rdev->wiphy, dev,
 						 &cur_params);
 	wdev_unlock(wdev);
 
@@ -2708,10 +2708,10 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
 	if (!msg)
 		return -ENOMEM;
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
-			     NL80211_CMD_GET_MESH_PARAMS);
+			     NL80211_CMD_GET_MESH_CONFIG);
 	if (!hdr)
 		goto nla_put_failure;
-	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS);
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG);
 	if (!pinfoattr)
 		goto nla_put_failure;
 	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
@@ -2773,7 +2773,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 };
 
-static int nl80211_parse_mesh_params(struct genl_info *info,
+static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
 {
@@ -2789,10 +2789,10 @@ do {\
 } while (0);\
 
 
-	if (!info->attrs[NL80211_ATTR_MESH_PARAMS])
+	if (!info->attrs[NL80211_ATTR_MESH_CONFIG])
 		return -EINVAL;
 	if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX,
-			     info->attrs[NL80211_ATTR_MESH_PARAMS],
+			     info->attrs[NL80211_ATTR_MESH_CONFIG],
 			     nl80211_meshconf_params_policy))
 		return -EINVAL;
 
@@ -2847,7 +2847,7 @@ do {\
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
-static int nl80211_update_mesh_params(struct sk_buff *skb,
+static int nl80211_update_mesh_config(struct sk_buff *skb,
 				      struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -2860,10 +2860,10 @@ static int nl80211_update_mesh_params(struct sk_buff *skb,
 	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
 		return -EOPNOTSUPP;
 
-	if (!rdev->ops->update_mesh_params)
+	if (!rdev->ops->update_mesh_config)
 		return -EOPNOTSUPP;
 
-	err = nl80211_parse_mesh_params(info, &cfg, &mask);
+	err = nl80211_parse_mesh_config(info, &cfg, &mask);
 	if (err)
 		return err;
 
@@ -2872,7 +2872,7 @@ static int nl80211_update_mesh_params(struct sk_buff *skb,
 		err = -ENOLINK;
 
 	if (!err)
-		err = rdev->ops->update_mesh_params(&rdev->wiphy, dev,
+		err = rdev->ops->update_mesh_config(&rdev->wiphy, dev,
 						    mask, &cfg);
 
 	wdev_unlock(wdev);
@@ -4672,9 +4672,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	/* start with default */
 	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
 
-	if (info->attrs[NL80211_ATTR_MESH_PARAMS]) {
+	if (info->attrs[NL80211_ATTR_MESH_CONFIG]) {
 		/* and parse parameters if given */
-		err = nl80211_parse_mesh_params(info, &cfg, NULL);
+		err = nl80211_parse_mesh_config(info, &cfg, NULL);
 		if (err)
 			return err;
 	}
@@ -4952,16 +4952,16 @@ static struct genl_ops nl80211_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 	},
 	{
-		.cmd = NL80211_CMD_GET_MESH_PARAMS,
-		.doit = nl80211_get_mesh_params,
+		.cmd = NL80211_CMD_GET_MESH_CONFIG,
+		.doit = nl80211_get_mesh_config,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
-		.cmd = NL80211_CMD_SET_MESH_PARAMS,
-		.doit = nl80211_update_mesh_params,
+		.cmd = NL80211_CMD_SET_MESH_CONFIG,
+		.doit = nl80211_update_mesh_config,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
-- 
cgit v1.2.3


From c80d545da3f7c0e534ccd4a780f322f80a92cff1 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:37:49 -0800
Subject: mac80211: Let userspace enable and configure vendor specific path
 selection.

Userspace will now be allowed to toggle between the default path
selection algorithm (HWMP, implemented in the kernel), and a vendor
specific alternative.  Also in the same patch, allow userspace to add
information elements to mesh beacons.  This is accordance with the
Extensible Path Selection Framework specified in version 7.0 of the
802.11s draft.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  | 25 ++++++++++++++++++
 include/linux/nl80211.h    | 47 +++++++++++++++++++++++++++++++---
 include/net/cfg80211.h     |  8 ++++++
 net/mac80211/cfg.c         | 39 +++++++++++++++++++++++++---
 net/mac80211/ieee80211_i.h |  4 +--
 net/mac80211/mesh.c        |  7 ++++++
 net/mac80211/mesh_plink.c  |  3 ++-
 net/mac80211/tx.c          |  3 ++-
 net/wireless/core.c        | 22 +++++++++++-----
 net/wireless/core.h        |  5 ++--
 net/wireless/mesh.c        | 24 ++++++++++--------
 net/wireless/nl80211.c     | 63 ++++++++++++++++++++++++++++++++++++++++++----
 12 files changed, 214 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7f2354534242..cd681681d211 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1291,6 +1291,31 @@ enum ieee80211_key_len {
 	WLAN_KEY_LEN_AES_CMAC = 16,
 };
 
+/**
+ * enum - mesh path selection protocol identifier
+ *
+ * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol
+ * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will
+ * be specified in a vendor specific information element
+ */
+enum {
+	IEEE80211_PATH_PROTOCOL_HWMP = 0,
+	IEEE80211_PATH_PROTOCOL_VENDOR = 255,
+};
+
+/**
+ * enum - mesh path selection metric identifier
+ *
+ * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric
+ * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be
+ * specified in a vendor specific information element
+ */
+enum {
+	IEEE80211_PATH_METRIC_AIRTIME = 0,
+	IEEE80211_PATH_METRIC_VENDOR = 255,
+};
+
+
 /*
  * IEEE 802.11-2007 7.3.2.9 Country information element
  *
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 11a1de67b618..69eaccac78c4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -872,6 +872,9 @@ enum nl80211_commands {
  *	attributes, specifying what a key should be set as default as.
  *	See &enum nl80211_key_default_types.
  *
+ * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters.  These cannot be
+ * changed once the mesh is active.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1054,6 +1057,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
 
+	NL80211_ATTR_MESH_SETUP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1564,7 +1569,8 @@ enum nl80211_mntr_flags {
 /**
  * enum nl80211_meshconf_params - mesh configuration parameters
  *
- * Mesh configuration parameters
+ * Mesh configuration parameters. These can be changed while the mesh is
+ * active.
  *
  * @__NL80211_MESHCONF_INVALID: internal use
  *
@@ -1587,9 +1593,6 @@ enum nl80211_mntr_flags {
  * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
  * point.
  *
- * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
- * source mesh point for path selection elements.
- *
  * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
  * open peer links when we detect compatible mesh peers.
  *
@@ -1616,6 +1619,9 @@ enum nl80211_mntr_flags {
  *
  * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not
  *
+ * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a
+ * source mesh point for path selection elements.
+ *
  * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
@@ -1643,6 +1649,39 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_mesh_setup_params - mesh setup parameters
+ *
+ * Mesh setup parameters.  These are used to start/join a mesh and cannot be
+ * changed while the mesh is active.
+ *
+ * @__NL80211_MESH_SETUP_INVALID: Internal use
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a
+ * vendor specific path selection algorithm or disable it to use the default
+ * HWMP.
+ *
+ * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a
+ * vendor specific path metric or disable it to use the default Airtime
+ * metric.
+ *
+ * @NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE: A vendor specific information
+ * element that vendors will use to identify the path selection methods and
+ * metrics in use.
+ *
+ * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
+ */
+enum nl80211_mesh_setup_params {
+	__NL80211_MESH_SETUP_INVALID,
+	NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL,
+	NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC,
+	NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE,
+
+	/* keep last */
+	__NL80211_MESH_SETUP_ATTR_AFTER_LAST,
+	NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_txq_attr - TX queue parameter attributes
  * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7283496c2d05..924d60366233 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -649,12 +649,20 @@ struct mesh_config {
  * struct mesh_setup - 802.11s mesh setup configuration
  * @mesh_id: the mesh ID
  * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes
+ * @path_sel_proto: which path selection protocol to use
+ * @path_metric: which metric to use
+ * @vendor_ie: vendor information elements (optional)
+ * @vendor_ie_len: length of vendor information elements
  *
  * These parameters are fixed when the mesh is created.
  */
 struct mesh_setup {
 	const u8 *mesh_id;
 	u8 mesh_id_len;
+	u8  path_sel_proto;
+	u8  path_metric;
+	const u8 *vendor_ie;
+	u8 vendor_ie_len;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 1c94a2ae22ee..ae2c7127a8aa 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1000,6 +1000,36 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask)
 	return (mask >> (parm-1)) & 0x1;
 }
 
+static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
+		const struct mesh_setup *setup)
+{
+	u8 *new_ie;
+	const u8 *old_ie;
+
+	/* first allocate the new vendor information element */
+	new_ie = NULL;
+	old_ie = ifmsh->vendor_ie;
+
+	ifmsh->vendor_ie_len = setup->vendor_ie_len;
+	if (setup->vendor_ie_len) {
+		new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len,
+				GFP_KERNEL);
+		if (!new_ie)
+			return -ENOMEM;
+	}
+
+	/* now copy the rest of the setup parameters */
+	ifmsh->mesh_id_len = setup->mesh_id_len;
+	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
+	ifmsh->mesh_pp_id = setup->path_sel_proto;
+	ifmsh->mesh_pm_id = setup->path_metric;
+	ifmsh->vendor_ie = new_ie;
+
+	kfree(old_ie);
+
+	return 0;
+}
+
 static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 					struct net_device *dev, u32 mask,
 					const struct mesh_config *nconf)
@@ -1059,11 +1089,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	int err;
 
-	memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config));
-	ifmsh->mesh_id_len = setup->mesh_id_len;
-	memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len);
-
+	memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config));
+	err = copy_mesh_setup(ifmsh, setup);
+	if (err)
+		return err;
 	ieee80211_start_mesh(sdata);
 
 	return 0;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ce58b2a676e2..eadaa243a3da 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -484,6 +484,8 @@ struct ieee80211_if_mesh {
 	struct mesh_config mshcfg;
 	u32 mesh_seqnum;
 	bool accepting_plinks;
+	const u8 *vendor_ie;
+	u8 vendor_ie_len;
 };
 
 #ifdef CONFIG_MAC80211_MESH
@@ -585,9 +587,7 @@ struct ieee80211_sub_if_data {
 		struct ieee80211_if_vlan vlan;
 		struct ieee80211_if_managed mgd;
 		struct ieee80211_if_ibss ibss;
-#ifdef CONFIG_MAC80211_MESH
 		struct ieee80211_if_mesh mesh;
-#endif
 		u32 mntr_flags;
 	} u;
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 63e1188d5062..c326e009389d 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -287,6 +287,13 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos++ |= sdata->u.mesh.accepting_plinks ?
 	    MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
+
+	if (sdata->u.mesh.vendor_ie) {
+		int len = sdata->u.mesh.vendor_ie_len;
+		const u8 *data = sdata->u.mesh.vendor_ie;
+		if (skb_tailroom(skb) > len)
+			memcpy(skb_put(skb, len), data, len);
+	}
 }
 
 u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1c91f0f3c307..44b53931ba5e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -160,7 +160,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid,
 		__le16 reason) {
 	struct ieee80211_local *local = sdata->local;
-	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
+	struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 +
+			sdata->u.mesh.vendor_ie_len);
 	struct ieee80211_mgmt *mgmt;
 	bool include_plid = false;
 	static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A };
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 157bde993ef5..f4b1b624ea9f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2290,7 +2290,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		u8 *pos;
 
 		/* headroom, head length, tail length and maximum TIM length */
-		skb = dev_alloc_skb(local->tx_headroom + 400);
+		skb = dev_alloc_skb(local->tx_headroom + 400 +
+				sdata->u.mesh.vendor_ie_len);
 		if (!skb)
 			goto out;
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 79772fcc37bc..e9a5f8ca4c27 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -789,13 +789,23 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			cfg80211_mgd_wext_connect(rdev, wdev);
 			break;
 #endif
+#ifdef CONFIG_MAC80211_MESH
 		case NL80211_IFTYPE_MESH_POINT:
-			/* backward compat code ... */
-			if (wdev->mesh_id_up_len)
-				__cfg80211_join_mesh(rdev, dev, wdev->ssid,
-						     wdev->mesh_id_up_len,
-						     &default_mesh_config);
-			break;
+			{
+				/* backward compat code... */
+				struct mesh_setup setup;
+				memcpy(&setup, &default_mesh_setup,
+						sizeof(setup));
+				 /* back compat only needed for mesh_id */
+				setup.mesh_id = wdev->ssid;
+				setup.mesh_id_len = wdev->mesh_id_up_len;
+				if (wdev->mesh_id_up_len)
+					__cfg80211_join_mesh(rdev, dev,
+							&setup,
+							&default_mesh_config);
+				break;
+			}
+#endif
 		default:
 			break;
 		}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 743203bb61ac..26a0a084e16b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -287,13 +287,14 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
 
 /* mesh */
 extern const struct mesh_config default_mesh_config;
+extern const struct mesh_setup default_mesh_setup;
 int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
-			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_setup *setup,
 			 const struct mesh_config *conf);
 int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev,
-		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_setup *setup,
 		       const struct mesh_config *conf);
 int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index e0b9747fe50a..73e39c171ffb 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -50,17 +50,19 @@ const struct mesh_config default_mesh_config = {
 	.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT,
 };
 
+const struct mesh_setup default_mesh_setup = {
+	.path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP,
+	.path_metric = IEEE80211_PATH_METRIC_AIRTIME,
+	.vendor_ie = NULL,
+	.vendor_ie_len = 0,
+};
 
 int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev,
-			 const u8 *mesh_id, u8 mesh_id_len,
+			 const struct mesh_setup *setup,
 			 const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct mesh_setup setup = {
-		.mesh_id = mesh_id,
-		.mesh_id_len = mesh_id_len,
-	};
 	int err;
 
 	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
@@ -73,16 +75,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 	if (wdev->mesh_id_len)
 		return -EALREADY;
 
-	if (!mesh_id_len)
+	if (!setup->mesh_id_len)
 		return -EINVAL;
 
 	if (!rdev->ops->join_mesh)
 		return -EOPNOTSUPP;
 
-	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup);
+	err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup);
 	if (!err) {
-		memcpy(wdev->ssid, mesh_id, mesh_id_len);
-		wdev->mesh_id_len = mesh_id_len;
+		memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
+		wdev->mesh_id_len = setup->mesh_id_len;
 	}
 
 	return err;
@@ -90,14 +92,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 
 int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev,
-		       const u8 *mesh_id, u8 mesh_id_len,
+		       const struct mesh_setup *setup,
 		       const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf);
+	err = __cfg80211_join_mesh(rdev, dev, setup, conf);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 10be9350752e..eef89d0b558b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2773,6 +2773,14 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 };
 
+static const struct nla_policy
+	nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = {
+	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
+	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
+	[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY,
+		.len = IEEE80211_MAX_DATA_LEN },
+};
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
@@ -2839,14 +2847,50 @@ do {\
 			dot11MeshHWMPRootMode, mask,
 			NL80211_MESHCONF_HWMP_ROOTMODE,
 			nla_get_u8);
-
 	if (mask_out)
 		*mask_out = mask;
+
 	return 0;
 
 #undef FILL_IN_MESH_PARAM_IF_SET
 }
 
+static int nl80211_parse_mesh_setup(struct genl_info *info,
+				     struct mesh_setup *setup)
+{
+	struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
+
+	if (!info->attrs[NL80211_ATTR_MESH_SETUP])
+		return -EINVAL;
+	if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX,
+			     info->attrs[NL80211_ATTR_MESH_SETUP],
+			     nl80211_mesh_setup_params_policy))
+		return -EINVAL;
+
+	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])
+		setup->path_sel_proto =
+		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ?
+		 IEEE80211_PATH_PROTOCOL_VENDOR :
+		 IEEE80211_PATH_PROTOCOL_HWMP;
+
+	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])
+		setup->path_metric =
+		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ?
+		 IEEE80211_PATH_METRIC_VENDOR :
+		 IEEE80211_PATH_METRIC_AIRTIME;
+
+	if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) {
+		struct nlattr *ieattr =
+			tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE];
+		if (!is_valid_ie_attr(ieattr))
+			return -EINVAL;
+		setup->vendor_ie = nla_data(ieattr);
+		setup->vendor_ie_len = nla_len(ieattr);
+	}
+
+	return 0;
+}
+
 static int nl80211_update_mesh_config(struct sk_buff *skb,
 				      struct genl_info *info)
 {
@@ -4667,10 +4711,12 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	struct mesh_config cfg;
+	struct mesh_setup setup;
 	int err;
 
 	/* start with default */
 	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
+	memcpy(&setup, &default_mesh_setup, sizeof(setup));
 
 	if (info->attrs[NL80211_ATTR_MESH_CONFIG]) {
 		/* and parse parameters if given */
@@ -4683,10 +4729,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	    !nla_len(info->attrs[NL80211_ATTR_MESH_ID]))
 		return -EINVAL;
 
-	return cfg80211_join_mesh(rdev, dev,
-				  nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
-				  nla_len(info->attrs[NL80211_ATTR_MESH_ID]),
-				  &cfg);
+	setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
+	setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
+
+	if (info->attrs[NL80211_ATTR_MESH_SETUP]) {
+		/* parse additional setup parameters if given */
+		err = nl80211_parse_mesh_setup(info, &setup);
+		if (err)
+			return err;
+	}
+
+	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }
 
 static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
-- 
cgit v1.2.3


From c7108a7111cd9e592d6ad498be37276dbea75d2b Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:37:50 -0800
Subject: mac80211: Send mesh non-HWMP path selection frames to userspace

Let path selection frames for protocols other than HWMP be sent to
userspace via NL80211_CMD_REGISTER_FRAME.  Also allow userspace to send
and receive mesh path selection frames.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c     |  1 +
 net/mac80211/main.c    |  4 ++++
 net/mac80211/mesh.c    | 13 +++----------
 net/mac80211/mesh.h    |  7 +++++++
 net/mac80211/rx.c      |  5 ++++-
 net/wireless/nl80211.c |  2 ++
 6 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ae2c7127a8aa..5892b0302454 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1670,6 +1670,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_MESH_POINT:
 		if (!ieee80211_is_action(mgmt->frame_control) ||
 		    mgmt->u.action.category == WLAN_CATEGORY_PUBLIC)
 			break;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d87eb005690f..a21d049caf19 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -484,6 +484,10 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
 			BIT(IEEE80211_STYPE_DEAUTH >> 4) |
 			BIT(IEEE80211_STYPE_ACTION >> 4),
 	},
+	[NL80211_IFTYPE_MESH_POINT] = {
+		.tx = 0xffff,
+		.rx = BIT(IEEE80211_STYPE_ACTION >> 4),
+	},
 };
 
 struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c326e009389d..8b5906c83f93 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -124,15 +124,6 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
 		ieee80211_mesh_housekeeping_timer((unsigned long) sdata);
 }
 
-void mesh_ids_set_default(struct ieee80211_if_mesh *sta)
-{
-	sta->mesh_pp_id = 0;	/* HWMP */
-	sta->mesh_pm_id = 0;	/* Airtime */
-	sta->mesh_cc_id = 0;	/* Disabled */
-	sta->mesh_sp_id = 0;	/* Neighbor Offset */
-	sta->mesh_auth_id = 0;	/* Disabled */
-}
-
 int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
 {
 	int i;
@@ -525,6 +516,9 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 	atomic_inc(&local->iff_allmultis);
 	ieee80211_configure_filter(local);
 
+	ifmsh->mesh_cc_id = 0;	/* Disabled */
+	ifmsh->mesh_sp_id = 0;	/* Neighbor Offset */
+	ifmsh->mesh_auth_id = 0;	/* Disabled */
 	set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
 	ieee80211_mesh_root_setup(ifmsh);
 	ieee80211_queue_work(&local->hw, &sdata->work);
@@ -695,7 +689,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
-	mesh_ids_set_default(ifmsh);
 	setup_timer(&ifmsh->mesh_path_timer,
 		    ieee80211_mesh_path_timer,
 		    (unsigned long) sdata);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 5b828fa8c541..890dd19c2eca 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -284,6 +284,11 @@ static inline void mesh_path_activate(struct mesh_path *mpath)
 	mpath->flags |= MESH_PATH_ACTIVE | MESH_PATH_RESOLVED;
 }
 
+static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
+{
+	return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
+}
+
 #define for_each_mesh_entry(x, p, node, i) \
 	for (i = 0; i <= x->hash_mask; i++) \
 		hlist_for_each_entry_rcu(node, p, &x->hash_buckets[i], list)
@@ -304,6 +309,8 @@ static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
 {}
 static inline void mesh_plink_quiesce(struct sta_info *sta) {}
 static inline void mesh_plink_restart(struct sta_info *sta) {}
+static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
+{ return false; }
 #endif
 
 #endif /* IEEE80211S_H */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4573ce1e1d15..7c5d1b2ec453 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2161,10 +2161,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 		break;
 	case WLAN_CATEGORY_MESH_PLINK:
-	case WLAN_CATEGORY_MESH_PATH_SEL:
 		if (!ieee80211_vif_is_mesh(&sdata->vif))
 			break;
 		goto queue;
+	case WLAN_CATEGORY_MESH_PATH_SEL:
+		if (!mesh_path_sel_is_hwmp(sdata))
+			break;
+		goto queue;
 	}
 
 	return RX_CONTINUE;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index eef89d0b558b..6a5d6fa11e46 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4445,6 +4445,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EOPNOTSUPP;
 
@@ -4485,6 +4486,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From 7f531e03abf0162df3966c4fa5fa6fdd9302cb6b Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 16 Dec 2010 11:30:22 +0900
Subject: cfg80211: Separate available antennas for RX and TX

As has been pointed out by Daniel Halperin some devices (e.g. Intel IWL5100)
can only TX from a subset of RX antennas, so use separate availability masks
for RX and TX.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 12 +++++++++---
 net/wireless/nl80211.c | 17 ++++++++++-------
 2 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 924d60366233..bcc9f448ec4e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1482,8 +1482,13 @@ struct ieee80211_txrx_stypes {
  *	transmitted through nl80211, points to an array indexed by interface
  *	type
  *
- * @available_antennas: bitmap of antennas which are available to configure.
- *	antenna configuration commands will be rejected unless this is set.
+ * @available_antennas_tx: bitmap of antennas which are available to be
+ *	configured as TX antennas. Antenna configuration commands will be
+ *	rejected unless this or @available_antennas_rx is set.
+ *
+ * @available_antennas_rx: bitmap of antennas which are available to be
+ *	configured as RX antennas. Antenna configuration commands will be
+ *	rejected unless this or @available_antennas_tx is set.
  *
  * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation
  *	may request, if implemented.
@@ -1528,7 +1533,8 @@ struct wiphy {
 
 	u8 max_num_pmkids;
 
-	u32 available_antennas;
+	u32 available_antennas_tx;
+	u32 available_antennas_rx;
 
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6a5d6fa11e46..8d2f5f8d8080 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -605,7 +605,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
-	if (dev->wiphy.available_antennas && dev->ops->get_antenna) {
+	if ((dev->wiphy.available_antennas_tx ||
+	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
 		int res;
 		res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant);
@@ -1107,7 +1108,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] &&
 	    info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) {
 		u32 tx_ant, rx_ant;
-		if (!rdev->wiphy.available_antennas || !rdev->ops->set_antenna) {
+		if ((!rdev->wiphy.available_antennas_tx &&
+		     !rdev->wiphy.available_antennas_rx) ||
+		    !rdev->ops->set_antenna) {
 			result = -EOPNOTSUPP;
 			goto bad_res;
 		}
@@ -1116,15 +1119,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 		rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]);
 
 		/* reject antenna configurations which don't match the
-		 * available antenna mask, except for the "all" mask */
-		if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas)) ||
-		    (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas))) {
+		 * available antenna masks, except for the "all" mask */
+		if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) ||
+		    (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) {
 			result = -EINVAL;
 			goto bad_res;
 		}
 
-		tx_ant = tx_ant & rdev->wiphy.available_antennas;
-		rx_ant = rx_ant & rdev->wiphy.available_antennas;
+		tx_ant = tx_ant & rdev->wiphy.available_antennas_tx;
+		rx_ant = rx_ant & rdev->wiphy.available_antennas_rx;
 
 		result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
 		if (result)
-- 
cgit v1.2.3


From 39fd5de4472b7b222c6cec78d72b069133f694e4 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Thu, 16 Dec 2010 11:30:28 +0900
Subject: nl80211: Export available antennas

Export the information which antennas are available for configuration as TX or
RX antennas via nl80211.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 9 +++++++++
 net/wireless/nl80211.c  | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 69eaccac78c4..2b89b712565b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -858,6 +858,12 @@ enum nl80211_commands {
  *	the hardware should not be configured to receive on this antenna.
  *	For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX.
  *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX: Bitmap of antennas which are available
+ *	for configuration as TX antennas via the above parameters.
+ *
+ * @NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX: Bitmap of antennas which are available
+ *	for configuration as RX antennas via the above parameters.
+ *
  * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS
  *
  * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be
@@ -1059,6 +1065,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MESH_SETUP,
 
+	NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+	NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8d2f5f8d8080..9b62710891a2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -605,6 +605,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
 		NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
 
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+		    dev->wiphy.available_antennas_tx);
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+		    dev->wiphy.available_antennas_rx);
+
 	if ((dev->wiphy.available_antennas_tx ||
 	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
-- 
cgit v1.2.3


From 61ad5394590c5c5338ab4ec50553d809a9996d50 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Thu, 16 Dec 2010 17:23:34 -0800
Subject: mac80211: Remove unused third address from mesh address extension
 header.

The Mesh Control header only includes 0, 1 or 2 addresses. If there is
one address, it should be interpreted as Address 4.  If there are 2,
they are interpreted as Addresses 5 and 6 (Address 4 being the 4th
address in the 802.11 header).

The address extension used to hold up to 3 addresses instead of the current 2.
I'm not sure which draft version changed this, but it is very unlikely that it
will change again given the state of the approval process of this draft.  See
section 7.1.3.6.3 in current draft (8.0).

Also, note that the extra address that I'm removing was not being used, so this
change has no effect on over-the-air frame formats.  But I thought I better
remove it before someone does start using it.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  1 -
 net/mac80211/mesh.c       | 32 +++++++++++++-------------------
 net/mac80211/mesh.h       |  4 ++--
 net/mac80211/tx.c         |  4 +---
 4 files changed, 16 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index cd681681d211..6042228954a7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -536,7 +536,6 @@ struct ieee80211s_hdr {
 	__le32 seqnum;
 	u8 eaddr1[6];
 	u8 eaddr2[6];
-	u8 eaddr3[6];
 } __attribute__ ((packed));
 
 /* Mesh flags */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 8b5906c83f93..ca3af4685b0a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -410,39 +410,33 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
  * ieee80211_new_mesh_header - create a new mesh header
  * @meshhdr:    uninitialized mesh header
  * @sdata:	mesh interface to be used
- * @addr4:	addr4 of the mesh frame (1st in ae header)
- *              may be NULL
- * @addr5:	addr5 of the mesh frame (1st or 2nd in ae header)
- *              may be NULL unless addr6 is present
- * @addr6:	addr6 of the mesh frame (2nd or 3rd in ae header)
- * 		may be NULL unless addr5 is present
+ * @addr4or5:   1st address in the ae header, which may correspond to address 4
+ *              (if addr6 is NULL) or address 5 (if addr6 is present). It may
+ *              be NULL.
+ * @addr6:	2nd address in the ae header, which corresponds to addr6 of the
+ *              mesh frame
  *
  * Return the header length.
  */
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
-		struct ieee80211_sub_if_data *sdata, char *addr4,
-		char *addr5, char *addr6)
+		struct ieee80211_sub_if_data *sdata, char *addr4or5,
+		char *addr6)
 {
 	int aelen = 0;
+	BUG_ON(!addr4or5 && addr6);
 	memset(meshhdr, 0, sizeof(*meshhdr));
 	meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL;
 	put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum);
 	sdata->u.mesh.mesh_seqnum++;
-	if (addr4) {
+	if (addr4or5 && !addr6) {
 		meshhdr->flags |= MESH_FLAGS_AE_A4;
 		aelen += ETH_ALEN;
-		memcpy(meshhdr->eaddr1, addr4, ETH_ALEN);
-	}
-	if (addr5 && addr6) {
+		memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN);
+	} else if (addr4or5 && addr6) {
 		meshhdr->flags |= MESH_FLAGS_AE_A5_A6;
 		aelen += 2 * ETH_ALEN;
-		if (!addr4) {
-			memcpy(meshhdr->eaddr1, addr5, ETH_ALEN);
-			memcpy(meshhdr->eaddr2, addr6, ETH_ALEN);
-		} else {
-			memcpy(meshhdr->eaddr2, addr5, ETH_ALEN);
-			memcpy(meshhdr->eaddr3, addr6, ETH_ALEN);
-		}
+		memcpy(meshhdr->eaddr1, addr4or5, ETH_ALEN);
+		memcpy(meshhdr->eaddr2, addr6, ETH_ALEN);
 	}
 	return 6 + aelen;
 }
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 890dd19c2eca..b99e230fe31c 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -187,8 +187,8 @@ struct mesh_rmc {
 int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
 				  const u8 *da, const u8 *sa);
 int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
-		struct ieee80211_sub_if_data *sdata, char *addr4,
-		char *addr5, char *addr6);
+		struct ieee80211_sub_if_data *sdata, char *addr4or5,
+		char *addr6);
 int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr,
 		struct ieee80211_sub_if_data *sdata);
 bool mesh_matches_local(struct ieee802_11_elems *ie,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f4b1b624ea9f..807dcd06e268 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1811,7 +1811,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					skb->data, skb->data + ETH_ALEN);
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
-					sdata, NULL, NULL, NULL);
+					sdata, NULL, NULL);
 		} else {
 			/* packet from other interface */
 			struct mesh_path *mppath;
@@ -1844,13 +1844,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
 							skb->data + ETH_ALEN,
-							NULL,
 							NULL);
 			else
 				meshhdrlen =
 					ieee80211_new_mesh_header(&mesh_hdr,
 							sdata,
-							NULL,
 							skb->data,
 							skb->data + ETH_ALEN);
 
-- 
cgit v1.2.3


From 489ee9195a7de9e6bc833d639ff6b553ffdad90e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 18 Dec 2010 19:30:48 +0100
Subject: mac80211: fix initialization of skb->cb in ieee80211_subif_start_xmit

The change 'mac80211: Fix BUG in pskb_expand_head when transmitting shared skbs'
added a check for copying the skb if it's shared, however the tx info variable
still points at the cb of the old skb

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Acked-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 807dcd06e268..8d01d21dff1e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1739,7 +1739,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_tx_info *info;
 	int ret = NETDEV_TX_BUSY, head_need;
 	u16 ethertype, hdrlen,  meshhdrlen = 0;
 	__le16 fc;
@@ -2028,6 +2028,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	skb_set_network_header(skb, nh_pos);
 	skb_set_transport_header(skb, h_pos);
 
+	info = IEEE80211_SKB_CB(skb);
 	memset(info, 0, sizeof(*info));
 
 	dev->trans_start = jiffies;
-- 
cgit v1.2.3


From 4cd06a344db752f513437138953af191cbe9a691 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 18 Dec 2010 19:30:49 +0100
Subject: mac80211: skip unnecessary pskb_expand_head calls

If the skb is not cloned and we don't need any extra headroom, there
is no point in reallocating the skb head.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8d01d21dff1e..90ee23550033 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1546,8 +1546,10 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
 
 	if (skb_header_cloned(skb))
 		I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
-	else
+	else if (head_need || tail_need)
 		I802_DEBUG_INC(local->tx_expand_skb_head);
+	else
+		return 0;
 
 	if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
 		wiphy_debug(local->hw.wiphy,
-- 
cgit v1.2.3


From f8a0a781488ec7288d1049e5d2022850aa98f7b6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 18 Dec 2010 19:30:50 +0100
Subject: mac80211: fix potentially redundant skb data copying

When an skb is shared, it needs to be duplicated, along with its data buffer.
If the skb does not have enough headroom, using skb_copy might cause the data
buffer to be copied twice (once by skb_copy and once by pskb_expand_head).
Fix this by using skb_clone initially and letting ieee80211_skb_resize sort
out the rest.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 90ee23550033..d2b4b67a7b53 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1934,7 +1934,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	 */
 	if (skb_shared(skb)) {
 		tmp_skb = skb;
-		skb = skb_copy(skb, GFP_ATOMIC);
+		skb = skb_clone(skb, GFP_ATOMIC);
 		kfree_skb(tmp_skb);
 
 		if (!skb) {
-- 
cgit v1.2.3


From c4266263249f22479eb1abb1a1709c38240b1597 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 15 Dec 2010 08:18:36 +0000
Subject: net_sched: sch_sfq: add backlog info in sfq_dump_class_stats()

We currently return for each active SFQ slot the number of packets in
queue. We can also give number of bytes accounted for these packets.

tc -s class show dev ifb0

Before patch :

class sfq 11:3d9 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 3p requeues 0
 allot 1266

After patch :

class sfq 11:3e4 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 4380b 3p requeues 0
 allot 1212

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d012dd..cb331dea7fe0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -548,8 +548,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	sfq_index idx = q->ht[cl-1];
-	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
+	struct sk_buff_head *list = &q->qs[idx];
+	struct gnet_stats_queue qs = { .qlen = list->qlen };
 	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	struct sk_buff *skb;
+
+	skb_queue_walk(list, skb)
+		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
-- 
cgit v1.2.3


From eda83e3b63e88351310c13c99178eb4634f137b2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Dec 2010 12:54:58 +0000
Subject: net_sched: sch_sfq: better struct layouts

Here is a respin of patch.

I'll send a short patch to make SFQ more fair in presence of large
packets as well.

Thanks

[PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts

This patch shrinks sizeof(struct sfq_sched_data)
from 0x14f8 (or more if spinlocks are bigger) to 0x1180 bytes, and
reduce text size as well.

   text    data     bss     dec     hex filename
   4821     152       0    4973    136d old/net/sched/sch_sfq.o
   4627     136       0    4763    129b new/net/sched/sch_sfq.o

All data for a slot/flow is now grouped in a compact and cache friendly
structure, instead of being spreaded in many different points.

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */
};

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 260 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 162 insertions(+), 98 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c965dd6..13322e8a0456 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	struct sfq_head dep; /* anchor in dep[] chains */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
 
+	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
+
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)			\
+	n = q->slots[x].dep.next;		\
+	p = q->slots[x].dep.prev;		\
+	sfq_dep_head(q, p)->next = n;		\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
 
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
 
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
 
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define	slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot_queue_init(slot);
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->next[x] = x;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
-		q->tail = x;
-		q->allot[x] = q->quantum;
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -342,14 +410,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	a = q->tail->next;
+	slot = &q->slots[a];
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		next_a = q->next[a];
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
 		if (a == next_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = next_a;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->allot[a] += q->quantum;
-		q->tail = a;
+		q->tail->next = next_a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = SFQ_EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct sk_buff_head *list = &q->qs[idx];
-	struct gnet_stats_queue qs = { .qlen = list->qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 	struct sk_buff *skb;
 
-	skb_queue_walk(list, skb)
+	slot_queue_walk(slot, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;
-- 
cgit v1.2.3


From 356f039822b8d802138f7121c80d2a9286976dbd Mon Sep 17 00:00:00 2001
From: Nandita Dukkipati <nanditad@google.com>
Date: Mon, 20 Dec 2010 14:15:56 +0000
Subject: TCP: increase default initial receive window.

This patch changes the default initial receive window to 10 mss
(defined constant). The default window is limited to the maximum
of 10*1460 and 2*mss (when mss > 1460).

draft-ietf-tcpm-initcwnd-00 is a proposal to the IETF that recommends
increasing TCP's initial congestion window to 10 mss or about 15KB.
Leading up to this proposal were several large-scale live Internet
experiments with an initial congestion window of 10 mss (IW10), where
we showed that the average latency of HTTP responses improved by
approximately 10%. This was accompanied by a slight increase in
retransmission rate (0.5%), most of which is coming from applications
opening multiple simultaneous connections. To understand the extreme
worst case scenarios, and fairness issues (IW10 versus IW3), we further
conducted controlled testbed experiments. We came away finding minimal
negative impact even under low link bandwidths (dial-ups) and small
buffers.  These results are extremely encouraging to adopting IW10.

However, an initial congestion window of 10 mss is useless unless a TCP
receiver advertises an initial receive window of at least 10 mss.
Fortunately, in the large-scale Internet experiments we found that most
widely used operating systems advertised large initial receive windows
of 64KB, allowing us to experiment with a wide range of initial
congestion windows. Linux systems were among the few exceptions that
advertised a small receive window of 6KB. The purpose of this patch is
to fix this shortcoming.

References:
1. A comprehensive list of all IW10 references to date.
http://code.google.com/speed/protocols/tcpm-IW10.html

2. Paper describing results from large-scale Internet experiments with IW10.
http://ccr.sigcomm.org/drupal/?q=node/621

3. Controlled testbed experiments under worst case scenarios and a
fairness study.
http://www.ietf.org/proceedings/79/slides/tcpm-0.pdf

4. Raw test data from testbed experiments (Linux senders/receivers)
with initial congestion and receive windows of both 10 mss.
http://research.csc.ncsu.edu/netsrv/?q=content/iw10

5. Internet-Draft. Increasing TCP's Initial Window.
https://datatracker.ietf.org/doc/draft-ietf-tcpm-initcwnd/

Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  3 +++
 net/ipv4/tcp_output.c | 11 ++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b4480300cadf..38509f047382 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -60,6 +60,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define MAX_TCP_WINDOW		32767U
 
+/* Offer an initial receive window of 10 mss. */
+#define TCP_DEFAULT_INIT_RCVWND	10
+
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2d390669d406..dc7c096ddfef 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -228,10 +228,15 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}
 
-	/* Set initial window to value enough for senders, following RFC5681. */
+	/* Set initial window to a value enough for senders starting with
+	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+	 * a limit on the initial window when mss is larger than 1460.
+	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = rfc3390_bytes_to_packets(mss);
-
+		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+		if (mss > 1460)
+			init_cwnd =
+			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
-- 
cgit v1.2.3


From b3bcedadf23264c3b7afcbfbfe1965a17ef1352c Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 21 Dec 2010 10:52:24 -0500
Subject: net/sunrpc/clnt.c: Convert sprintf_symbol to %ps

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4e8210dcda72..57d344cf2256 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1829,23 +1829,15 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
 			  const struct rpc_task *task)
 {
 	const char *rpc_waitq = "none";
-	char *p, action[KSYM_SYMBOL_LEN];
 
 	if (RPC_IS_QUEUED(task))
 		rpc_waitq = rpc_qname(task->tk_waitqueue);
 
-	/* map tk_action pointer to a function name; then trim off
-	 * the "+0x0 [sunrpc]" */
-	sprint_symbol(action, (unsigned long)task->tk_action);
-	p = strchr(action, '+');
-	if (p)
-		*p = '\0';
-
-	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%s q:%s\n",
+	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
 		task->tk_pid, task->tk_flags, task->tk_status,
 		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
 		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
-		action, rpc_waitq);
+		task->tk_action, rpc_waitq);
 }
 
 void rpc_show_tasks(void)
-- 
cgit v1.2.3


From 70978182d431e0348e6ef711d0f962d12c03bc46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Dec 2010 21:22:51 +0000
Subject: net: timestamp cloned packet in dev_queue_xmit_nit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le vendredi 17 décembre 2010 à 10:26 +0100, Eric Dumazet a écrit :

>
> I think we can add this after latest Changli patch :
>
> He does one skb_clone() before calling the sniffers.
> We could set timestamp on this clone, instead of original skb.
>
> Problem solved.
>

[PATCH net-next-2.6] net: timestamp cloned packet in dev_queue_xmit_nit

Now we do one clone of skb if at least one sniffer might take packet,
we also can do the skb timestamping on the clone and let original packet
unchanged.

This is a generalization of commit 8caf153974f2 (net: sch_netem: Fix an
inconsistency in ingress netem timestamps.)

This way, we can have a good idea when packets are delivered to our
stack (tcpdump -i ifb0), while a tcpdump on original device gives
timestamps right before ingressing.

This also speedup our stack, avoiding taking timestamps if not needed.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 59877290bca7..a215269d2e35 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1547,13 +1547,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	struct sk_buff *skb2 = NULL;
 	struct packet_type *pt_prev = NULL;
 
-#ifdef CONFIG_NET_CLS_ACT
-	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp_set(skb);
-#else
-	net_timestamp_set(skb);
-#endif
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		/* Never send packets back to the socket
@@ -1572,6 +1565,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			if (!skb2)
 				break;
 
+			net_timestamp_set(skb2);
+
 			/* skb->nh should be correctly
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
-- 
cgit v1.2.3


From 12b16dadbc2406144d408754f96d0f44aa016239 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 15 Dec 2010 19:45:28 +0000
Subject: filter: optimize accesses to ancillary data

We can translate pseudo load instructions at filter check time to
dedicated instructions to speed up filtering and avoid one switch().
libpcap currently uses SKF_AD_PROTOCOL, but custom filters probably use
other ancillary accesses.

Note : I made the assertion that ancillary data was always accessed with
BPF_LD|BPF_?|BPF_ABS instructions, not with BPF_LD|BPF_?|BPF_IND ones
(offset given by K constant, not by K + X register)

On x86_64, this saves a few bytes of text :

# size net/core/filter.o.*
   text	   data	    bss	    dec	    hex	filename
   4864	      0	      0	   4864	   1300	net/core/filter.o.new
   4944	      0	      0	   4944	   1350	net/core/filter.o.old

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 72 +++++++++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index e8a6ac411ffb..2b27d4efdd48 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -85,6 +85,17 @@ enum {
 	BPF_S_JMP_JGT_X,
 	BPF_S_JMP_JSET_K,
 	BPF_S_JMP_JSET_X,
+	/* Ancillary data */
+	BPF_S_ANC_PROTOCOL,
+	BPF_S_ANC_PKTTYPE,
+	BPF_S_ANC_IFINDEX,
+	BPF_S_ANC_NLATTR,
+	BPF_S_ANC_NLATTR_NEST,
+	BPF_S_ANC_MARK,
+	BPF_S_ANC_QUEUE,
+	BPF_S_ANC_HATYPE,
+	BPF_S_ANC_RXHASH,
+	BPF_S_ANC_CPU,
 };
 
 /* No hurry in this branch */
@@ -107,11 +118,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
 {
 	if (k >= 0)
 		return skb_header_pointer(skb, k, size, buffer);
-	else {
-		if (k >= SKF_AD_OFF)
-			return NULL;
-		return __load_pointer(skb, k, size);
-	}
+	return __load_pointer(skb, k, size);
 }
 
 /**
@@ -269,7 +276,7 @@ load_w:
 				A = get_unaligned_be32(ptr);
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_H_ABS:
 			k = K;
 load_h:
@@ -278,7 +285,7 @@ load_h:
 				A = get_unaligned_be16(ptr);
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_B_ABS:
 			k = K;
 load_b:
@@ -287,7 +294,7 @@ load_b:
 				A = *(u8 *)ptr;
 				continue;
 			}
-			break;
+			return 0;
 		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
@@ -338,45 +345,35 @@ load_b:
 		case BPF_S_STX:
 			mem[K] = X;
 			continue;
-		default:
-			WARN_ON(1);
-			return 0;
-		}
-
-		/*
-		 * Handle ancillary data, which are impossible
-		 * (or very difficult) to get parsing packet contents.
-		 */
-		switch (k-SKF_AD_OFF) {
-		case SKF_AD_PROTOCOL:
+		case BPF_S_ANC_PROTOCOL:
 			A = ntohs(skb->protocol);
 			continue;
-		case SKF_AD_PKTTYPE:
+		case BPF_S_ANC_PKTTYPE:
 			A = skb->pkt_type;
 			continue;
-		case SKF_AD_IFINDEX:
+		case BPF_S_ANC_IFINDEX:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->ifindex;
 			continue;
-		case SKF_AD_MARK:
+		case BPF_S_ANC_MARK:
 			A = skb->mark;
 			continue;
-		case SKF_AD_QUEUE:
+		case BPF_S_ANC_QUEUE:
 			A = skb->queue_mapping;
 			continue;
-		case SKF_AD_HATYPE:
+		case BPF_S_ANC_HATYPE:
 			if (!skb->dev)
 				return 0;
 			A = skb->dev->type;
 			continue;
-		case SKF_AD_RXHASH:
+		case BPF_S_ANC_RXHASH:
 			A = skb->rxhash;
 			continue;
-		case SKF_AD_CPU:
+		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
-		case SKF_AD_NLATTR: {
+		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -392,7 +389,7 @@ load_b:
 				A = 0;
 			continue;
 		}
-		case SKF_AD_NLATTR_NEST: {
+		case BPF_S_ANC_NLATTR_NEST: {
 			struct nlattr *nla;
 
 			if (skb_is_nonlinear(skb))
@@ -412,6 +409,7 @@ load_b:
 			continue;
 		}
 		default:
+			WARN_ON(1);
 			return 0;
 		}
 	}
@@ -600,6 +598,24 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
+		case BPF_S_LD_W_ABS:
+		case BPF_S_LD_H_ABS:
+		case BPF_S_LD_B_ABS:
+#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
+				code = BPF_S_ANC_##CODE;	\
+				break
+			switch (ftest->k) {
+			ANCILLARY(PROTOCOL);
+			ANCILLARY(PKTTYPE);
+			ANCILLARY(IFINDEX);
+			ANCILLARY(NLATTR);
+			ANCILLARY(NLATTR_NEST);
+			ANCILLARY(MARK);
+			ANCILLARY(QUEUE);
+			ANCILLARY(HATYPE);
+			ANCILLARY(RXHASH);
+			ANCILLARY(CPU);
+			}
 		}
 		ftest->code = code;
 	}
-- 
cgit v1.2.3


From fe67c913f1ec2a01aaa9176c80ef36eaf87d705d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 27 Nov 2010 20:02:59 +0100
Subject: mac80211: make LED trigger names available early

The throughput trigger will require doing LED
classdev/trigger handling before register_hw(),
so drivers should have access to the trigger
names before it. If trigger registration fails,
this will still make the trigger name available,
but that's not a big problem since the default
trigger will the simply not be found.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/led.c  | 36 ++++++++++++++++--------------------
 net/mac80211/led.h  |  4 ++++
 net/mac80211/main.c |  2 ++
 3 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 063aad944246..740a1d4e0a9c 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -54,12 +54,22 @@ void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
 		led_trigger_event(local->radio_led, LED_OFF);
 }
 
+void ieee80211_led_names(struct ieee80211_local *local)
+{
+	snprintf(local->rx_led_name, sizeof(local->rx_led_name),
+		 "%srx", wiphy_name(local->hw.wiphy));
+	snprintf(local->tx_led_name, sizeof(local->tx_led_name),
+		 "%stx", wiphy_name(local->hw.wiphy));
+	snprintf(local->assoc_led_name, sizeof(local->assoc_led_name),
+		 "%sassoc", wiphy_name(local->hw.wiphy));
+	snprintf(local->radio_led_name, sizeof(local->radio_led_name),
+		 "%sradio", wiphy_name(local->hw.wiphy));
+}
+
 void ieee80211_led_init(struct ieee80211_local *local)
 {
 	local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
 	if (local->rx_led) {
-		snprintf(local->rx_led_name, sizeof(local->rx_led_name),
-			 "%srx", wiphy_name(local->hw.wiphy));
 		local->rx_led->name = local->rx_led_name;
 		if (led_trigger_register(local->rx_led)) {
 			kfree(local->rx_led);
@@ -69,8 +79,6 @@ void ieee80211_led_init(struct ieee80211_local *local)
 
 	local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
 	if (local->tx_led) {
-		snprintf(local->tx_led_name, sizeof(local->tx_led_name),
-			 "%stx", wiphy_name(local->hw.wiphy));
 		local->tx_led->name = local->tx_led_name;
 		if (led_trigger_register(local->tx_led)) {
 			kfree(local->tx_led);
@@ -80,8 +88,6 @@ void ieee80211_led_init(struct ieee80211_local *local)
 
 	local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
 	if (local->assoc_led) {
-		snprintf(local->assoc_led_name, sizeof(local->assoc_led_name),
-			 "%sassoc", wiphy_name(local->hw.wiphy));
 		local->assoc_led->name = local->assoc_led_name;
 		if (led_trigger_register(local->assoc_led)) {
 			kfree(local->assoc_led);
@@ -91,8 +97,6 @@ void ieee80211_led_init(struct ieee80211_local *local)
 
 	local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
 	if (local->radio_led) {
-		snprintf(local->radio_led_name, sizeof(local->radio_led_name),
-			 "%sradio", wiphy_name(local->hw.wiphy));
 		local->radio_led->name = local->radio_led_name;
 		if (led_trigger_register(local->radio_led)) {
 			kfree(local->radio_led);
@@ -125,9 +129,7 @@ char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (local->radio_led)
-		return local->radio_led_name;
-	return NULL;
+	return local->radio_led_name;
 }
 EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
 
@@ -135,9 +137,7 @@ char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (local->assoc_led)
-		return local->assoc_led_name;
-	return NULL;
+	return local->assoc_led_name;
 }
 EXPORT_SYMBOL(__ieee80211_get_assoc_led_name);
 
@@ -145,9 +145,7 @@ char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (local->tx_led)
-		return local->tx_led_name;
-	return NULL;
+	return local->tx_led_name;
 }
 EXPORT_SYMBOL(__ieee80211_get_tx_led_name);
 
@@ -155,8 +153,6 @@ char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	if (local->rx_led)
-		return local->rx_led_name;
-	return NULL;
+	return local->rx_led_name;
 }
 EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 77b1e1ba6039..8320cbac61c6 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -18,6 +18,7 @@ extern void ieee80211_led_assoc(struct ieee80211_local *local,
 				bool associated);
 extern void ieee80211_led_radio(struct ieee80211_local *local,
 				bool enabled);
+extern void ieee80211_led_names(struct ieee80211_local *local);
 extern void ieee80211_led_init(struct ieee80211_local *local);
 extern void ieee80211_led_exit(struct ieee80211_local *local);
 #else
@@ -35,6 +36,9 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local,
 				       bool enabled)
 {
 }
+static inline void ieee80211_led_names(struct ieee80211_local *local)
+{
+}
 static inline void ieee80211_led_init(struct ieee80211_local *local)
 {
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a21d049caf19..bbe8e0ac6e52 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -605,6 +605,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	/* init dummy netdev for use w/ NAPI */
 	init_dummy_netdev(&local->napi_dev);
 
+	ieee80211_led_names(local);
+
 	return local_to_hw(local);
 }
 EXPORT_SYMBOL(ieee80211_alloc_hw);
-- 
cgit v1.2.3


From e1e5406854378dfada3f33c7192b012083a5b8e0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Nov 2010 08:58:45 +0100
Subject: mac80211: add throughput based LED blink trigger

iwlwifi and other drivers like to blink their LED
based on throughput. Implement this generically in
mac80211, based on a throughput table the driver
specifies. That way, drivers can set the blink
frequencies depending on their desired behaviour
and max throughput.

All the drivers need to do is provide an LED class
device, best with blink hardware offload.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     |  38 ++++++++++++++
 net/mac80211/ieee80211_i.h |  13 +++++
 net/mac80211/iface.c       |   1 +
 net/mac80211/led.c         | 121 +++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/led.h         |  44 +++++++++++++----
 net/mac80211/rx.c          |   3 ++
 net/mac80211/tx.c          |   3 ++
 net/mac80211/util.c        |   2 +
 8 files changed, 216 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 69ded1ee49ce..40a93d582c79 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1852,11 +1852,26 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
  */
 int ieee80211_register_hw(struct ieee80211_hw *hw);
 
+/**
+ * struct ieee80211_tpt_blink - throughput blink description
+ * @throughput: throughput in Kbit/sec
+ * @blink_time: blink time in milliseconds
+ *	(full cycle, ie. one off + one on period)
+ */
+struct ieee80211_tpt_blink {
+	int throughput;
+	int blink_time;
+};
+
 #ifdef CONFIG_MAC80211_LEDS
 extern char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw);
+extern char *__ieee80211_create_tpt_led_trigger(
+				struct ieee80211_hw *hw,
+				const struct ieee80211_tpt_blink *blink_table,
+				unsigned int blink_table_len);
 #endif
 /**
  * ieee80211_get_tx_led_name - get name of TX LED
@@ -1934,6 +1949,29 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
 #endif
 }
 
+/**
+ * ieee80211_create_tpt_led_trigger - create throughput LED trigger
+ * @hw: the hardware to create the trigger for
+ * @blink_table: the blink table -- needs to be ordered by throughput
+ * @blink_table_len: size of the blink table
+ *
+ * This function returns %NULL (in case of error, or if no LED
+ * triggers are configured) or the name of the new trigger.
+ * This function must be called before ieee80211_register_hw().
+ */
+static inline char *
+ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+				 const struct ieee80211_tpt_blink *blink_table,
+				 unsigned int blink_table_len)
+{
+#ifdef CONFIG_MAC80211_LEDS
+	return __ieee80211_create_tpt_led_trigger(hw, blink_table,
+						  blink_table_len);
+#else
+	return NULL;
+#endif
+}
+
 /**
  * ieee80211_unregister_hw - Unregister a hardware device
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index eadaa243a3da..523b90be8dc5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/etherdevice.h>
+#include <linux/leds.h>
 #include <net/ieee80211_radiotap.h>
 #include <net/cfg80211.h>
 #include <net/mac80211.h>
@@ -630,6 +631,17 @@ enum queue_stop_reason {
 	IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
 };
 
+struct tpt_led_trigger {
+	struct led_trigger trig;
+	char name[32];
+	const struct ieee80211_tpt_blink *blink_table;
+	unsigned int blink_table_len;
+	struct timer_list timer;
+	bool running;
+	unsigned long prev_traffic;
+	unsigned long tx_bytes, rx_bytes;
+};
+
 /**
  * mac80211 scan flags - currently active scan mode
  *
@@ -838,6 +850,7 @@ struct ieee80211_local {
 #ifdef CONFIG_MAC80211_LEDS
 	int tx_led_counter, rx_led_counter;
 	struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
+	struct tpt_led_trigger *tpt_led_trigger;
 	char tx_led_name[32], rx_led_name[32],
 	     assoc_led_name[32], radio_led_name[32];
 #endif
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f0f11bb794af..989df7065c21 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -220,6 +220,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 		/* we're brought up, everything changes */
 		hw_reconf_flags = ~0;
 		ieee80211_led_radio(local, true);
+		ieee80211_start_tpt_led_trig(local);
 	}
 
 	/*
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 740a1d4e0a9c..79b13090aed7 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -103,6 +103,13 @@ void ieee80211_led_init(struct ieee80211_local *local)
 			local->radio_led = NULL;
 		}
 	}
+
+	if (local->tpt_led_trigger) {
+		if (led_trigger_register(&local->tpt_led_trigger->trig)) {
+			kfree(local->tpt_led_trigger);
+			local->tpt_led_trigger = NULL;
+		}
+	}
 }
 
 void ieee80211_led_exit(struct ieee80211_local *local)
@@ -123,6 +130,11 @@ void ieee80211_led_exit(struct ieee80211_local *local)
 		led_trigger_unregister(local->rx_led);
 		kfree(local->rx_led);
 	}
+
+	if (local->tpt_led_trigger) {
+		led_trigger_unregister(&local->tpt_led_trigger->trig);
+		kfree(local->tpt_led_trigger);
+	}
 }
 
 char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
@@ -156,3 +168,112 @@ char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
 	return local->rx_led_name;
 }
 EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
+
+static unsigned long tpt_trig_traffic(struct ieee80211_local *local,
+				      struct tpt_led_trigger *tpt_trig)
+{
+	unsigned long traffic, delta;
+
+	traffic = tpt_trig->tx_bytes + tpt_trig->rx_bytes;
+
+	delta = traffic - tpt_trig->prev_traffic;
+	tpt_trig->prev_traffic = traffic;
+	return DIV_ROUND_UP(delta, 1024 / 8);
+}
+
+static void tpt_trig_timer(unsigned long data)
+{
+	struct ieee80211_local *local = (void *)data;
+	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
+	struct led_classdev *led_cdev;
+	unsigned long on, off, tpt;
+	int i;
+
+	if (!tpt_trig->running)
+		return;
+
+	mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
+
+	tpt = tpt_trig_traffic(local, tpt_trig);
+
+	/* default to just solid on */
+	on = 1;
+	off = 0;
+
+	for (i = tpt_trig->blink_table_len - 1; i >= 0; i--) {
+		if (tpt_trig->blink_table[i].throughput < 0 ||
+		    tpt > tpt_trig->blink_table[i].throughput) {
+			off = tpt_trig->blink_table[i].blink_time / 2;
+			on = tpt_trig->blink_table[i].blink_time - off;
+			break;
+		}
+	}
+
+	read_lock(&tpt_trig->trig.leddev_list_lock);
+	list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+		led_blink_set(led_cdev, &on, &off);
+	read_unlock(&tpt_trig->trig.leddev_list_lock);
+}
+
+extern char *__ieee80211_create_tpt_led_trigger(
+				struct ieee80211_hw *hw,
+				const struct ieee80211_tpt_blink *blink_table,
+				unsigned int blink_table_len)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct tpt_led_trigger *tpt_trig;
+
+	if (WARN_ON(local->tpt_led_trigger))
+		return NULL;
+
+	tpt_trig = kzalloc(sizeof(struct tpt_led_trigger), GFP_KERNEL);
+	if (!tpt_trig)
+		return NULL;
+
+	snprintf(tpt_trig->name, sizeof(tpt_trig->name),
+		 "%stpt", wiphy_name(local->hw.wiphy));
+
+	tpt_trig->trig.name = tpt_trig->name;
+
+	tpt_trig->blink_table = blink_table;
+	tpt_trig->blink_table_len = blink_table_len;
+
+	setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local);
+
+	local->tpt_led_trigger = tpt_trig;
+
+	return tpt_trig->name;
+}
+EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger);
+
+void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
+{
+	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
+
+	if (!tpt_trig)
+		return;
+
+	/* reset traffic */
+	tpt_trig_traffic(local, tpt_trig);
+	tpt_trig->running = true;
+
+	tpt_trig_timer((unsigned long)local);
+	mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
+}
+
+void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
+{
+	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
+	struct led_classdev *led_cdev;
+
+	if (!tpt_trig)
+		return;
+
+	tpt_trig->running = false;
+	del_timer_sync(&tpt_trig->timer);
+
+	read_lock(&tpt_trig->trig.leddev_list_lock);
+	list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+		led_brightness_set(led_cdev, LED_OFF);
+	read_unlock(&tpt_trig->trig.leddev_list_lock);
+}
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 8320cbac61c6..6c215dc0fc96 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -12,15 +12,17 @@
 #include "ieee80211_i.h"
 
 #ifdef CONFIG_MAC80211_LEDS
-extern void ieee80211_led_rx(struct ieee80211_local *local);
-extern void ieee80211_led_tx(struct ieee80211_local *local, int q);
-extern void ieee80211_led_assoc(struct ieee80211_local *local,
-				bool associated);
-extern void ieee80211_led_radio(struct ieee80211_local *local,
-				bool enabled);
-extern void ieee80211_led_names(struct ieee80211_local *local);
-extern void ieee80211_led_init(struct ieee80211_local *local);
-extern void ieee80211_led_exit(struct ieee80211_local *local);
+void ieee80211_led_rx(struct ieee80211_local *local);
+void ieee80211_led_tx(struct ieee80211_local *local, int q);
+void ieee80211_led_assoc(struct ieee80211_local *local,
+			 bool associated);
+void ieee80211_led_radio(struct ieee80211_local *local,
+			 bool enabled);
+void ieee80211_led_names(struct ieee80211_local *local);
+void ieee80211_led_init(struct ieee80211_local *local);
+void ieee80211_led_exit(struct ieee80211_local *local);
+void ieee80211_start_tpt_led_trig(struct ieee80211_local *local);
+void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local);
 #else
 static inline void ieee80211_led_rx(struct ieee80211_local *local)
 {
@@ -45,4 +47,28 @@ static inline void ieee80211_led_init(struct ieee80211_local *local)
 static inline void ieee80211_led_exit(struct ieee80211_local *local)
 {
 }
+static inline void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
+{
+}
+#endif
+
+static inline void
+ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
+{
+#ifdef CONFIG_MAC80211_LEDS
+	if (local->tpt_led_trigger && ieee80211_is_data(fc))
+		local->tpt_led_trigger->tx_bytes += bytes;
 #endif
+}
+
+static inline void
+ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
+{
+#ifdef CONFIG_MAC80211_LEDS
+	if (local->tpt_led_trigger && ieee80211_is_data(fc))
+		local->tpt_led_trigger->rx_bytes += bytes;
+#endif
+}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7c5d1b2ec453..01a3f2630eaf 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2928,6 +2928,9 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		return;
 	}
 
+	ieee80211_tpt_led_trig_rx(local,
+			((struct ieee80211_hdr *)skb->data)->frame_control,
+			skb->len);
 	__ieee80211_rx_handle_packet(hw, skb);
 
 	rcu_read_unlock();
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d2b4b67a7b53..68c2fbd16ebb 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1297,6 +1297,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 
 	while (skb) {
 		int q = skb_get_queue_mapping(skb);
+		__le16 fc;
 
 		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 		ret = IEEE80211_TX_OK;
@@ -1339,6 +1340,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 		else
 			info->control.sta = NULL;
 
+		fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
 		ret = drv_tx(local, skb);
 		if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
 			dev_kfree_skb(skb);
@@ -1349,6 +1351,7 @@ static int __ieee80211_tx(struct ieee80211_local *local,
 			return IEEE80211_TX_AGAIN;
 		}
 
+		ieee80211_tpt_led_trig_tx(local, fc, len);
 		*skbp = skb = next;
 		ieee80211_led_tx(local, 1);
 		fragm = true;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e497476174ce..48306415a1cb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1116,6 +1116,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
 void ieee80211_stop_device(struct ieee80211_local *local)
 {
 	ieee80211_led_radio(local, false);
+	ieee80211_stop_tpt_led_trig(local);
 
 	cancel_work_sync(&local->reconfig_filter);
 
@@ -1150,6 +1151,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		}
 
 		ieee80211_led_radio(local, true);
+		ieee80211_start_tpt_led_trig(local);
 	}
 
 	/* add interfaces */
-- 
cgit v1.2.3


From 67408c8c7b9daf28b50e33be3541334c07d15789 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Nov 2010 08:59:23 +0100
Subject: mac80211: selective throughput LED trigger active

The throughput LED trigger was always active when
the radio was enabled. In most cases that's likely
the desired behaviour, but iwlwifi requires it to
be only active when one of the virtual interfaces
is actually "connected" in some way.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     | 20 +++++++++++++++++---
 net/mac80211/ieee80211_i.h |  3 ++-
 net/mac80211/iface.c       | 16 +++++++++++++++-
 net/mac80211/led.c         | 39 ++++++++++++++++++++++++++++++++++-----
 net/mac80211/led.h         | 11 +++++------
 net/mac80211/util.c        |  5 +++--
 6 files changed, 76 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 40a93d582c79..479c35e160e3 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1863,13 +1863,26 @@ struct ieee80211_tpt_blink {
 	int blink_time;
 };
 
+/**
+ * enum ieee80211_tpt_led_trigger_flags - throughput trigger flags
+ * @IEEE80211_TPT_LEDTRIG_FL_RADIO: enable blinking with radio
+ * @IEEE80211_TPT_LEDTRIG_FL_WORK: enable blinking when working
+ * @IEEE80211_TPT_LEDTRIG_FL_CONNECTED: enable blinking when at least one
+ *	interface is connected in some way, including being an AP
+ */
+enum ieee80211_tpt_led_trigger_flags {
+	IEEE80211_TPT_LEDTRIG_FL_RADIO		= BIT(0),
+	IEEE80211_TPT_LEDTRIG_FL_WORK		= BIT(1),
+	IEEE80211_TPT_LEDTRIG_FL_CONNECTED	= BIT(2),
+};
+
 #ifdef CONFIG_MAC80211_LEDS
 extern char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw);
 extern char *__ieee80211_create_tpt_led_trigger(
-				struct ieee80211_hw *hw,
+				struct ieee80211_hw *hw, unsigned int flags,
 				const struct ieee80211_tpt_blink *blink_table,
 				unsigned int blink_table_len);
 #endif
@@ -1952,6 +1965,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
 /**
  * ieee80211_create_tpt_led_trigger - create throughput LED trigger
  * @hw: the hardware to create the trigger for
+ * @flags: trigger flags, see &enum ieee80211_tpt_led_trigger_flags
  * @blink_table: the blink table -- needs to be ordered by throughput
  * @blink_table_len: size of the blink table
  *
@@ -1960,12 +1974,12 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
  * This function must be called before ieee80211_register_hw().
  */
 static inline char *
-ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags,
 				 const struct ieee80211_tpt_blink *blink_table,
 				 unsigned int blink_table_len)
 {
 #ifdef CONFIG_MAC80211_LEDS
-	return __ieee80211_create_tpt_led_trigger(hw, blink_table,
+	return __ieee80211_create_tpt_led_trigger(hw, flags, blink_table,
 						  blink_table_len);
 #else
 	return NULL;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 523b90be8dc5..3810c72ac062 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -637,9 +637,10 @@ struct tpt_led_trigger {
 	const struct ieee80211_tpt_blink *blink_table;
 	unsigned int blink_table_len;
 	struct timer_list timer;
-	bool running;
 	unsigned long prev_traffic;
 	unsigned long tx_bytes, rx_bytes;
+	unsigned int active, want;
+	bool running;
 };
 
 /**
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 989df7065c21..b6db237672ff 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -220,7 +220,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 		/* we're brought up, everything changes */
 		hw_reconf_flags = ~0;
 		ieee80211_led_radio(local, true);
-		ieee80211_start_tpt_led_trig(local);
+		ieee80211_mod_tpt_led_trig(local,
+					   IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
 	}
 
 	/*
@@ -1265,6 +1266,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 	int count = 0;
 	bool working = false, scanning = false;
 	struct ieee80211_work *wk;
+	unsigned int led_trig_start = 0, led_trig_stop = 0;
 
 #ifdef CONFIG_PROVE_LOCKING
 	WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
@@ -1314,6 +1316,18 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
 	}
 
+	if (working || scanning)
+		led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK;
+	else
+		led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK;
+
+	if (count)
+		led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED;
+	else
+		led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED;
+
+	ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop);
+
 	if (working)
 		return ieee80211_idle_off(local, "working");
 	if (scanning)
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 79b13090aed7..4905eb8af572 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -216,7 +216,7 @@ static void tpt_trig_timer(unsigned long data)
 }
 
 extern char *__ieee80211_create_tpt_led_trigger(
-				struct ieee80211_hw *hw,
+				struct ieee80211_hw *hw, unsigned int flags,
 				const struct ieee80211_tpt_blink *blink_table,
 				unsigned int blink_table_len)
 {
@@ -237,6 +237,7 @@ extern char *__ieee80211_create_tpt_led_trigger(
 
 	tpt_trig->blink_table = blink_table;
 	tpt_trig->blink_table_len = blink_table_len;
+	tpt_trig->want = flags;
 
 	setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local);
 
@@ -246,11 +247,11 @@ extern char *__ieee80211_create_tpt_led_trigger(
 }
 EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger);
 
-void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
+static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
 {
 	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
 
-	if (!tpt_trig)
+	if (tpt_trig->running)
 		return;
 
 	/* reset traffic */
@@ -261,12 +262,12 @@ void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
 	mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ));
 }
 
-void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
+static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
 {
 	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
 	struct led_classdev *led_cdev;
 
-	if (!tpt_trig)
+	if (!tpt_trig->running)
 		return;
 
 	tpt_trig->running = false;
@@ -277,3 +278,31 @@ void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
 		led_brightness_set(led_cdev, LED_OFF);
 	read_unlock(&tpt_trig->trig.leddev_list_lock);
 }
+
+void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
+				unsigned int types_on, unsigned int types_off)
+{
+	struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
+	bool allowed;
+
+	WARN_ON(types_on & types_off);
+
+	if (!tpt_trig)
+		return;
+
+	tpt_trig->active &= ~types_off;
+	tpt_trig->active |= types_on;
+
+	/*
+	 * Regardless of wanted state, we shouldn't blink when
+	 * the radio is disabled -- this can happen due to some
+	 * code ordering issues with __ieee80211_recalc_idle()
+	 * being called before the radio is started.
+	 */
+	allowed = tpt_trig->active & IEEE80211_TPT_LEDTRIG_FL_RADIO;
+
+	if (!allowed || !(tpt_trig->active & tpt_trig->want))
+		ieee80211_stop_tpt_led_trig(local);
+	else
+		ieee80211_start_tpt_led_trig(local);
+}
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 6c215dc0fc96..e0275d9befa8 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -21,8 +21,8 @@ void ieee80211_led_radio(struct ieee80211_local *local,
 void ieee80211_led_names(struct ieee80211_local *local);
 void ieee80211_led_init(struct ieee80211_local *local);
 void ieee80211_led_exit(struct ieee80211_local *local);
-void ieee80211_start_tpt_led_trig(struct ieee80211_local *local);
-void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local);
+void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
+				unsigned int types_on, unsigned int types_off);
 #else
 static inline void ieee80211_led_rx(struct ieee80211_local *local)
 {
@@ -47,10 +47,9 @@ static inline void ieee80211_led_init(struct ieee80211_local *local)
 static inline void ieee80211_led_exit(struct ieee80211_local *local)
 {
 }
-static inline void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
-{
-}
-static inline void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
+static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
+					      unsigned int types_on,
+					      unsigned int types_off)
 {
 }
 #endif
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 48306415a1cb..cf68700abffa 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1116,7 +1116,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
 void ieee80211_stop_device(struct ieee80211_local *local)
 {
 	ieee80211_led_radio(local, false);
-	ieee80211_stop_tpt_led_trig(local);
+	ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
 
 	cancel_work_sync(&local->reconfig_filter);
 
@@ -1151,7 +1151,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		}
 
 		ieee80211_led_radio(local, true);
-		ieee80211_start_tpt_led_trig(local);
+		ieee80211_mod_tpt_led_trig(local,
+					   IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
 	}
 
 	/* add interfaces */
-- 
cgit v1.2.3


From 65a6538a56d4c7ae8465f2a8420ddc65877b6779 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@nokia.com>
Date: Tue, 21 Dec 2010 16:02:17 +0200
Subject: mac80211: check for CONFIG_MAC80211_LEDS in the tpt_led_trigger
 declaration

If CONFIG_MAC80211_LEDS is not set, ieee80211_i.h was failing to compile,
because struct led_trigger is only declared when CONFIG_LEDS_TRIGGERS is
set.

This patch adds ifdefs around the tpt_led_trigger declaration in
ieee80211_i.h to avoid the problem.

Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3810c72ac062..a05893a238b7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -631,6 +631,7 @@ enum queue_stop_reason {
 	IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
 };
 
+#ifdef CONFIG_MAC80211_LEDS
 struct tpt_led_trigger {
 	struct led_trigger trig;
 	char name[32];
@@ -642,6 +643,7 @@ struct tpt_led_trigger {
 	unsigned int active, want;
 	bool running;
 };
+#endif
 
 /**
  * mac80211 scan flags - currently active scan mode
-- 
cgit v1.2.3


From ee09b3c1cff0335137dc1b146488e4352f640f13 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 22 Dec 2010 11:39:59 -0800
Subject: sfq: fix sfq class stats handling

sfq_walk() runs without qdisc lock. By the time it selects a non empty
hash slot and sfq_dump_class_stats() is run (with lock held), slot might
have been freed : We then access q->slots[SFQ_EMPTY_SLOT], out of
bounds, and crash in slot_queue_walk()

On previous kernels, bug is here but out of bounds qs[SFQ_DEPTH] and
allot[SFQ_DEPTH] are located in struct sfq_sched_data, so no illegal
memory access happens, only possibly wrong data reported to user.

Also, slot_dequeue_tail() should make sure slot skb chain is correctly
terminated, or sfq_dump_class_stats() can access freed skbs.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 13322e8a0456..6a2f88fea6d8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,6 +281,7 @@ static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_prev;
 
 	slot->skblist_prev = skb->prev;
+	skb->prev->next = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -608,14 +609,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
-	struct gnet_stats_queue qs = { .qlen = slot->qlen };
-	struct tc_sfq_xstats xstats = { .allot = slot->allot };
+	sfq_index idx = q->ht[cl - 1];
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_sfq_xstats xstats = { 0 };
 	struct sk_buff *skb;
 
-	slot_queue_walk(slot, skb)
-		qs.backlog += qdisc_pkt_len(skb);
+	if (idx != SFQ_EMPTY_SLOT) {
+		const struct sfq_slot *slot = &q->slots[idx];
 
+		xstats.allot = slot->allot;
+		qs.qlen = slot->qlen;
+		slot_queue_walk(slot, skb)
+			qs.backlog += qdisc_pkt_len(skb);
+	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
-- 
cgit v1.2.3


From 172128468f61e16e1427238278b9ad775584aa89 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 22 Dec 2010 10:15:30 +0100
Subject: mac80211: cleanup select_queue

There's a redundant rcu_read_lock/unlock pair, a
redundant variable, and a few redundant accesses
to the 1d_to_ac array. Fix this to make the code
neater and easier to follow.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/wme.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index 58e75bbc1f91..28bc084dbfb9 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -59,26 +59,22 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sta_info *sta = NULL;
-	u32 sta_flags = 0;
 	const u8 *ra = NULL;
 	bool qos = false;
 
 	if (local->hw.queues < 4 || skb->len < 6) {
 		skb->priority = 0; /* required for correct WPA/11i MIC */
-		return min_t(u16, local->hw.queues - 1,
-			     ieee802_1d_to_ac[skb->priority]);
+		return min_t(u16, local->hw.queues - 1, IEEE80211_AC_BE);
 	}
 
 	rcu_read_lock();
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP_VLAN:
-		rcu_read_lock();
 		sta = rcu_dereference(sdata->u.vlan.sta);
-		if (sta)
-			sta_flags = get_sta_flags(sta);
-		rcu_read_unlock();
-		if (sta)
+		if (sta) {
+			qos = get_sta_flags(sta) & WLAN_STA_WME;
 			break;
+		}
 	case NL80211_IFTYPE_AP:
 		ra = skb->data;
 		break;
@@ -107,17 +103,13 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
 	if (!sta && ra && !is_multicast_ether_addr(ra)) {
 		sta = sta_info_get(sdata, ra);
 		if (sta)
-			sta_flags = get_sta_flags(sta);
+			qos = get_sta_flags(sta) & WLAN_STA_WME;
 	}
-
-	if (sta_flags & WLAN_STA_WME)
-		qos = true;
-
 	rcu_read_unlock();
 
 	if (!qos) {
 		skb->priority = 0; /* required for correct WPA/11i MIC */
-		return ieee802_1d_to_ac[skb->priority];
+		return IEEE80211_AC_BE;
 	}
 
 	/* use the data classifier to determine what 802.1d tag the
-- 
cgit v1.2.3


From e41d8b4e131a41f2a3b74aaa783b16aa46376d8e Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Mon, 13 Dec 2010 21:07:03 +0200
Subject: Bluetooth: Add error handling for managment command handlers

The command handlers for bluetooth management messaging should be able
to report errors (such as memory allocation failures) to the higher
levels in the call stack.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/mgmt.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d15bf676c350..7ea5489e7977 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,7 +29,7 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/mgmt.h>
 
-static void cmd_status(struct sock *sk, u16 cmd, u8 status)
+static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
@@ -39,7 +39,7 @@ static void cmd_status(struct sock *sk, u16 cmd, u8 status)
 
 	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
 	if (!skb)
-		return;
+		return -ENOMEM;
 
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
 
@@ -52,6 +52,8 @@ static void cmd_status(struct sock *sk, u16 cmd, u8 status)
 
 	if (sock_queue_rcv_skb(sk, skb) < 0)
 		kfree_skb(skb);
+
+	return 0;
 }
 
 int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
@@ -87,10 +89,13 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 	switch (opcode) {
 	default:
 		BT_DBG("Unknown op %u", opcode);
-		cmd_status(sk, opcode, 0x01);
+		err = cmd_status(sk, opcode, 0x01);
 		break;
 	}
 
+	if (err < 0)
+		goto done;
+
 	err = msglen;
 
 done:
-- 
cgit v1.2.3


From 02d981292ad3149e8e5f37cffbccedab1a8576d8 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Mon, 13 Dec 2010 21:07:04 +0200
Subject: Bluetooth: Add read_version management command

This patch implements the initial read_version command that userspace
will use before any other management interface operations.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/mgmt.h |  6 ++++++
 net/bluetooth/mgmt.c         | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 95974daa725e..d353d64bfffb 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -27,6 +27,12 @@ struct mgmt_hdr {
 } __packed;
 #define MGMT_HDR_SIZE			4
 
+#define MGMT_OP_READ_VERSION		0x0001
+struct mgmt_rp_read_version {
+	__u8 version;
+	__le16 revision;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16 opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7ea5489e7977..3e24c0bf18e7 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -29,6 +29,39 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/mgmt.h>
 
+#define MGMT_VERSION	0
+#define MGMT_REVISION	1
+
+static int read_version(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+	struct mgmt_ev_cmd_complete *ev;
+	struct mgmt_rp_read_version *rp;
+
+	BT_DBG("sock %p", sk);
+
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
+	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
+
+	ev = (void *) skb_put(skb, sizeof(*ev));
+	put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode);
+
+	rp = (void *) skb_put(skb, sizeof(*rp));
+	rp->version = MGMT_VERSION;
+	put_unaligned_le16(MGMT_REVISION, &rp->revision);
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+
+	return 0;
+}
+
 static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 {
 	struct sk_buff *skb;
@@ -87,6 +120,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 	}
 
 	switch (opcode) {
+	case MGMT_OP_READ_VERSION:
+		err = read_version(sk);
+		break;
 	default:
 		BT_DBG("Unknown op %u", opcode);
 		err = cmd_status(sk, opcode, 0x01);
-- 
cgit v1.2.3


From faba42eb2a8cf905ed26d540c3c93d429e327224 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Mon, 13 Dec 2010 21:07:05 +0200
Subject: Bluetooth: Add read_index_list management command

This patch implements the read_index_list command through which
userspace can get a list of current adapter indices.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/mgmt.h |  6 +++++
 net/bluetooth/mgmt.c         | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index d353d64bfffb..c2b4c83ab175 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -33,6 +33,12 @@ struct mgmt_rp_read_version {
 	__le16 revision;
 } __packed;
 
+#define MGMT_OP_READ_INDEX_LIST		0x0003
+struct mgmt_rp_read_index_list {
+	__le16 num_controllers;
+	__le16 index[0];
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16 opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3e24c0bf18e7..7a8e321875c9 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -62,6 +62,56 @@ static int read_version(struct sock *sk)
 	return 0;
 }
 
+static int read_index_list(struct sock *sk)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+	struct mgmt_ev_cmd_complete *ev;
+	struct mgmt_rp_read_index_list *rp;
+	struct list_head *p;
+	size_t body_len;
+	u16 count;
+	int i;
+
+	BT_DBG("sock %p", sk);
+
+	read_lock(&hci_dev_list_lock);
+
+	count = 0;
+	list_for_each(p, &hci_dev_list) {
+		count++;
+	}
+
+	body_len = sizeof(*ev) + sizeof(*rp) + (2 * count);
+	skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
+	hdr->len = cpu_to_le16(body_len);
+
+	ev = (void *) skb_put(skb, sizeof(*ev));
+	put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode);
+
+	rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count));
+	put_unaligned_le16(count, &rp->num_controllers);
+
+	i = 0;
+	list_for_each(p, &hci_dev_list) {
+		struct hci_dev *d = list_entry(p, struct hci_dev, list);
+		put_unaligned_le16(d->id, &rp->index[i++]);
+		BT_DBG("Added hci%u", d->id);
+	}
+
+	read_unlock(&hci_dev_list_lock);
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+
+	return 0;
+}
+
 static int cmd_status(struct sock *sk, u16 cmd, u8 status)
 {
 	struct sk_buff *skb;
@@ -123,6 +173,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 	case MGMT_OP_READ_VERSION:
 		err = read_version(sk);
 		break;
+	case MGMT_OP_READ_INDEX_LIST:
+		err = read_index_list(sk);
+		break;
 	default:
 		BT_DBG("Unknown op %u", opcode);
 		err = cmd_status(sk, opcode, 0x01);
-- 
cgit v1.2.3


From f7b64e69c7c75c8e9f2d5e23edec8de1ce883bcc Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Mon, 13 Dec 2010 21:07:06 +0200
Subject: Bluetooth: Add read_info management command

This patch implements the read_info command which is used to fetch basic
info about an adapter.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/mgmt.h | 19 ++++++++++
 net/bluetooth/mgmt.c         | 90 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 101 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index c2b4c83ab175..70985aacc14b 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -39,6 +39,25 @@ struct mgmt_rp_read_index_list {
 	__le16 index[0];
 } __packed;
 
+#define MGMT_OP_READ_INFO		0x0004
+struct mgmt_cp_read_info {
+	__le16 index;
+} __packed;
+struct mgmt_rp_read_info {
+	__le16 index;
+	__u8 type;
+	__u8 powered;
+	__u8 discoverable;
+	__u8 pairable;
+	__u8 sec_mode;
+	bdaddr_t bdaddr;
+	__u8 dev_class[3];
+	__u8 features[8];
+	__u16 manufacturer;
+	__u8 hci_ver;
+	__u16 hci_rev;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16 opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7a8e321875c9..d6c5a32de0b6 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -32,6 +32,33 @@
 #define MGMT_VERSION	0
 #define MGMT_REVISION	1
 
+static int cmd_status(struct sock *sk, u16 cmd, u8 status)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+	struct mgmt_ev_cmd_status *ev;
+
+	BT_DBG("sock %p", sk);
+
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
+	hdr->len = cpu_to_le16(sizeof(*ev));
+
+	ev = (void *) skb_put(skb, sizeof(*ev));
+	ev->status = status;
+	put_unaligned_le16(cmd, &ev->opcode);
+
+	if (sock_queue_rcv_skb(sk, skb) < 0)
+		kfree_skb(skb);
+
+	return 0;
+}
+
 static int read_version(struct sock *sk)
 {
 	struct sk_buff *skb;
@@ -112,26 +139,70 @@ static int read_index_list(struct sock *sk)
 	return 0;
 }
 
-static int cmd_status(struct sock *sk, u16 cmd, u8 status)
+static int read_controller_info(struct sock *sk, unsigned char *data, u16 len)
 {
 	struct sk_buff *skb;
 	struct mgmt_hdr *hdr;
-	struct mgmt_ev_cmd_status *ev;
+	struct mgmt_ev_cmd_complete *ev;
+	struct mgmt_rp_read_info *rp;
+	struct mgmt_cp_read_info *cp;
+	struct hci_dev *hdev;
+	u16 dev_id;
 
 	BT_DBG("sock %p", sk);
 
-	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC);
+	if (len != 2)
+		return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL);
+
+	skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
 
 	hdr = (void *) skb_put(skb, sizeof(*hdr));
-
-	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS);
-	hdr->len = cpu_to_le16(sizeof(*ev));
+	hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE);
+	hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp));
 
 	ev = (void *) skb_put(skb, sizeof(*ev));
-	ev->status = status;
-	put_unaligned_le16(cmd, &ev->opcode);
+	put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode);
+
+	rp = (void *) skb_put(skb, sizeof(*rp));
+
+	cp = (void *) data;
+	dev_id = get_unaligned_le16(&cp->index);
+
+	BT_DBG("request for hci%u", dev_id);
+
+	hdev = hci_dev_get(dev_id);
+	if (!hdev) {
+		kfree_skb(skb);
+		return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV);
+	}
+
+	hci_dev_lock_bh(hdev);
+
+	put_unaligned_le16(hdev->id, &rp->index);
+	rp->type = hdev->dev_type;
+
+	rp->powered = test_bit(HCI_UP, &hdev->flags);
+	rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags);
+	rp->pairable = test_bit(HCI_PSCAN, &hdev->flags);
+
+	if (test_bit(HCI_AUTH, &hdev->flags))
+		rp->sec_mode = 3;
+	else if (hdev->ssp_mode > 0)
+		rp->sec_mode = 4;
+	else
+		rp->sec_mode = 2;
+
+	bacpy(&rp->bdaddr, &hdev->bdaddr);
+	memcpy(rp->features, hdev->features, 8);
+	memcpy(rp->dev_class, hdev->dev_class, 3);
+	put_unaligned_le16(hdev->manufacturer, &rp->manufacturer);
+	rp->hci_ver = hdev->hci_ver;
+	put_unaligned_le16(hdev->hci_rev, &rp->hci_rev);
+
+	hci_dev_unlock_bh(hdev);
+	hci_dev_put(hdev);
 
 	if (sock_queue_rcv_skb(sk, skb) < 0)
 		kfree_skb(skb);
@@ -176,6 +247,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen)
 	case MGMT_OP_READ_INDEX_LIST:
 		err = read_index_list(sk);
 		break;
+	case MGMT_OP_READ_INFO:
+		err = read_controller_info(sk, buf + sizeof(*hdr), len);
+		break;
 	default:
 		BT_DBG("Unknown op %u", opcode);
 		err = cmd_status(sk, opcode, 0x01);
-- 
cgit v1.2.3


From c71e97bfaadfa727669fcfcf12301744fd169091 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Mon, 13 Dec 2010 21:07:07 +0200
Subject: Bluetooth: Add management events for controller addition & removal

This patch adds Bluetooth Management interface events for controller
addition and removal. The events correspond to the existing HCI_DEV_REG
and HCI_DEV_UNREG stack internal events.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/hci_core.h |  2 ++
 include/net/bluetooth/mgmt.h     | 10 ++++++++++
 net/bluetooth/hci_core.c         |  2 ++
 net/bluetooth/mgmt.c             | 41 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 1992fac7e921..3786ee83604e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -662,6 +662,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb);
 
 /* Management interface */
 int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len);
+int mgmt_index_added(u16 index);
+int mgmt_index_removed(u16 index);
 
 /* HCI info for socket */
 #define hci_pi(sk) ((struct hci_pinfo *) sk)
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 70985aacc14b..ca29c1367ffd 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -75,3 +75,13 @@ struct mgmt_ev_controller_error {
 	__le16 index;
 	__u8 error_code;
 } __packed;
+
+#define MGMT_EV_INDEX_ADDED		0x0004
+struct mgmt_ev_index_added {
+	__le16 index;
+} __packed;
+
+#define MGMT_EV_INDEX_REMOVED		0x0005
+struct mgmt_ev_index_removed {
+	__le16 index;
+} __packed;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 51c61f75a797..1a4ec97d5ac4 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -960,6 +960,7 @@ int hci_register_dev(struct hci_dev *hdev)
 		}
 	}
 
+	mgmt_index_added(hdev->id);
 	hci_notify(hdev, HCI_DEV_REG);
 
 	return id;
@@ -989,6 +990,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
 	for (i = 0; i < NUM_REASSEMBLY; i++)
 		kfree_skb(hdev->reassembly[i]);
 
+	mgmt_index_removed(hdev->id);
 	hci_notify(hdev, HCI_DEV_UNREG);
 
 	if (hdev->rfkill) {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d6c5a32de0b6..f827fd908380 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -265,3 +265,44 @@ done:
 	kfree(buf);
 	return err;
 }
+
+static int mgmt_event(u16 event, void *data, u16 data_len)
+{
+	struct sk_buff *skb;
+	struct mgmt_hdr *hdr;
+
+	skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	bt_cb(skb)->channel = HCI_CHANNEL_CONTROL;
+
+	hdr = (void *) skb_put(skb, sizeof(*hdr));
+	hdr->opcode = cpu_to_le16(event);
+	hdr->len = cpu_to_le16(data_len);
+
+	memcpy(skb_put(skb, data_len), data, data_len);
+
+	hci_send_to_sock(NULL, skb);
+	kfree_skb(skb);
+
+	return 0;
+}
+
+int mgmt_index_added(u16 index)
+{
+	struct mgmt_ev_index_added ev;
+
+	put_unaligned_le16(index, &ev.index);
+
+	return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev));
+}
+
+int mgmt_index_removed(u16 index)
+{
+	struct mgmt_ev_index_added ev;
+
+	put_unaligned_le16(index, &ev.index);
+
+	return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev));
+}
-- 
cgit v1.2.3


From 23bb57633df97ede067ea26f3cdc8a7ba2cd8109 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Tue, 21 Dec 2010 23:01:27 +0200
Subject: Bluetooth: Fix __hci_request synchronization for hci_open_dev

The initialization function used by hci_open_dev (hci_init_req) sends
many different HCI commands. The __hci_request function should only
return when all of these commands have completed (or a timeout occurs).
Several of these commands cause hci_req_complete to be called which
causes __hci_request to return prematurely.

This patch fixes the issue by adding a new hdev->req_last_cmd variable
which is set during the initialization procedure. The hci_req_complete
function will no longer mark the request as complete until the command
matching hdev->req_last_cmd completes.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 include/net/bluetooth/hci_core.h |  3 ++-
 net/bluetooth/hci_core.c         | 15 ++++++++++++---
 net/bluetooth/hci_event.c        | 33 +++++++++++++++++++++++----------
 3 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3786ee83604e..a29feb01854e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -129,6 +129,7 @@ struct hci_dev {
 	wait_queue_head_t	req_wait_q;
 	__u32			req_status;
 	__u32			req_result;
+	__u16			req_last_cmd;
 
 	struct inquiry_cache	inq_cache;
 	struct hci_conn_hash	conn_hash;
@@ -693,6 +694,6 @@ struct hci_sec_filter {
 #define hci_req_lock(d)		mutex_lock(&d->req_lock)
 #define hci_req_unlock(d)	mutex_unlock(&d->req_lock)
 
-void hci_req_complete(struct hci_dev *hdev, int result);
+void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result);
 
 #endif /* __HCI_CORE_H */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1a4ec97d5ac4..8b602d881fd7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -91,9 +91,16 @@ static void hci_notify(struct hci_dev *hdev, int event)
 
 /* ---- HCI requests ---- */
 
-void hci_req_complete(struct hci_dev *hdev, int result)
+void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result)
 {
-	BT_DBG("%s result 0x%2.2x", hdev->name, result);
+	BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result);
+
+	/* If the request has set req_last_cmd (typical for multi-HCI
+	 * command requests) check if the completed command matches
+	 * this, and if not just return. Single HCI command requests
+	 * typically leave req_last_cmd as 0 */
+	if (hdev->req_last_cmd && cmd != hdev->req_last_cmd)
+		return;
 
 	if (hdev->req_status == HCI_REQ_PEND) {
 		hdev->req_result = result;
@@ -149,7 +156,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 		break;
 	}
 
-	hdev->req_status = hdev->req_result = 0;
+	hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0;
 
 	BT_DBG("%s end: err %d", hdev->name, err);
 
@@ -252,6 +259,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	/* Connection accept timeout ~20 secs */
 	param = cpu_to_le16(0x7d00);
 	hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
+
+	hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT;
 }
 
 static void hci_scan_req(struct hci_dev *hdev, unsigned long opt)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 8923b36a67a2..38100170d380 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -58,7 +58,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
 
 	clear_bit(HCI_INQUIRY, &hdev->flags);
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status);
 
 	hci_conn_check_pending(hdev);
 }
@@ -174,7 +174,7 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *s
 	if (!status)
 		hdev->link_policy = get_unaligned_le16(sent);
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);
 }
 
 static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -183,7 +183,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_RESET, status);
 }
 
 static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -235,7 +235,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
 			clear_bit(HCI_AUTH, &hdev->flags);
 	}
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);
 }
 
 static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -258,7 +258,7 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
 			clear_bit(HCI_ENCRYPT, &hdev->flags);
 	}
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);
 }
 
 static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -285,7 +285,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
 			set_bit(HCI_PSCAN, &hdev->flags);
 	}
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
 }
 
 static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
@@ -383,7 +383,7 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status);
 }
 
 static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -536,7 +536,16 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!rp->status)
 		bacpy(&hdev->bdaddr, &rp->bdaddr);
 
-	hci_req_complete(hdev, rp->status);
+	hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status);
+}
+
+static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%x", hdev->name, status);
+
+	hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
 }
 
 static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
@@ -544,7 +553,7 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
 	BT_DBG("%s status 0x%x", hdev->name, status);
 
 	if (status) {
-		hci_req_complete(hdev, status);
+		hci_req_complete(hdev, HCI_OP_INQUIRY, status);
 
 		hci_conn_check_pending(hdev);
 	} else
@@ -871,7 +880,7 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff
 
 	clear_bit(HCI_INQUIRY, &hdev->flags);
 
-	hci_req_complete(hdev, status);
+	hci_req_complete(hdev, HCI_OP_INQUIRY, status);
 
 	hci_conn_check_pending(hdev);
 }
@@ -1379,6 +1388,10 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk
 		hci_cc_read_bd_addr(hdev, skb);
 		break;
 
+	case HCI_OP_WRITE_CA_TIMEOUT:
+		hci_cc_write_ca_timeout(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%x", hdev->name, opcode);
 		break;
-- 
cgit v1.2.3


From 17f9cc3124c97f50a19a7597e5f29f915b5b835c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 22 Dec 2010 23:00:34 -0200
Subject: Bluetooth: Improve handling of HCI control channel in bind

Does not allow any channel different of HCI_CHANNEL_RAW and
HCI_CHANNEL_CONTROL to bind.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
---
 net/bluetooth/hci_sock.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f6c18abab797..29827c77f6ce 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -380,7 +380,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
 	if (haddr.hci_family != AF_BLUETOOTH)
 		return -EINVAL;
 
-	if (haddr.hci_channel != HCI_CHANNEL_RAW && !enable_mgmt)
+	if (haddr.hci_channel > HCI_CHANNEL_CONTROL)
+		return -EINVAL;
+
+	if (haddr.hci_channel == HCI_CHANNEL_CONTROL && !enable_mgmt)
 		return -EINVAL;
 
 	lock_sock(sk);
-- 
cgit v1.2.3


From d9f4fbaf7053af43e6c72909c2aff18654717aed Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 22 Dec 2010 23:23:38 +0000
Subject: tcp: cleanup of cwnd initialization in tcp_init_metrics()

Commit 86bcebafc5e7f5 ("tcp: fix >2 iw selection") fixed a case
when congestion window initialization has been mistakenly omitted
by introducing cwnd label and putting backwards goto from the
end of the function.

This makes the code unnecessarily tricky to read and understand
on a first sight.

Shuffle the code around a little bit to make it more obvious.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 824e8c8a17ad..2549b29b062d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -916,25 +916,20 @@ static void tcp_init_metrics(struct sock *sk)
 		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
 	}
 	tcp_set_rto(sk);
-	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
-		goto reset;
-
-cwnd:
-	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-	return;
-
+	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
 reset:
-	/* Play conservative. If timestamps are not
-	 * supported, TCP will fail to recalculate correct
-	 * rtt, if initial rto is too small. FORGET ALL AND RESET!
-	 */
-	if (!tp->rx_opt.saw_tstamp && tp->srtt) {
-		tp->srtt = 0;
-		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
-		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+		/* Play conservative. If timestamps are not
+		 * supported, TCP will fail to recalculate correct
+		 * rtt, if initial rto is too small. FORGET ALL AND RESET!
+		 */
+		if (!tp->rx_opt.saw_tstamp && tp->srtt) {
+			tp->srtt = 0;
+			tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
+			inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+		}
 	}
-	goto cwnd;
+	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
 static void tcp_update_reordering(struct sock *sk, const int metric,
-- 
cgit v1.2.3


From 7f6b0db9f63ba423d989e29f6318fe7e68760421 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 24 Dec 2010 15:59:06 +0100
Subject: net/dsa: don't use flush_scheduled_work()

flush_scheduled_work() is deprecated and scheduled to be removed.
Directly flush dst->link_poll_work on remove instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Lennert Buytenhek <buytenh@wantstofly.org>
---
 net/dsa/dsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6112a12578b2..0c877a74e1f4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -390,7 +390,7 @@ static int dsa_remove(struct platform_device *pdev)
 	if (dst->link_poll_needed)
 		del_timer_sync(&dst->link_poll_timer);
 
-	flush_scheduled_work();
+	flush_work_sync(&dst->link_poll_work);
 
 	for (i = 0; i < dst->pd->nr_chips; i++) {
 		struct dsa_switch *ds = dst->ds[i];
-- 
cgit v1.2.3


From 3e29027af43728c2a91fe3f735ab2822edaf54a8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 30 Dec 2010 09:25:46 +0000
Subject: dcbnl: add support for ieee8021Qaz attributes

The IEEE8021Qaz is the IEEE standard version of CEE. The
standard has had enough significant changes from the CEE
version that many of the CEE attributes have no meaning
in the new spec or do not easily map to IEEE standards.

Rather then attempt to create a complicated mapping
between CEE and IEEE standards this patch adds a nested
IEEE attribute to the list of DCB attributes. The policy
is,

	[DCB_ATTR_IFNAME]
	[DCB_ATTR_STATE]
	...
	[DCB_ATTR_IEEE]
		[DCB_ATTR_IEEE_ETS]
		[DCB_ATTR_IEEE_PFC]
		[DCB_ATTR_IEEE_APP_TABLE]
			[DCB_ATTR_IEEE_APP]
			...

The following dcbnl_rtnl_ops routines were added to handle
the IEEE standard,

	int (*ieee_getets) (struct net_device *, struct ieee_ets *);
	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
	int (*ieee_setapp) (struct net_device *, struct dcb_app *);

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 106 ++++++++++++++++++++++++++++++++++++++++
 include/net/dcbnl.h   |  11 +++++
 net/dcb/dcbnl.c       | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+)

(limited to 'net')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 8723491f7dfd..287b5618e296 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -22,6 +22,87 @@
 
 #include <linux/types.h>
 
+/* IEEE 802.1Qaz std supported values */
+#define IEEE_8021QAZ_MAX_TCS	8
+
+/* This structure contains the IEEE 802.1Qaz ETS managed object
+ *
+ * @willing: willing bit in ETS configuratin TLV
+ * @ets_cap: indicates supported capacity of ets feature
+ * @cbs: credit based shaper ets algorithm supported
+ * @tc_tx_bw: tc tx bandwidth indexed by traffic class
+ * @tc_rx_bw: tc rx bandwidth indexed by traffic class
+ * @tc_tsa: TSA Assignment table, indexed by traffic class
+ * @prio_tc: priority assignment table mapping 8021Qp to traffic class
+ * @tc_reco_bw: recommended tc bandwidth indexed by traffic class for TLV
+ * @tc_reco_tsa: recommended tc bandwidth indexed by traffic class for TLV
+ * @reco_prio_tc: recommended tc tx bandwidth indexed by traffic class for TLV
+ *
+ * Recommended values are used to set fields in the ETS recommendation TLV
+ * with hardware offloaded LLDP.
+ *
+ * ----
+ *  TSA Assignment 8 bit identifiers
+ *	0	strict priority
+ *	1	credit-based shaper
+ *	2	enhanced transmission selection
+ *	3-254	reserved
+ *	255	vendor specific
+ */
+struct ieee_ets {
+	__u8	willing;
+	__u8	ets_cap;
+	__u8	cbs;
+	__u8	tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_rx_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	prio_tc[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_bw[IEEE_8021QAZ_MAX_TCS];
+	__u8	tc_reco_tsa[IEEE_8021QAZ_MAX_TCS];
+	__u8	reco_prio_tc[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qaz PFC managed object
+ *
+ * @pfc_cap: Indicates the number of traffic classes on the local device
+ *	     that may simultaneously have PFC enabled.
+ * @pfc_en: bitmap indicating pfc enabled traffic classes
+ * @mbc: enable macsec bypass capability
+ * @delay: the allowance made for a round-trip propagation delay of the
+ *	   link in bits.
+ * @requests: count of the sent pfc frames
+ * @indications: count of the received pfc frames
+ */
+struct ieee_pfc {
+	__u8	pfc_cap;
+	__u8	pfc_en;
+	__u8	mbc;
+	__u16	delay;
+	__u64	requests[IEEE_8021QAZ_MAX_TCS];
+	__u64	indications[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qaz APP managed object
+ *
+ * @selector: protocol identifier type
+ * @protocol: protocol of type indicated
+ * @priority: 3-bit unsigned integer indicating priority
+ *
+ * ----
+ *  Selector field values
+ *	0	Reserved
+ *	1	Ethertype
+ *	2	Well known port number over TCP or SCTP
+ *	3	Well known port number over UDP or DCCP
+ *	4	Well known port number over TCP, SCTP, UDP, or DCCP
+ *	5-7	Reserved
+ */
+struct dcb_app {
+	__u8	selector;
+	__u32	protocol;
+	__u8	priority;
+};
+
 struct dcbmsg {
 	__u8               dcb_family;
 	__u8               cmd;
@@ -50,6 +131,8 @@ struct dcbmsg {
  * @DCB_CMD_SBCN: get backward congestion notification configration.
  * @DCB_CMD_GAPP: get application protocol configuration
  * @DCB_CMD_SAPP: set application protocol configuration
+ * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
+ * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -83,6 +166,9 @@ enum dcbnl_commands {
 	DCB_CMD_GAPP,
 	DCB_CMD_SAPP,
 
+	DCB_CMD_IEEE_SET,
+	DCB_CMD_IEEE_GET,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -102,6 +188,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED)
  * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
+ * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -119,10 +206,29 @@ enum dcbnl_attrs {
 	DCB_ATTR_BCN,
 	DCB_ATTR_APP,
 
+	/* IEEE std attributes */
+	DCB_ATTR_IEEE,
+
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
 };
 
+enum ieee_attrs {
+	DCB_ATTR_IEEE_UNSPEC,
+	DCB_ATTR_IEEE_ETS,
+	DCB_ATTR_IEEE_PFC,
+	DCB_ATTR_IEEE_APP_TABLE,
+	__DCB_ATTR_IEEE_MAX
+};
+#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
+
+enum ieee_attrs_app {
+	DCB_ATTR_IEEE_APP_UNSPEC,
+	DCB_ATTR_IEEE_APP,
+	__DCB_ATTR_IEEE_APP_MAX
+};
+#define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1)
+
 /**
  * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index b36ac7e0914d..e2d841e963b3 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -20,11 +20,22 @@
 #ifndef __NET_DCBNL_H__
 #define __NET_DCBNL_H__
 
+#include <linux/dcbnl.h>
+
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
  * the netdevice struct.
  */
 struct dcbnl_rtnl_ops {
+	/* IEEE 802.1Qaz std */
+	int (*ieee_getets) (struct net_device *, struct ieee_ets *);
+	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
+	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
+	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
+	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
+	int (*ieee_setapp) (struct net_device *, struct dcb_app *);
+
+	/* CEE std */
 	u8   (*getstate)(struct net_device *);
 	u8   (*setstate)(struct net_device *, u8);
 	void (*getpermhwaddr)(struct net_device *, u8 *);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 19ac2b985485..2ff908498924 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -66,6 +66,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_PFC_STATE]   = {.type = NLA_U8},
 	[DCB_ATTR_BCN]         = {.type = NLA_NESTED},
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
+	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -167,6 +168,17 @@ static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = {
 	[DCB_APP_ATTR_PRIORITY]     = {.type = NLA_U8},
 };
 
+/* IEEE 802.1Qaz nested attributes. */
+static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
+	[DCB_ATTR_IEEE_ETS]	    = {.len = sizeof(struct ieee_ets)},
+	[DCB_ATTR_IEEE_PFC]	    = {.len = sizeof(struct ieee_pfc)},
+	[DCB_ATTR_IEEE_APP_TABLE]   = {.type = NLA_NESTED},
+};
+
+static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
+	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
+};
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -1118,6 +1130,117 @@ err:
 	return ret;
 }
 
+/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
+ * be completed the entire msg is aborted and error value is returned.
+ * No attempt is made to reconcile the case where only part of the
+ * cmd can be completed.
+ */
+static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+	int err = -EOPNOTSUPP;
+
+	if (!ops)
+		goto err;
+
+	err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX,
+			       tb[DCB_ATTR_IEEE], dcbnl_ieee_policy);
+	if (err)
+		goto err;
+
+	if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) {
+		struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]);
+		err = ops->ieee_setets(netdev, ets);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) {
+		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
+		err = ops->ieee_setpfc(netdev, pfc);
+		if (err)
+			goto err;
+	}
+
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) {
+		struct nlattr *attr;
+		int rem;
+
+		nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
+			struct dcb_app *app_data;
+			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
+				continue;
+			app_data = nla_data(attr);
+			err = ops->ieee_setapp(netdev, app_data);
+			if (err)
+				goto err;
+		}
+	}
+
+err:
+	dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE,
+		    pid, seq, flags);
+	return err;
+}
+
+
+/* Handle IEEE 802.1Qaz GET commands. */
+static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
+			  u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *ieee;
+	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
+	int err;
+
+	if (!ops)
+		return -EOPNOTSUPP;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_IEEE_GET;
+
+	NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name);
+
+	ieee = nla_nest_start(skb, DCB_ATTR_IEEE);
+	if (!ieee)
+		goto nla_put_failure;
+
+	if (ops->ieee_getets) {
+		struct ieee_ets ets;
+		err = ops->ieee_getets(netdev, &ets);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets);
+	}
+
+	if (ops->ieee_getpfc) {
+		struct ieee_pfc pfc;
+		err = ops->ieee_getpfc(netdev, &pfc);
+		if (!err)
+			NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc);
+	}
+
+	nla_nest_end(skb, ieee);
+	nlmsg_end(skb, nlh);
+
+	return rtnl_unicast(skb, &init_net, pid);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+nlmsg_failure:
+	kfree_skb(skb);
+	return -1;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1223,6 +1346,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setapp(netdev, tb, pid, nlh->nlmsg_seq,
 		                   nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_IEEE_SET:
+		ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq,
+				 nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_IEEE_GET:
+		ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq,
+				 nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 9ab933ab2cc80f04690d6aa385b1110075c5e507 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 30 Dec 2010 09:26:31 +0000
Subject: dcbnl: add appliction tlv handlers

This patch adds application tlv handlers. Networking stacks
may use the application priority to set the skb priority of
their stack using the negoatiated dcbx priority.

This patch provides the dcb_{get|set}app() routines for the
stack to query these parameters. Notice lower layer drivers
can use the dcbnl_ops routines if additional handling is
needed. Perhaps in the firmware case for example

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h |   4 +-
 include/net/dcbnl.h   |   9 ++++
 net/dcb/dcbnl.c       | 133 ++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 135 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 287b5618e296..775bdb4465bf 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -82,7 +82,9 @@ struct ieee_pfc {
 	__u64	indications[IEEE_8021QAZ_MAX_TCS];
 };
 
-/* This structure contains the IEEE 802.1Qaz APP managed object
+/* This structure contains the IEEE 802.1Qaz APP managed object. This
+ * object is also used for the CEE std as well. There is no difference
+ * between the objects.
  *
  * @selector: protocol identifier type
  * @protocol: protocol of type indicated
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index e2d841e963b3..ab7d623a2793 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -22,6 +22,15 @@
 
 #include <linux/dcbnl.h>
 
+struct dcb_app_type {
+	char		  name[IFNAMSIZ];
+	struct dcb_app	  app;
+	struct list_head  list;
+};
+
+u8 dcb_setapp(struct net_device *, struct dcb_app *);
+u8 dcb_getapp(struct net_device *, struct dcb_app *);
+
 /*
  * Ops struct for the netlink callbacks.  Used by DCB-enabled drivers through
  * the netdevice struct.
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 2ff908498924..cfd731faf6c6 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -179,6 +179,9 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
 	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
 };
 
+static LIST_HEAD(dcb_app_list);
+static DEFINE_SPINLOCK(dcb_lock);
+
 /* standard netlink reply call */
 static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid,
                        u32 seq, u16 flags)
@@ -634,12 +637,12 @@ out:
 static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb,
                         u32 pid, u32 seq, u16 flags)
 {
-	int ret = -EINVAL;
+	int err, ret = -EINVAL;
 	u16 id;
 	u8 up, idtype;
 	struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1];
 
-	if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->setapp)
+	if (!tb[DCB_ATTR_APP])
 		goto out;
 
 	ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP],
@@ -663,9 +666,18 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb,
 	id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]);
 	up = nla_get_u8(app_tb[DCB_APP_ATTR_PRIORITY]);
 
-	ret = dcbnl_reply(netdev->dcbnl_ops->setapp(netdev, idtype, id, up),
-	                  RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP,
-	                  pid, seq, flags);
+	if (netdev->dcbnl_ops->setapp) {
+		err = netdev->dcbnl_ops->setapp(netdev, idtype, id, up);
+	} else {
+		struct dcb_app app;
+		app.selector = idtype;
+		app.protocol = id;
+		app.priority = up;
+		err = dcb_setapp(netdev, &app);
+	}
+
+	ret = dcbnl_reply(err, RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP,
+			  pid, seq, flags);
 out:
 	return ret;
 }
@@ -1164,7 +1176,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			goto err;
 	}
 
-	if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) {
+	if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
 		struct nlattr *attr;
 		int rem;
 
@@ -1173,7 +1185,10 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
 			if (nla_type(attr) != DCB_ATTR_IEEE_APP)
 				continue;
 			app_data = nla_data(attr);
-			err = ops->ieee_setapp(netdev, app_data);
+			if (ops->ieee_setapp)
+				err = ops->ieee_setapp(netdev, app_data);
+			else
+				err = dcb_setapp(netdev, app_data);
 			if (err)
 				goto err;
 		}
@@ -1193,7 +1208,8 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
 	struct dcbmsg *dcb;
-	struct nlattr *ieee;
+	struct nlattr *ieee, *app;
+	struct dcb_app_type *itr;
 	const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
 	int err;
 
@@ -1230,6 +1246,19 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 			NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc);
 	}
 
+	app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE);
+	if (!app)
+		goto nla_put_failure;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0)
+			NLA_PUT(skb, DCB_ATTR_IEEE_APP,
+				sizeof(itr->app), &itr->app);
+	}
+	spin_unlock(&dcb_lock);
+	nla_nest_end(skb, app);
+
 	nla_nest_end(skb, ieee);
 	nlmsg_end(skb, nlh);
 
@@ -1364,8 +1393,93 @@ out:
 	return ret;
 }
 
+/**
+ * dcb_getapp - retrieve the DCBX application user priority
+ *
+ * On success returns a non-zero 802.1p user priority bitmap
+ * otherwise returns 0 as the invalid user priority bitmap to
+ * indicate an error.
+ */
+u8 dcb_getapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct dcb_app_type *itr;
+	u8 prio = 0;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == app->selector &&
+		    itr->app.protocol == app->protocol &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			prio = itr->app.priority;
+			break;
+		}
+	}
+	spin_unlock(&dcb_lock);
+
+	return prio;
+}
+EXPORT_SYMBOL(dcb_getapp);
+
+/**
+ * ixgbe_dcbnl_setapp - add dcb application data to app list
+ *
+ * Priority 0 is the default priority this removes applications
+ * from the app list if the priority is set to zero.
+ */
+u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
+{
+	struct dcb_app_type *itr;
+
+	spin_lock(&dcb_lock);
+	/* Search for existing match and replace */
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->app.selector == new->selector &&
+		    itr->app.protocol == new->protocol &&
+		    (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) {
+			if (new->priority)
+				itr->app.priority = new->priority;
+			else {
+				list_del(&itr->list);
+				kfree(itr);
+			}
+			goto out;
+		}
+	}
+	/* App type does not exist add new application type */
+	if (new->priority) {
+		struct dcb_app_type *entry;
+		entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC);
+		if (!entry) {
+			spin_unlock(&dcb_lock);
+			return -ENOMEM;
+		}
+
+		memcpy(&entry->app, new, sizeof(*new));
+		strncpy(entry->name, dev->name, IFNAMSIZ);
+		list_add(&entry->list, &dcb_app_list);
+	}
+out:
+	spin_unlock(&dcb_lock);
+	return 0;
+}
+EXPORT_SYMBOL(dcb_setapp);
+
+void dcb_flushapp(void)
+{
+	struct dcb_app_type *app;
+
+	spin_lock(&dcb_lock);
+	list_for_each_entry(app, &dcb_app_list, list) {
+		list_del(&app->list);
+		kfree(app);
+	}
+	spin_unlock(&dcb_lock);
+}
+
 static int __init dcbnl_init(void)
 {
+	INIT_LIST_HEAD(&dcb_app_list);
+
 	rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL);
 	rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL);
 
@@ -1377,7 +1491,6 @@ static void __exit dcbnl_exit(void)
 {
 	rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
 	rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
+	dcb_flushapp();
 }
 module_exit(dcbnl_exit);
-
-
-- 
cgit v1.2.3


From 96b99684e365f28d49bdb1221ca022b75cb91a98 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 30 Dec 2010 09:26:37 +0000
Subject: net_dcb: add application notifiers

DCBx applications priorities can be changed dynamically. If
application stacks are expected to keep the skb priority
consistent with the dcbx priority the stack will need to
be notified when these changes occur.

This patch adds application notifiers for the stack to register
with.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbevent.h | 31 +++++++++++++++++++++++++++++++
 net/dcb/Makefile       |  2 +-
 net/dcb/dcbevent.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 net/dcb/dcbnl.c        |  2 ++
 4 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 include/net/dcbevent.h
 create mode 100644 net/dcb/dcbevent.c

(limited to 'net')

diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h
new file mode 100644
index 000000000000..bc1e7ef40171
--- /dev/null
+++ b/include/net/dcbevent.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: John Fastabend <john.r.fastabend@intel.com>
+ */
+
+#ifndef _DCB_EVENT_H
+#define _DCB_EVENT_H
+
+enum dcbevent_notif_type {
+	DCB_APP_EVENT = 1,
+};
+
+extern int register_dcbevent_notifier(struct notifier_block *nb);
+extern int unregister_dcbevent_notifier(struct notifier_block *nb);
+extern int call_dcbevent_notifiers(unsigned long val, void *v);
+
+#endif
diff --git a/net/dcb/Makefile b/net/dcb/Makefile
index 9930f4cde818..c1282c9e64fa 100644
--- a/net/dcb/Makefile
+++ b/net/dcb/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_DCB) += dcbnl.o
+obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o
diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c
new file mode 100644
index 000000000000..665a8802105a
--- /dev/null
+++ b/net/dcb/dcbevent.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: John Fastabend <john.r.fastabend@intel.com>
+ */
+
+#include <linux/rtnetlink.h>
+#include <linux/notifier.h>
+
+static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain);
+
+int register_dcbevent_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&dcbevent_notif_chain, nb);
+}
+EXPORT_SYMBOL(register_dcbevent_notifier);
+
+int unregister_dcbevent_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&dcbevent_notif_chain, nb);
+}
+EXPORT_SYMBOL(unregister_dcbevent_notifier);
+
+int call_dcbevent_notifiers(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&dcbevent_notif_chain, val, v);
+}
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index cfd731faf6c6..69144125fc4f 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -23,6 +23,7 @@
 #include <net/netlink.h>
 #include <net/rtnetlink.h>
 #include <linux/dcbnl.h>
+#include <net/dcbevent.h>
 #include <linux/rtnetlink.h>
 #include <net/sock.h>
 
@@ -1460,6 +1461,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
 	}
 out:
 	spin_unlock(&dcb_lock);
+	call_dcbevent_notifiers(DCB_APP_EVENT, new);
 	return 0;
 }
 EXPORT_SYMBOL(dcb_setapp);
-- 
cgit v1.2.3


From 6241b6259b16aa390ff4bf50f520685b3801200b Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Thu, 30 Dec 2010 06:26:48 +0000
Subject: dcbnl: adding DCBX engine capability

Adding an optional DCBX capability and a pair for get-set routines for
setting the device DCBX mode. The DCBX capability is a bit field of
supported attributes. The user is expected to set the DCBX mode with a
subset of the advertised attributes.

This patch is dependent on the following patches:
[net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes
[net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers
[net-next-2.6 PATCH 3/3] net_dcb: add application notifiers

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/net/dcbnl.h   |  5 +++++
 net/dcb/dcbnl.c       | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

(limited to 'net')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 775bdb4465bf..16eea36d8934 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -135,6 +135,8 @@ struct dcbmsg {
  * @DCB_CMD_SAPP: set application protocol configuration
  * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration
  * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
+ * @DCB_CMD_GDCBX: get DCBX engine configuration
+ * @DCB_CMD_SDCBX: set DCBX engine configuration
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -171,6 +173,9 @@ enum dcbnl_commands {
 	DCB_CMD_IEEE_SET,
 	DCB_CMD_IEEE_GET,
 
+	DCB_CMD_GDCBX,
+	DCB_CMD_SDCBX,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -191,6 +196,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED)
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
  * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
+ * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -211,6 +217,8 @@ enum dcbnl_attrs {
 	/* IEEE std attributes */
 	DCB_ATTR_IEEE,
 
+	DCB_ATTR_DCBX,
+
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
 };
@@ -370,6 +378,8 @@ enum dcbnl_tc_attrs {
  * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority
  * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion
  *                             Notification
+ * @DCB_CAP_ATTR_DCBX: (NLA_U8) device supports DCBX engine
+ *
  */
 enum dcbnl_cap_attrs {
 	DCB_CAP_ATTR_UNDEFINED,
@@ -381,11 +391,44 @@ enum dcbnl_cap_attrs {
 	DCB_CAP_ATTR_PFC_TCS,
 	DCB_CAP_ATTR_GSP,
 	DCB_CAP_ATTR_BCN,
+	DCB_CAP_ATTR_DCBX,
 
 	__DCB_CAP_ATTR_ENUM_MAX,
 	DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1,
 };
 
+/**
+ * DCBX capability flags
+ *
+ * @DCB_CAP_DCBX_HOST: DCBX negotiation is performed by the host LLDP agent.
+ *                     'set' routines are used to configure the device with
+ *                     the negotiated parameters
+ *
+ * @DCB_CAP_DCBX_LLD_MANAGED: DCBX negotiation is not performed in the host but
+ *                            by another entity
+ *                            'get' routines are used to retrieve the
+ *                            negotiated parameters
+ *                            'set' routines can be used to set the initial
+ *                            negotiation configuration
+ *
+ * @DCB_CAP_DCBX_VER_CEE: for a non-host DCBX engine, indicates the engine
+ *                        supports the CEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_VER_IEEE: for a non-host DCBX engine, indicates the engine
+ *                         supports the IEEE protocol flavor
+ *
+ * @DCB_CAP_DCBX_STATIC: for a non-host DCBX engine, indicates the engine
+ *                       supports static configuration (i.e no actual
+ *                       negotiation is performed negotiated parameters equal
+ *                       the initial configuration)
+ *
+ */
+#define DCB_CAP_DCBX_HOST		0x01
+#define DCB_CAP_DCBX_LLD_MANAGED	0x02
+#define DCB_CAP_DCBX_VER_CEE		0x04
+#define DCB_CAP_DCBX_VER_IEEE		0x08
+#define DCB_CAP_DCBX_STATIC		0x10
+
 /**
  * enum dcbnl_numtcs_attrs - number of traffic classes
  *
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index ab7d623a2793..c65347b3cbbf 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -70,6 +70,11 @@ struct dcbnl_rtnl_ops {
 	void (*setbcnrp)(struct net_device *, int, u8);
 	u8   (*setapp)(struct net_device *, u8, u16, u8);
 	u8   (*getapp)(struct net_device *, u8, u16);
+
+	/* DCBX configuration */
+	u8   (*getdcbx)(struct net_device *);
+	u8   (*setdcbx)(struct net_device *, u8);
+
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 69144125fc4f..8f83ad859d9b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -68,6 +68,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_BCN]         = {.type = NLA_NESTED},
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
+	[DCB_ATTR_DCBX]        = {.type = NLA_U8},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -124,6 +125,7 @@ static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = {
 	[DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8},
 	[DCB_CAP_ATTR_GSP]     = {.type = NLA_U8},
 	[DCB_CAP_ATTR_BCN]     = {.type = NLA_U8},
+	[DCB_CAP_ATTR_DCBX]    = {.type = NLA_U8},
 };
 
 /* DCB capabilities nested attributes. */
@@ -1271,6 +1273,39 @@ nlmsg_failure:
 	return -1;
 }
 
+/* DCBX configuration */
+static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+
+	if (!netdev->dcbnl_ops->getdcbx)
+		return ret;
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB,
+			  DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags);
+
+	return ret;
+}
+
+static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb,
+			 u32 pid, u32 seq, u16 flags)
+{
+	int ret = -EINVAL;
+	u8 value;
+
+	if (!tb[DCB_ATTR_DCBX] || !netdev->dcbnl_ops->setdcbx)
+		return ret;
+
+	value = nla_get_u8(tb[DCB_ATTR_DCBX]);
+
+	ret = dcbnl_reply(netdev->dcbnl_ops->setdcbx(netdev, value),
+			  RTM_SETDCB, DCB_CMD_SDCBX, DCB_ATTR_DCBX,
+			  pid, seq, flags);
+
+	return ret;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1384,6 +1419,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq,
 				 nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GDCBX:
+		ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SDCBX:
+		ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq,
+				    nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From ea45fe4e176a42d2396878f530cfdc8265bef37b Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Thu, 30 Dec 2010 06:26:55 +0000
Subject: dcbnl: adding DCBX feature flags get-set

Adding a pair of set-get routines to dcbnl for setting the negotiation
flags of the various DCB features. Conforms to the CEE flavor of DCBX
The user sets these flags (enable, advertise, willing) for each feature
to be used by the DCBX engine. The 'get' routine returns which of the
features is enabled after the negotiation.

This patch is dependent on the following patches:
[net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes
[net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers
[net-next-2.6 PATCH 3/3] net_dcb: add application notifiers

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h |  33 +++++++++++++
 include/net/dcbnl.h   |   3 ++
 net/dcb/dcbnl.c       | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 169 insertions(+)

(limited to 'net')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 16eea36d8934..68cd248f6d3e 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -137,6 +137,8 @@ struct dcbmsg {
  * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration
  * @DCB_CMD_GDCBX: get DCBX engine configuration
  * @DCB_CMD_SDCBX: set DCBX engine configuration
+ * @DCB_CMD_GFEATCFG: get DCBX features flags
+ * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags
  */
 enum dcbnl_commands {
 	DCB_CMD_UNDEFINED,
@@ -176,6 +178,9 @@ enum dcbnl_commands {
 	DCB_CMD_GDCBX,
 	DCB_CMD_SDCBX,
 
+	DCB_CMD_GFEATCFG,
+	DCB_CMD_SFEATCFG,
+
 	__DCB_CMD_ENUM_MAX,
 	DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1,
 };
@@ -197,6 +202,7 @@ enum dcbnl_commands {
  * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED)
  * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED)
  * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8)
+ * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED)
  */
 enum dcbnl_attrs {
 	DCB_ATTR_UNDEFINED,
@@ -218,6 +224,7 @@ enum dcbnl_attrs {
 	DCB_ATTR_IEEE,
 
 	DCB_ATTR_DCBX,
+	DCB_ATTR_FEATCFG,
 
 	__DCB_ATTR_ENUM_MAX,
 	DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1,
@@ -506,4 +513,30 @@ enum dcbnl_app_attrs {
 	DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1,
 };
 
+/**
+ * enum dcbnl_featcfg_attrs - features conifiguration flags
+ *
+ * @DCB_FEATCFG_ATTR_UNDEFINED: unspecified attribute to catch errors
+ * @DCB_FEATCFG_ATTR_ALL: (NLA_FLAG) all features configuration attributes
+ * @DCB_FEATCFG_ATTR_PG: (NLA_U8) configuration flags for priority groups
+ * @DCB_FEATCFG_ATTR_PFC: (NLA_U8) configuration flags for priority
+ *                                 flow control
+ * @DCB_FEATCFG_ATTR_APP: (NLA_U8) configuration flags for application TLV
+ *
+ */
+#define DCB_FEATCFG_ERROR	0x01	/* error in feature resolution */
+#define DCB_FEATCFG_ENABLE	0x02	/* enable feature */
+#define DCB_FEATCFG_WILLING	0x04	/* feature is willing */
+#define DCB_FEATCFG_ADVERTISE	0x08	/* advertise feature */
+enum dcbnl_featcfg_attrs {
+	DCB_FEATCFG_ATTR_UNDEFINED,
+	DCB_FEATCFG_ATTR_ALL,
+	DCB_FEATCFG_ATTR_PG,
+	DCB_FEATCFG_ATTR_PFC,
+	DCB_FEATCFG_ATTR_APP,
+
+	__DCB_FEATCFG_ATTR_ENUM_MAX,
+	DCB_FEATCFG_ATTR_MAX = __DCB_FEATCFG_ATTR_ENUM_MAX - 1,
+};
+
 #endif /* __LINUX_DCBNL_H__ */
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index c65347b3cbbf..a8e7852b10ab 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -70,11 +70,14 @@ struct dcbnl_rtnl_ops {
 	void (*setbcnrp)(struct net_device *, int, u8);
 	u8   (*setapp)(struct net_device *, u8, u16, u8);
 	u8   (*getapp)(struct net_device *, u8, u16);
+	u8   (*getfeatcfg)(struct net_device *, int, u8 *);
+	u8   (*setfeatcfg)(struct net_device *, int, u8);
 
 	/* DCBX configuration */
 	u8   (*getdcbx)(struct net_device *);
 	u8   (*setdcbx)(struct net_device *, u8);
 
+
 };
 
 #endif /* __NET_DCBNL_H__ */
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 8f83ad859d9b..075af0a08d84 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -69,6 +69,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = {
 	[DCB_ATTR_APP]         = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE]	       = {.type = NLA_NESTED},
 	[DCB_ATTR_DCBX]        = {.type = NLA_U8},
+	[DCB_ATTR_FEATCFG]     = {.type = NLA_NESTED},
 };
 
 /* DCB priority flow control to User Priority nested attributes */
@@ -182,6 +183,14 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
 	[DCB_ATTR_IEEE_APP]	    = {.len = sizeof(struct dcb_app)},
 };
 
+/* DCB number of traffic classes nested attributes. */
+static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
+	[DCB_FEATCFG_ATTR_ALL]      = {.type = NLA_FLAG},
+	[DCB_FEATCFG_ATTR_PG]       = {.type = NLA_U8},
+	[DCB_FEATCFG_ATTR_PFC]      = {.type = NLA_U8},
+	[DCB_FEATCFG_ATTR_APP]      = {.type = NLA_U8},
+};
+
 static LIST_HEAD(dcb_app_list);
 static DEFINE_SPINLOCK(dcb_lock);
 
@@ -1306,6 +1315,122 @@ static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb,
 	return ret;
 }
 
+static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb,
+			    u32 pid, u32 seq, u16 flags)
+{
+	struct sk_buff *dcbnl_skb;
+	struct nlmsghdr *nlh;
+	struct dcbmsg *dcb;
+	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest;
+	u8 value;
+	int ret = -EINVAL;
+	int i;
+	int getall = 0;
+
+	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->getfeatcfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
+			       dcbnl_featcfg_nest);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!dcbnl_skb) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags);
+
+	dcb = NLMSG_DATA(nlh);
+	dcb->dcb_family = AF_UNSPEC;
+	dcb->cmd = DCB_CMD_GFEATCFG;
+
+	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG);
+	if (!nest) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (data[DCB_FEATCFG_ATTR_ALL])
+		getall = 1;
+
+	for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+		if (!getall && !data[i])
+			continue;
+
+		ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value);
+		if (!ret) {
+			ret = nla_put_u8(dcbnl_skb, i, value);
+
+			if (ret) {
+				nla_nest_cancel(dcbnl_skb, nest);
+				ret = -EINVAL;
+				goto err;
+			}
+		} else
+			goto err;
+	}
+	nla_nest_end(dcbnl_skb, nest);
+
+	nlmsg_end(dcbnl_skb, nlh);
+
+	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
+	if (ret) {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	return 0;
+nlmsg_failure:
+err:
+	kfree_skb(dcbnl_skb);
+err_out:
+	return ret;
+}
+
+static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb,
+			    u32 pid, u32 seq, u16 flags)
+{
+	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1];
+	int ret = -EINVAL;
+	u8 value;
+	int i;
+
+	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->setfeatcfg)
+		return ret;
+
+	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
+			       dcbnl_featcfg_nest);
+
+	if (ret) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
+		if (data[i] == NULL)
+			continue;
+
+		value = nla_get_u8(data[i]);
+
+		ret = netdev->dcbnl_ops->setfeatcfg(netdev, i, value);
+
+		if (ret)
+			goto operr;
+	}
+
+operr:
+	ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SFEATCFG,
+			  DCB_ATTR_FEATCFG, pid, seq, flags);
+
+err:
+	return ret;
+}
+
 static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1427,6 +1552,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq,
 				    nlh->nlmsg_flags);
 		goto out;
+	case DCB_CMD_GFEATCFG:
+		ret = dcbnl_getfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
+				       nlh->nlmsg_flags);
+		goto out;
+	case DCB_CMD_SFEATCFG:
+		ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq,
+				       nlh->nlmsg_flags);
+		goto out;
 	default:
 		goto errout;
 	}
-- 
cgit v1.2.3


From 7c14c3f10e6dcd7f70e49f77b6e1ae605c4861e6 Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Thu, 30 Dec 2010 06:27:10 +0000
Subject: dcbnl: cleanup

A couple of small cleanups for patches:
[net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes
[net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers
[net-next-2.6 PATCH 3/3] net_dcb: add application notifiers

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 075af0a08d84..ff3c12d2cd72 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1642,7 +1642,7 @@ out:
 }
 EXPORT_SYMBOL(dcb_setapp);
 
-void dcb_flushapp(void)
+static void dcb_flushapp(void)
 {
 	struct dcb_app_type *app;
 
-- 
cgit v1.2.3


From eeaeb068f1393b4db4861481bf594bcd1c3eda7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 28 Dec 2010 21:53:33 +0000
Subject: sch_sfq: allow big packets and be fair

SFQ is currently 'limited' to small packets, because it uses a 15bit
allotment number per flow. Introduce a scale by 8, so that we can handle
full size TSO/GRO packets.

Use appropriate handling to make sure allot is positive before a new
packet is dequeued, so that fairness is respected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6a2f88fea6d8..b76d46b71466 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,7 +67,7 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
+	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
 	fit into one 4K page on 32bit arches.
 
@@ -77,6 +77,11 @@
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
+/* We use 16 bits to store allot, and want to handle packets up to 64K
+ * Scale allot by 8 (1<<3) so that no overflow occurs.
+ */
+#define SFQ_ALLOT_SHIFT		3
+#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
@@ -115,7 +120,7 @@ struct sfq_sched_data
 	struct timer_list perturb_timer;
 	u32		perturbation;
 	sfq_index	cur_depth;	/* depth of longest slot */
-
+	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
 	struct sfq_slot	slots[SFQ_SLOTS];
@@ -395,7 +400,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			q->tail->next = x;
 		}
 		q->tail = slot;
-		slot->allot = q->quantum;
+		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -431,8 +436,14 @@ sfq_dequeue(struct Qdisc *sch)
 	if (q->tail == NULL)
 		return NULL;
 
+next_slot:
 	a = q->tail->next;
 	slot = &q->slots[a];
+	if (slot->allot <= 0) {
+		q->tail = slot;
+		slot->allot += q->scaled_quantum;
+		goto next_slot;
+	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
@@ -447,9 +458,8 @@ sfq_dequeue(struct Qdisc *sch)
 			return skb;
 		}
 		q->tail->next = next_a;
-	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = slot;
-		slot->allot += q->quantum;
+	} else {
+		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
 	return skb;
 }
@@ -485,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -525,6 +536,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 	} else {
@@ -617,7 +629,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
-		xstats.allot = slot->allot;
+		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
 		slot_queue_walk(slot, skb)
 			qs.backlog += qdisc_pkt_len(skb);
-- 
cgit v1.2.3


From 18c8d82ae5b802c5d82e0dfbcc08b1b568955f46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 31 Dec 2010 12:48:55 -0800
Subject: sfq: fix slot_dequeue_head()

slot_dequeue_head() should make sure slot skb chain is correct in both
ways, or we can crash if all possible flows are in use.

Jarek pointed out slot_queue_init() can now be done in sfq_init() once,
instead each time a flow is setup.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b76d46b71466..d54ac94066c2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -297,6 +297,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_next;
 
 	slot->skblist_next = skb->next;
+	skb->next->prev = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -380,7 +381,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
-		slot_queue_init(slot);
 	}
 
 	/* If selected queue has length q->limit, do simple tail drop,
@@ -545,8 +545,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_SLOTS; i++)
+	for (i = 0; i < SFQ_SLOTS; i++) {
+		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 51f98a8d70583b18cb08b19353aeed5efb0244af Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:16 +0000
Subject: tipc: Remove prototype code for supporting multiple zones

Eliminates routines, data structures, and files that were intended
to allows TIPC to support a network containing multiple zones.
Currently, TIPC supports only networks consisting of a single cluster
within a single zone, so this code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/Kconfig            |  12 ----
 net/tipc/Makefile           |   2 +-
 net/tipc/addr.c             |   4 --
 net/tipc/cluster.c          |  14 +---
 net/tipc/cluster.h          |  12 ++--
 net/tipc/config.c           |  30 ++-------
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/net.c              |  41 +++++++-----
 net/tipc/net.h              |   8 ++-
 net/tipc/node.c             |   7 +-
 net/tipc/node.h             |   1 -
 net/tipc/zone.c             | 159 --------------------------------------------
 net/tipc/zone.h             |  70 -------------------
 15 files changed, 46 insertions(+), 325 deletions(-)
 delete mode 100644 net/tipc/zone.c
 delete mode 100644 net/tipc/zone.h

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 9cde86c32412..fa3aeaafeab1 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -94,7 +94,7 @@
 #define  TIPC_CMD_GET_MAX_PORTS     0x4004    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* tx none, rx unsigned */
@@ -130,7 +130,7 @@
 #define  TIPC_CMD_SET_MAX_PORTS     0x8004    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* tx unsigned, rx none */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index b74f78d0c033..06d32908fcfb 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,18 +29,6 @@ config TIPC_ADVANCED
 	  Saying Y here will open some advanced configuration for TIPC.
 	  Most users do not need to bother; if unsure, just say N.
 
-config TIPC_ZONES
-	int "Maximum number of zones in a network"
-	depends on TIPC_ADVANCED
-	range 1 255
-	default "3"
-	help
-	  Specifies how many zones can be supported in a TIPC network.
-	  Can range from 1 to 255 zones; default is 3.
-
-	  Setting this to a smaller value saves some memory;
-	  setting it to a higher value allows for more zones.
-
 config TIPC_CLUSTERS
 	int "Maximum number of clusters in a zone"
 	depends on TIPC_ADVANCED
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index dceb7027946c..3d936f0560c7 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,6 +8,6 @@ tipc-y	+= addr.o bcast.o bearer.o config.o cluster.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
-	   socket.o user_reg.o zone.o dbg.o eth_media.o
+	   socket.o user_reg.o dbg.o eth_media.o
 
 # End of file
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 886715a75259..3d0f97da5b5f 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,8 +35,6 @@
  */
 
 #include "core.h"
-#include "addr.h"
-#include "zone.h"
 #include "cluster.h"
 
 /**
@@ -61,8 +59,6 @@ int tipc_addr_domain_valid(u32 addr)
 		return 0;
 	if (c > tipc_max_clusters)
 		return 0;
-	if (z > tipc_max_zones)
-		return 0;
 
 	if (n && (!z || !c))
 		return 0;
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 405be87157ba..996b2b67687e 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -47,7 +47,6 @@ u32 tipc_highest_allowed_slave = 0;
 
 struct cluster *tipc_cltr_create(u32 addr)
 {
-	struct _zone *z_ptr;
 	struct cluster *c_ptr;
 	int max_nodes;
 
@@ -75,18 +74,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 	c_ptr->highest_slave = LOWEST_SLAVE - 1;
 	c_ptr->highest_node = 0;
 
-	z_ptr = tipc_zone_find(tipc_zone(addr));
-	if (!z_ptr) {
-		z_ptr = tipc_zone_create(addr);
-	}
-	if (!z_ptr) {
-		kfree(c_ptr->nodes);
-		kfree(c_ptr);
-		return NULL;
-	}
-
-	tipc_zone_attach_cluster(z_ptr, c_ptr);
-	c_ptr->owner = z_ptr;
+	tipc_net.clusters[1] = c_ptr;
 	return c_ptr;
 }
 
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 32636d98c9c6..21493f7beb95 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -38,14 +38,13 @@
 #define _TIPC_CLUSTER_H
 
 #include "addr.h"
-#include "zone.h"
+#include "net.h"
 
 #define LOWEST_SLAVE  2048u
 
 /**
  * struct cluster - TIPC cluster structure
  * @addr: network address of cluster
- * @owner: pointer to zone that cluster belongs to
  * @nodes: array of pointers to all nodes within cluster
  * @highest_node: id of highest numbered node within cluster
  * @highest_slave: (used for secondary node support)
@@ -53,7 +52,6 @@
 
 struct cluster {
 	u32 addr;
-	struct _zone *owner;
 	struct tipc_node **nodes;
 	u32 highest_node;
 	u32 highest_slave;
@@ -82,11 +80,9 @@ void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi)
 
 static inline struct cluster *tipc_cltr_find(u32 addr)
 {
-	struct _zone *z_ptr = tipc_zone_find(addr);
-
-	if (z_ptr)
-		return z_ptr->clusters[1];
-	return NULL;
+	if (!in_own_cluster(addr))
+		return NULL;
+	return tipc_net.clusters[1];
 }
 
 #endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
index bdde39f0436b..8de97ddb0427 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -269,25 +269,6 @@ static struct sk_buff *cfg_set_max_ports(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_zones(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value == tipc_max_zones)
-		return tipc_cfg_reply_none();
-	if (value != delimit(value, 1, 255))
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max zones must be 1-255)");
-	if (tipc_mode == TIPC_NET_MODE)
-		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
-			" (cannot change max zones once TIPC has joined a network)");
-	tipc_max_zones = value;
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_clusters(void)
 {
 	u32 value;
@@ -452,9 +433,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_SUBSCR:
 		rep_tlv_buf = cfg_set_max_subscriptions();
 		break;
-	case TIPC_CMD_SET_MAX_ZONES:
-		rep_tlv_buf = cfg_set_max_zones();
-		break;
 	case TIPC_CMD_SET_MAX_CLUSTERS:
 		rep_tlv_buf = cfg_set_max_clusters();
 		break;
@@ -479,9 +457,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_SUBSCR:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
 		break;
-	case TIPC_CMD_GET_MAX_ZONES:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_zones);
-		break;
 	case TIPC_CMD_GET_MAX_CLUSTERS:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters);
 		break;
@@ -498,6 +473,11 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 		rep_tlv_buf =
 			tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN);
 		break;
+	case TIPC_CMD_SET_MAX_ZONES:
+	case TIPC_CMD_GET_MAX_ZONES:
+		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
+							  " (obsolete command)");
+		break;
 	default:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (unknown command)");
diff --git a/net/tipc/core.c b/net/tipc/core.c
index f5d62c174de2..13946331bd4d 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,10 +48,6 @@
 #include "config.h"
 
 
-#ifndef CONFIG_TIPC_ZONES
-#define CONFIG_TIPC_ZONES 3
-#endif
-
 #ifndef CONFIG_TIPC_CLUSTERS
 #define CONFIG_TIPC_CLUSTERS 1
 #endif
@@ -84,7 +80,6 @@ const char tipc_alphabet[] =
 /* configurable TIPC parameters */
 
 u32 tipc_own_addr;
-int tipc_max_zones;
 int tipc_max_clusters;
 int tipc_max_nodes;
 int tipc_max_slaves;
@@ -209,7 +204,6 @@ static int __init tipc_init(void)
 	tipc_max_publications = 10000;
 	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
-	tipc_max_zones = CONFIG_TIPC_ZONES;
 	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index ca7e171c1043..9403a226a570 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -184,7 +184,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
  */
 
 extern u32 tipc_own_addr;
-extern int tipc_max_zones;
 extern int tipc_max_clusters;
 extern int tipc_max_nodes;
 extern int tipc_max_slaves;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index c2b4b86c2e6a..a25f8bb1e1d9 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -36,7 +36,6 @@
 
 #include "core.h"
 #include "net.h"
-#include "zone.h"
 #include "name_table.h"
 #include "name_distr.h"
 #include "subscr.h"
@@ -111,46 +110,56 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-static struct _zone *tipc_zones[256] = { NULL, };
-struct network tipc_net = { tipc_zones };
+struct network tipc_net;
 
 struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
 {
-	return tipc_zone_select_remote_node(tipc_net.zones[tipc_zone(addr)], addr, ref);
+	struct cluster *c_ptr;
+
+	c_ptr = tipc_net.clusters[1];
+	if (!c_ptr)
+		return NULL;
+	return tipc_cltr_select_node(c_ptr, ref);
 }
 
 u32 tipc_net_select_router(u32 addr, u32 ref)
 {
-	return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
+	struct cluster *c_ptr;
+
+	c_ptr = tipc_net.clusters[1];
+	if (!c_ptr)
+		return 0;
+	return tipc_cltr_select_router(c_ptr, ref);
 }
 
 void tipc_net_remove_as_router(u32 router)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
-		if (!tipc_net.zones[z_num])
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		if (!tipc_net.clusters[c_num])
 			continue;
-		tipc_zone_remove_as_router(tipc_net.zones[z_num], router);
+		tipc_cltr_remove_as_router(tipc_net.clusters[c_num], router);
 	}
 }
 
 void tipc_net_send_external_routes(u32 dest)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++) {
-		if (tipc_net.zones[z_num])
-			tipc_zone_send_external_routes(tipc_net.zones[z_num], dest);
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
+		if (tipc_net.clusters[c_num])
+			tipc_cltr_send_ext_routes(tipc_net.clusters[c_num],
+						  dest);
 	}
 }
 
 static void net_stop(void)
 {
-	u32 z_num;
+	u32 c_num;
 
-	for (z_num = 1; z_num <= tipc_max_zones; z_num++)
-		tipc_zone_delete(tipc_net.zones[z_num]);
+	for (c_num = 1; c_num <= tipc_max_clusters; c_num++)
+		tipc_cltr_delete(tipc_net.clusters[c_num]);
 }
 
 static void net_route_named_msg(struct sk_buff *buf)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index de2b9ad8f646..786c94007470 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,15 +37,17 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-struct _zone;
+struct cluster;
 
 /**
  * struct network - TIPC network structure
- * @zones: array of pointers to all zones within network
+ * @clusters: array of pointers to all clusters within zone
+ * @links: number of (unicast) links to cluster
  */
 
 struct network {
-	struct _zone **zones;
+	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
+	u32 links;
 };
 
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index df71dfc3a9ae..c20bd851a44a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -257,7 +257,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 
 		if (!n_ptr->links[bearer_id]) {
 			n_ptr->links[bearer_id] = l_ptr;
-			tipc_net.zones[tipc_zone(l_ptr->addr)]->links++;
+			tipc_net.links++;
 			n_ptr->link_cnt++;
 			return n_ptr;
 		}
@@ -271,7 +271,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 {
 	n_ptr->links[l_ptr->b_ptr->identity] = NULL;
-	tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
+	tipc_net.links--;
 	n_ptr->link_cnt--;
 }
 
@@ -656,8 +656,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 	/* Get space for all unicast links + multicast link */
 
-	payload_size = TLV_SPACE(sizeof(link_info)) *
-		(tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
+	payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1);
 	if (payload_size > 32768u) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/node.h b/net/tipc/node.h
index fff331b2d26c..7bfaf5e8c201 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -38,7 +38,6 @@
 #define _TIPC_NODE_H
 
 #include "node_subscr.h"
-#include "addr.h"
 #include "cluster.h"
 #include "bearer.h"
 
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
deleted file mode 100644
index 1b61ca8c48ef..000000000000
--- a/net/tipc/zone.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * net/tipc/zone.c: TIPC zone management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "zone.h"
-#include "cluster.h"
-#include "node.h"
-
-struct _zone *tipc_zone_create(u32 addr)
-{
-	struct _zone *z_ptr;
-	u32 z_num;
-
-	if (!tipc_addr_domain_valid(addr)) {
-		err("Zone creation failed, invalid domain 0x%x\n", addr);
-		return NULL;
-	}
-
-	z_ptr = kzalloc(sizeof(*z_ptr), GFP_ATOMIC);
-	if (!z_ptr) {
-		warn("Zone creation failed, insufficient memory\n");
-		return NULL;
-	}
-
-	z_num = tipc_zone(addr);
-	z_ptr->addr = tipc_addr(z_num, 0, 0);
-	tipc_net.zones[z_num] = z_ptr;
-	return z_ptr;
-}
-
-void tipc_zone_delete(struct _zone *z_ptr)
-{
-	u32 c_num;
-
-	if (!z_ptr)
-		return;
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		tipc_cltr_delete(z_ptr->clusters[c_num]);
-	}
-	kfree(z_ptr);
-}
-
-void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr)
-{
-	u32 c_num = tipc_cluster(c_ptr->addr);
-
-	assert(c_ptr->addr);
-	assert(c_num <= tipc_max_clusters);
-	assert(z_ptr->clusters[c_num] == NULL);
-	z_ptr->clusters[c_num] = c_ptr;
-}
-
-void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (z_ptr->clusters[c_num]) {
-			tipc_cltr_remove_as_router(z_ptr->clusters[c_num],
-						   router);
-		}
-	}
-}
-
-void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (z_ptr->clusters[c_num]) {
-			if (in_own_cluster(z_ptr->addr))
-				continue;
-			tipc_cltr_send_ext_routes(z_ptr->clusters[c_num], dest);
-		}
-	}
-}
-
-struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	u32 c_num;
-
-	if (!z_ptr)
-		return NULL;
-	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
-	if (!c_ptr)
-		return NULL;
-	n_ptr = tipc_cltr_select_node(c_ptr, ref);
-	if (n_ptr)
-		return n_ptr;
-
-	/* Links to any other clusters within this zone ? */
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		c_ptr = z_ptr->clusters[c_num];
-		if (!c_ptr)
-			return NULL;
-		n_ptr = tipc_cltr_select_node(c_ptr, ref);
-		if (n_ptr)
-			return n_ptr;
-	}
-	return NULL;
-}
-
-u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-	u32 c_num;
-	u32 router;
-
-	if (!z_ptr)
-		return 0;
-	c_ptr = z_ptr->clusters[tipc_cluster(addr)];
-	router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0;
-	if (router)
-		return router;
-
-	/* Links to any other clusters within the zone? */
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		c_ptr = z_ptr->clusters[c_num];
-		router = c_ptr ? tipc_cltr_select_router(c_ptr, ref) : 0;
-		if (router)
-			return router;
-	}
-	return 0;
-}
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
deleted file mode 100644
index bd1c20ce9d06..000000000000
--- a/net/tipc/zone.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * net/tipc/zone.h: Include file for TIPC zone management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005-2006, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_ZONE_H
-#define _TIPC_ZONE_H
-
-#include "node_subscr.h"
-#include "net.h"
-
-
-/**
- * struct _zone - TIPC zone structure
- * @addr: network address of zone
- * @clusters: array of pointers to all clusters within zone
- * @links: number of (unicast) links to zone
- */
-
-struct _zone {
-	u32 addr;
-	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
-	u32 links;
-};
-
-struct tipc_node *tipc_zone_select_remote_node(struct _zone *z_ptr, u32 addr, u32 ref);
-u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref);
-void tipc_zone_remove_as_router(struct _zone *z_ptr, u32 router);
-void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
-struct _zone *tipc_zone_create(u32 addr);
-void tipc_zone_delete(struct _zone *z_ptr);
-void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr);
-
-static inline struct _zone *tipc_zone_find(u32 addr)
-{
-	return tipc_net.zones[tipc_zone(addr)];
-}
-
-#endif
-- 
cgit v1.2.3


From 08c80e9a031df0a8f0269477a32f5eae47d7a146 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:17 +0000
Subject: tipc: Remove prototype code for supporting slave nodes

Simplifies routines and data structures that were intended to allow
TIPC to support slave nodes (i.e. nodes that did not have links to
all of the other nodes in its cluster, forcing TIPC to route messages
that it could not deliver directly through a non-slave node).

Currently, TIPC supports only networks containing non-slave nodes,
so this code is unnecessary.

Note: The latest edition of the TIPC 2.0 Specification has eliminated
the concept of slave nodes entirely.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/addr.c             |   2 -
 net/tipc/addr.h             |  10 ---
 net/tipc/cluster.c          | 166 ++++++--------------------------------------
 net/tipc/cluster.h          |   7 --
 net/tipc/config.c           |  21 +-----
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/discover.c         |   4 --
 net/tipc/msg.h              |   2 +-
 net/tipc/node.c             |  59 ++++------------
 net/tipc/port.c             |   5 +-
 12 files changed, 40 insertions(+), 247 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index fa3aeaafeab1..dcc2b8796d0a 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -97,7 +97,7 @@
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* obsoleted */
 #define  TIPC_CMD_GET_NETID         0x400B    /* tx none, rx unsigned */
 
 #define  TIPC_CMD_ENABLE_BEARER     0x4101    /* tx bearer_config, rx none */
@@ -133,7 +133,7 @@
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* obsoleted */
 #define  TIPC_CMD_SET_NETID         0x800B    /* tx unsigned, rx none */
 
 /*
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 3d0f97da5b5f..8823e03e52e0 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -53,8 +53,6 @@ int tipc_addr_domain_valid(u32 addr)
 	u32 z = tipc_zone(addr);
 	u32 max_nodes = tipc_max_nodes;
 
-	if (is_slave(addr))
-		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
 	if (n > max_nodes)
 		return 0;
 	if (c > tipc_max_clusters)
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index c1cc5724d8cc..a16c6c87208e 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -57,16 +57,6 @@ static inline int in_own_cluster(u32 addr)
 	return !((addr ^ tipc_own_addr) >> 12);
 }
 
-static inline int is_slave(u32 addr)
-{
-	return addr & 0x800;
-}
-
-static inline int may_route(u32 addr)
-{
-	return(addr ^ tipc_own_addr) >> 11;
-}
-
 /**
  * addr_domain - convert 2-bit scope value to equivalent message lookup domain
  *
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 996b2b67687e..6bc9f07be945 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -43,7 +43,6 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 
 struct tipc_node **tipc_local_nodes = NULL;
 struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
-u32 tipc_highest_allowed_slave = 0;
 
 struct cluster *tipc_cltr_create(u32 addr)
 {
@@ -57,10 +56,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 	}
 
 	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	if (in_own_cluster(addr))
-		max_nodes = LOWEST_SLAVE + tipc_max_slaves;
-	else
-		max_nodes = tipc_max_nodes + 1;
+	max_nodes = tipc_max_nodes + 1;
 
 	c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
 	if (c_ptr->nodes == NULL) {
@@ -71,7 +67,6 @@ struct cluster *tipc_cltr_create(u32 addr)
 
 	if (in_own_cluster(addr))
 		tipc_local_nodes = c_ptr->nodes;
-	c_ptr->highest_slave = LOWEST_SLAVE - 1;
 	c_ptr->highest_node = 0;
 
 	tipc_net.clusters[1] = c_ptr;
@@ -87,9 +82,6 @@ void tipc_cltr_delete(struct cluster *c_ptr)
 	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
 		tipc_node_delete(c_ptr->nodes[n_num]);
 	}
-	for (n_num = LOWEST_SLAVE; n_num <= c_ptr->highest_slave; n_num++) {
-		tipc_node_delete(c_ptr->nodes[n_num]);
-	}
 	kfree(c_ptr->nodes);
 	kfree(c_ptr);
 }
@@ -100,8 +92,6 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 	u32 n_num = tipc_node(n_ptr->addr);
 	u32 max_n_num = tipc_max_nodes;
 
-	if (in_own_cluster(n_ptr->addr))
-		max_n_num = tipc_highest_allowed_slave;
 	assert(n_num > 0);
 	assert(n_num <= max_n_num);
 	assert(c_ptr->nodes[n_num] == NULL);
@@ -237,41 +227,6 @@ void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest,
 	}
 }
 
-void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_slave;
-	u32 n_num;
-	int send = 0;
-
-	assert(!is_slave(dest));
-	assert(in_own_cluster(dest));
-	assert(in_own_cluster(c_ptr->addr));
-	if (highest <= LOWEST_SLAVE)
-		return;
-	buf = tipc_cltr_prepare_routing_msg(highest - LOWEST_SLAVE + 1,
-					    c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, SLAVE_ROUTING_TABLE);
-		for (n_num = LOWEST_SLAVE; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of lost route failed\n");
-	}
-}
-
 void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 {
 	struct sk_buff *buf;
@@ -282,7 +237,6 @@ void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 
 	if (in_own_cluster(c_ptr->addr))
 		return;
-	assert(!is_slave(dest));
 	assert(in_own_cluster(dest));
 	highest = c_ptr->highest_node;
 	buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr);
@@ -306,37 +260,6 @@ void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
 	}
 }
 
-void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_node;
-	u32 n_num;
-	int send = 0;
-
-	assert(is_slave(dest));
-	assert(in_own_cluster(c_ptr->addr));
-	buf = tipc_cltr_prepare_routing_msg(highest, c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, LOCAL_ROUTING_TABLE);
-		for (n_num = 1; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of local route failed\n");
-	}
-}
-
 void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 {
 	struct tipc_msg *msg = buf_msg(buf);
@@ -366,8 +289,6 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 	c_num = tipc_cluster(rem_node);
 
 	switch (msg_type(msg)) {
-	case LOCAL_ROUTING_TABLE:
-		assert(is_slave(tipc_own_addr));
 	case EXT_ROUTING_TABLE:
 		for (n_num = 1; n_num < table_size; n_num++) {
 			if (node_table[n_num]) {
@@ -382,29 +303,10 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 		}
 		break;
 	case SLAVE_ROUTING_TABLE:
-		assert(!is_slave(tipc_own_addr));
 		assert(in_own_cluster(c_ptr->addr));
-		for (n_num = 1; n_num < table_size; n_num++) {
-			if (node_table[n_num]) {
-				u32 slave_num = n_num + LOWEST_SLAVE;
-				u32 addr = tipc_addr(z_num, c_num, slave_num);
-				n_ptr = c_ptr->nodes[slave_num];
-				if (!n_ptr) {
-					n_ptr = tipc_node_create(addr);
-				}
-				if (n_ptr)
-					tipc_node_add_router(n_ptr, router);
-			}
-		}
 		break;
 	case ROUTE_ADDITION:
-		if (!is_slave(tipc_own_addr)) {
-			assert(!in_own_cluster(c_ptr->addr) ||
-			       is_slave(rem_node));
-		} else {
-			assert(in_own_cluster(c_ptr->addr) &&
-			       !is_slave(rem_node));
-		}
+		assert(!in_own_cluster(c_ptr->addr));
 		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
 		if (!n_ptr)
 			n_ptr = tipc_node_create(rem_node);
@@ -412,13 +314,7 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 			tipc_node_add_router(n_ptr, router);
 		break;
 	case ROUTE_REMOVAL:
-		if (!is_slave(tipc_own_addr)) {
-			assert(!in_own_cluster(c_ptr->addr) ||
-			       is_slave(rem_node));
-		} else {
-			assert(in_own_cluster(c_ptr->addr) &&
-			       !is_slave(rem_node));
-		}
+		assert(!in_own_cluster(c_ptr->addr));
 		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
 		if (n_ptr)
 			tipc_node_remove_router(n_ptr, router);
@@ -431,22 +327,12 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf)
 
 void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router)
 {
-	u32 start_entry;
-	u32 tstop;
 	u32 n_num;
 
-	if (is_slave(router))
-		return;	/* Slave nodes can not be routers */
-
-	if (in_own_cluster(c_ptr->addr)) {
-		start_entry = LOWEST_SLAVE;
-		tstop = c_ptr->highest_slave;
-	} else {
-		start_entry = 1;
-		tstop = c_ptr->highest_node;
-	}
+	if (in_own_cluster(c_ptr->addr))
+		return;
 
-	for (n_num = start_entry; n_num <= tstop; n_num++) {
+	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
 		if (c_ptr->nodes[n_num]) {
 			tipc_node_remove_router(c_ptr->nodes[n_num], router);
 		}
@@ -466,13 +352,11 @@ static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
 	u32 tstop;
 
 	assert(lower <= upper);
-	assert(((lower >= 1) && (lower <= tipc_max_nodes)) ||
-	       ((lower >= LOWEST_SLAVE) && (lower <= tipc_highest_allowed_slave)));
-	assert(((upper >= 1) && (upper <= tipc_max_nodes)) ||
-	       ((upper >= LOWEST_SLAVE) && (upper <= tipc_highest_allowed_slave)));
+	assert((lower >= 1) && (lower <= tipc_max_nodes));
+	assert((upper >= 1) && (upper <= tipc_max_nodes));
 	assert(in_own_cluster(c_ptr->addr));
 
-	tstop = is_slave(upper) ? c_ptr->highest_slave : c_ptr->highest_node;
+	tstop = c_ptr->highest_node;
 	if (tstop > upper)
 		tstop = upper;
 	for (n_num = lower; n_num <= tstop; n_num++) {
@@ -498,32 +382,23 @@ void tipc_cltr_broadcast(struct sk_buff *buf)
 	struct cluster *c_ptr;
 	struct tipc_node *n_ptr;
 	u32 n_num;
-	u32 tstart;
-	u32 tstop;
-	u32 node_type;
 
 	if (tipc_mode == TIPC_NET_MODE) {
 		c_ptr = tipc_cltr_find(tipc_own_addr);
 		assert(in_own_cluster(c_ptr->addr));	/* For now */
 
-		/* Send to standard nodes, then repeat loop sending to slaves */
-		tstart = 1;
-		tstop = c_ptr->highest_node;
-		for (node_type = 1; node_type <= 2; node_type++) {
-			for (n_num = tstart; n_num <= tstop; n_num++) {
-				n_ptr = c_ptr->nodes[n_num];
-				if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-					buf_copy = skb_copy(buf, GFP_ATOMIC);
-					if (buf_copy == NULL)
-						goto exit;
-					msg_set_destnode(buf_msg(buf_copy),
-							 n_ptr->addr);
-					tipc_link_send(buf_copy, n_ptr->addr,
-						       n_ptr->addr);
-				}
+		/* Send to nodes */
+		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
+			n_ptr = c_ptr->nodes[n_num];
+			if (n_ptr && tipc_node_has_active_links(n_ptr)) {
+				buf_copy = skb_copy(buf, GFP_ATOMIC);
+				if (buf_copy == NULL)
+					goto exit;
+				msg_set_destnode(buf_msg(buf_copy),
+						 n_ptr->addr);
+				tipc_link_send(buf_copy, n_ptr->addr,
+					       n_ptr->addr);
 			}
-			tstart = LOWEST_SLAVE;
-			tstop = c_ptr->highest_slave;
 		}
 	}
 exit:
@@ -532,7 +407,6 @@ exit:
 
 int tipc_cltr_init(void)
 {
-	tipc_highest_allowed_slave = LOWEST_SLAVE + tipc_max_slaves;
 	return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM;
 }
 
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 21493f7beb95..aa1fd6ab4d11 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -40,26 +40,21 @@
 #include "addr.h"
 #include "net.h"
 
-#define LOWEST_SLAVE  2048u
-
 /**
  * struct cluster - TIPC cluster structure
  * @addr: network address of cluster
  * @nodes: array of pointers to all nodes within cluster
  * @highest_node: id of highest numbered node within cluster
- * @highest_slave: (used for secondary node support)
  */
 
 struct cluster {
 	u32 addr;
 	struct tipc_node **nodes;
 	u32 highest_node;
-	u32 highest_slave;
 };
 
 
 extern struct tipc_node **tipc_local_nodes;
-extern u32 tipc_highest_allowed_slave;
 extern struct tipc_node_map tipc_cltr_bcast_nodes;
 
 void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
@@ -70,12 +65,10 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf);
 struct cluster *tipc_cltr_create(u32 addr);
 void tipc_cltr_delete(struct cluster *c_ptr);
 void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
-void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
 
 void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest);
 void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
 
 static inline struct cluster *tipc_cltr_find(u32 addr)
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 8de97ddb0427..05dc102300ae 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -301,19 +301,6 @@ static struct sk_buff *cfg_set_max_nodes(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_slaves(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value != 0)
-		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
-						   " (max secondary nodes fixed at 0)");
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_netid(void)
 {
 	u32 value;
@@ -439,9 +426,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_NODES:
 		rep_tlv_buf = cfg_set_max_nodes();
 		break;
-	case TIPC_CMD_SET_MAX_SLAVES:
-		rep_tlv_buf = cfg_set_max_slaves();
-		break;
 	case TIPC_CMD_SET_NETID:
 		rep_tlv_buf = cfg_set_netid();
 		break;
@@ -463,9 +447,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_NODES:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
 		break;
-	case TIPC_CMD_GET_MAX_SLAVES:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_slaves);
-		break;
 	case TIPC_CMD_GET_NETID:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
 		break;
@@ -475,6 +456,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 		break;
 	case TIPC_CMD_SET_MAX_ZONES:
 	case TIPC_CMD_GET_MAX_ZONES:
+	case TIPC_CMD_SET_MAX_SLAVES:
+	case TIPC_CMD_GET_MAX_SLAVES:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (obsolete command)");
 		break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 13946331bd4d..8b7af893971e 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -56,10 +56,6 @@
 #define CONFIG_TIPC_NODES 255
 #endif
 
-#ifndef CONFIG_TIPC_SLAVE_NODES
-#define CONFIG_TIPC_SLAVE_NODES 0
-#endif
-
 #ifndef CONFIG_TIPC_PORTS
 #define CONFIG_TIPC_PORTS 8191
 #endif
@@ -82,7 +78,6 @@ const char tipc_alphabet[] =
 u32 tipc_own_addr;
 int tipc_max_clusters;
 int tipc_max_nodes;
-int tipc_max_slaves;
 int tipc_max_ports;
 int tipc_max_subscriptions;
 int tipc_max_publications;
@@ -206,7 +201,6 @@ static int __init tipc_init(void)
 	tipc_max_ports = CONFIG_TIPC_PORTS;
 	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
-	tipc_max_slaves = CONFIG_TIPC_SLAVE_NODES;
 	tipc_net_id = 4711;
 
 	if ((res = tipc_core_start()))
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 9403a226a570..8313a1689565 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -186,7 +186,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
 extern u32 tipc_own_addr;
 extern int tipc_max_clusters;
 extern int tipc_max_nodes;
-extern int tipc_max_slaves;
 extern int tipc_max_ports;
 extern int tipc_max_subscriptions;
 extern int tipc_max_publications;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index f2ce36baf42e..80799f6ba892 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -149,10 +149,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	}
 	if (!tipc_in_scope(dest, tipc_own_addr))
 		return;
-	if (is_slave(tipc_own_addr) && is_slave(orig))
-		return;
-	if (is_slave(orig) && !in_own_cluster(orig))
-		return;
 	if (in_own_cluster(orig)) {
 		/* Always accept link here */
 		struct sk_buff *rbuf;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index aee53864d7a0..c1b6217838e6 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -850,7 +850,7 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
  * Routing table message types
  */
 #define EXT_ROUTING_TABLE    0
-#define LOCAL_ROUTING_TABLE  1
+#define LOCAL_ROUTING_TABLE  1		/* obsoleted */
 #define SLAVE_ROUTING_TABLE  2
 #define ROUTE_ADDITION       3
 #define ROUTE_REMOVAL        4
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c20bd851a44a..8ffbdb33b2cb 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -333,8 +333,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	/* Syncronize broadcast acks */
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
-	if (is_slave(tipc_own_addr))
-		return;
 	if (!in_own_cluster(n_ptr->addr)) {
 		/* Usage case 1 (see above) */
 		c_ptr = tipc_cltr_find(tipc_own_addr);
@@ -347,13 +345,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	}
 
 	c_ptr = n_ptr->owner;
-	if (is_slave(n_ptr->addr)) {
-		/* Usage case 2 (see above) */
-		tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1, tipc_max_nodes);
-		tipc_cltr_send_local_routes(c_ptr, n_ptr->addr);
-		return;
-	}
-
 	if (n_ptr->bclink.supported) {
 		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
@@ -362,9 +353,6 @@ static void node_established_contact(struct tipc_node *n_ptr)
 
 	/* Case 3 (see above) */
 	tipc_net_send_external_routes(n_ptr->addr);
-	tipc_cltr_send_slave_routes(c_ptr, n_ptr->addr);
-	tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, LOWEST_SLAVE,
-				  tipc_highest_allowed_slave);
 }
 
 static void node_cleanup_finished(unsigned long node_addr)
@@ -404,33 +392,20 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	}
 
 	/* Update routing tables */
-	if (is_slave(tipc_own_addr)) {
-		tipc_net_remove_as_router(n_ptr->addr);
+	if (!in_own_cluster(n_ptr->addr)) {
+		/* Case 4 (see above) */
+		c_ptr = tipc_cltr_find(tipc_own_addr);
+		tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
+					   tipc_max_nodes);
 	} else {
-		if (!in_own_cluster(n_ptr->addr)) {
-			/* Case 4 (see above) */
-			c_ptr = tipc_cltr_find(tipc_own_addr);
-			tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-						   tipc_max_nodes);
-		} else {
-			/* Case 5 (see above) */
-			c_ptr = tipc_cltr_find(n_ptr->addr);
-			if (is_slave(n_ptr->addr)) {
-				tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-							   tipc_max_nodes);
-			} else {
-				if (n_ptr->bclink.supported) {
-					tipc_nmap_remove(&tipc_cltr_bcast_nodes,
-							 n_ptr->addr);
-					if (n_ptr->addr < tipc_own_addr)
-						tipc_own_tag--;
-				}
-				tipc_net_remove_as_router(n_ptr->addr);
-				tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr,
-							   LOWEST_SLAVE,
-							   tipc_highest_allowed_slave);
-			}
+		/* Case 5 (see above) */
+		c_ptr = tipc_cltr_find(n_ptr->addr);
+		if (n_ptr->bclink.supported) {
+			tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+			if (n_ptr->addr < tipc_own_addr)
+				tipc_own_tag--;
 		}
+		tipc_net_remove_as_router(n_ptr->addr);
 	}
 	if (tipc_node_has_active_routes(n_ptr))
 		return;
@@ -482,7 +457,7 @@ struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
 		return n_ptr;
 
 	/* Cluster local system nodes *must* have direct links */
-	if (!is_slave(addr) && in_own_cluster(addr))
+	if (in_own_cluster(addr))
 		return NULL;
 
 	/* Look for cluster local router with direct link to node */
@@ -490,11 +465,6 @@ struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
 	if (router_addr)
 		return tipc_node_select(router_addr, selector);
 
-	/* Slave nodes can only be accessed within own cluster via a
-	   known router with direct link -- if no router was found,give up */
-	if (is_slave(addr))
-		return NULL;
-
 	/* Inter zone/cluster -- find any direct link to remote cluster */
 	addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
 	n_ptr = tipc_net_select_remote_node(addr, selector);
@@ -603,8 +573,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 		return tipc_cfg_reply_none();
 	}
 
-	/* For now, get space for all other nodes
-	   (will need to modify this when slave nodes are supported */
+	/* For now, get space for all other nodes */
 
 	payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
 	if (payload_size > 32768u) {
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 7873283f4965..c033cb87b964 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1110,10 +1110,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	msg_set_origport(msg, p_ptr->publ.ref);
 	msg_set_transp_seqno(msg, 42);
 	msg_set_type(msg, TIPC_CONN_MSG);
-	if (!may_route(peer->node))
-		msg_set_hdr_sz(msg, SHORT_H_SIZE);
-	else
-		msg_set_hdr_sz(msg, LONG_H_SIZE);
+	msg_set_hdr_sz(msg, SHORT_H_SIZE);
 
 	p_ptr->probing_interval = PROBING_INTERVAL;
 	p_ptr->probing_state = CONFIRMED;
-- 
cgit v1.2.3


From 51a8e4dee7653698ba4c6e7de71053665f075273 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:18 +0000
Subject: tipc: Remove prototype code for supporting inter-cluster routing

Eliminates routines and data structures that were intended to allow
TIPC to route messages to other clusters. Currently, TIPC supports only
networks consisting of a single cluster within a single zone, so this
code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   5 -
 net/tipc/addr.h             |  15 ---
 net/tipc/cluster.c          | 279 +-------------------------------------------
 net/tipc/cluster.h          |   8 --
 net/tipc/link.c             |  13 +--
 net/tipc/msg.h              |   7 +-
 net/tipc/net.c              |  45 -------
 net/tipc/net.h              |   4 -
 net/tipc/node.c             | 166 ++------------------------
 net/tipc/node.h             |  21 ----
 10 files changed, 15 insertions(+), 548 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index dcc2b8796d0a..1f38df1202ba 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -254,11 +254,6 @@ struct tipc_link_create {
 	struct tipc_media_addr peer_addr;
 	char bearer_name[TIPC_MAX_BEARER_NAME];
 };
-
-struct tipc_route_info {
-	__u32 dest;
-	__u32 router;
-};
 #endif
 
 /*
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index a16c6c87208e..2490fadd0caf 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -37,21 +37,6 @@
 #ifndef _TIPC_ADDR_H
 #define _TIPC_ADDR_H
 
-static inline u32 own_node(void)
-{
-	return tipc_node(tipc_own_addr);
-}
-
-static inline u32 own_cluster(void)
-{
-	return tipc_cluster(tipc_own_addr);
-}
-
-static inline u32 own_zone(void)
-{
-	return tipc_zone(tipc_own_addr);
-}
-
 static inline int in_own_cluster(u32 addr)
 {
 	return !((addr ^ tipc_own_addr) >> 12);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index 6bc9f07be945..ba6f5bfa0cdb 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -38,9 +38,6 @@
 #include "cluster.h"
 #include "link.h"
 
-static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
-				u32 lower, u32 upper);
-
 struct tipc_node **tipc_local_nodes = NULL;
 struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
 
@@ -65,8 +62,7 @@ struct cluster *tipc_cltr_create(u32 addr)
 		return NULL;
 	}
 
-	if (in_own_cluster(addr))
-		tipc_local_nodes = c_ptr->nodes;
+	tipc_local_nodes = c_ptr->nodes;
 	c_ptr->highest_node = 0;
 
 	tipc_net.clusters[1] = c_ptr;
@@ -100,278 +96,6 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
 		c_ptr->highest_node = n_num;
 }
 
-/**
- * tipc_cltr_select_router - select router to a cluster
- *
- * Uses deterministic and fair algorithm.
- */
-
-u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref)
-{
-	u32 n_num;
-	u32 ulim = c_ptr->highest_node;
-	u32 mask;
-	u32 tstart;
-
-	assert(!in_own_cluster(c_ptr->addr));
-	if (!ulim)
-		return 0;
-
-	/* Start entry must be random */
-	mask = tipc_max_nodes;
-	while (mask > ulim)
-		mask >>= 1;
-	tstart = ref & mask;
-	n_num = tstart;
-
-	/* Lookup upwards with wrap-around */
-	do {
-		if (tipc_node_is_up(c_ptr->nodes[n_num]))
-			break;
-	} while (++n_num <= ulim);
-	if (n_num > ulim) {
-		n_num = 1;
-		do {
-			if (tipc_node_is_up(c_ptr->nodes[n_num]))
-				break;
-		} while (++n_num < tstart);
-		if (n_num == tstart)
-			return 0;
-	}
-	assert(n_num <= ulim);
-	return tipc_node_select_router(c_ptr->nodes[n_num], ref);
-}
-
-/**
- * tipc_cltr_select_node - select destination node within a remote cluster
- *
- * Uses deterministic and fair algorithm.
- */
-
-struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
-{
-	u32 n_num;
-	u32 mask = tipc_max_nodes;
-	u32 start_entry;
-
-	assert(!in_own_cluster(c_ptr->addr));
-	if (!c_ptr->highest_node)
-		return NULL;
-
-	/* Start entry must be random */
-	while (mask > c_ptr->highest_node) {
-		mask >>= 1;
-	}
-	start_entry = (selector & mask) ? selector & mask : 1u;
-	assert(start_entry <= c_ptr->highest_node);
-
-	/* Lookup upwards with wrap-around */
-	for (n_num = start_entry; n_num <= c_ptr->highest_node; n_num++) {
-		if (tipc_node_has_active_links(c_ptr->nodes[n_num]))
-			return c_ptr->nodes[n_num];
-	}
-	for (n_num = 1; n_num < start_entry; n_num++) {
-		if (tipc_node_has_active_links(c_ptr->nodes[n_num]))
-			return c_ptr->nodes[n_num];
-	}
-	return NULL;
-}
-
-/*
- *    Routing table management: See description in node.c
- */
-
-static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
-{
-	u32 size = INT_H_SIZE + data_size;
-	struct sk_buff *buf = tipc_buf_acquire(size);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		memset((char *)msg, 0, size);
-		tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
-	}
-	return buf;
-}
-
-void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest,
-			     u32 lower, u32 upper)
-{
-	struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, dest);
-		msg_set_type(msg, ROUTE_ADDITION);
-		tipc_cltr_multicast(c_ptr, buf, lower, upper);
-	} else {
-		warn("Memory squeeze: broadcast of new route failed\n");
-	}
-}
-
-void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest,
-				u32 lower, u32 upper)
-{
-	struct sk_buff *buf = tipc_cltr_prepare_routing_msg(0, c_ptr->addr);
-	struct tipc_msg *msg;
-
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, dest);
-		msg_set_type(msg, ROUTE_REMOVAL);
-		tipc_cltr_multicast(c_ptr, buf, lower, upper);
-	} else {
-		warn("Memory squeeze: broadcast of lost route failed\n");
-	}
-}
-
-void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest)
-{
-	struct sk_buff *buf;
-	struct tipc_msg *msg;
-	u32 highest = c_ptr->highest_node;
-	u32 n_num;
-	int send = 0;
-
-	if (in_own_cluster(c_ptr->addr))
-		return;
-	assert(in_own_cluster(dest));
-	highest = c_ptr->highest_node;
-	buf = tipc_cltr_prepare_routing_msg(highest + 1, c_ptr->addr);
-	if (buf) {
-		msg = buf_msg(buf);
-		msg_set_remote_node(msg, c_ptr->addr);
-		msg_set_type(msg, EXT_ROUTING_TABLE);
-		for (n_num = 1; n_num <= highest; n_num++) {
-			if (c_ptr->nodes[n_num] &&
-			    tipc_node_has_active_links(c_ptr->nodes[n_num])) {
-				send = 1;
-				msg_set_dataoctet(msg, n_num);
-			}
-		}
-		if (send)
-			tipc_link_send(buf, dest, dest);
-		else
-			buf_discard(buf);
-	} else {
-		warn("Memory squeeze: broadcast of external route failed\n");
-	}
-}
-
-void tipc_cltr_recv_routing_table(struct sk_buff *buf)
-{
-	struct tipc_msg *msg = buf_msg(buf);
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	unchar *node_table;
-	u32 table_size;
-	u32 router;
-	u32 rem_node = msg_remote_node(msg);
-	u32 z_num;
-	u32 c_num;
-	u32 n_num;
-
-	c_ptr = tipc_cltr_find(rem_node);
-	if (!c_ptr) {
-		c_ptr = tipc_cltr_create(rem_node);
-		if (!c_ptr) {
-			buf_discard(buf);
-			return;
-		}
-	}
-
-	node_table = buf->data + msg_hdr_sz(msg);
-	table_size = msg_size(msg) - msg_hdr_sz(msg);
-	router = msg_prevnode(msg);
-	z_num = tipc_zone(rem_node);
-	c_num = tipc_cluster(rem_node);
-
-	switch (msg_type(msg)) {
-	case EXT_ROUTING_TABLE:
-		for (n_num = 1; n_num < table_size; n_num++) {
-			if (node_table[n_num]) {
-				u32 addr = tipc_addr(z_num, c_num, n_num);
-				n_ptr = c_ptr->nodes[n_num];
-				if (!n_ptr) {
-					n_ptr = tipc_node_create(addr);
-				}
-				if (n_ptr)
-					tipc_node_add_router(n_ptr, router);
-			}
-		}
-		break;
-	case SLAVE_ROUTING_TABLE:
-		assert(in_own_cluster(c_ptr->addr));
-		break;
-	case ROUTE_ADDITION:
-		assert(!in_own_cluster(c_ptr->addr));
-		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
-		if (!n_ptr)
-			n_ptr = tipc_node_create(rem_node);
-		if (n_ptr)
-			tipc_node_add_router(n_ptr, router);
-		break;
-	case ROUTE_REMOVAL:
-		assert(!in_own_cluster(c_ptr->addr));
-		n_ptr = c_ptr->nodes[tipc_node(rem_node)];
-		if (n_ptr)
-			tipc_node_remove_router(n_ptr, router);
-		break;
-	default:
-		assert(!"Illegal routing manager message received\n");
-	}
-	buf_discard(buf);
-}
-
-void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router)
-{
-	u32 n_num;
-
-	if (in_own_cluster(c_ptr->addr))
-		return;
-
-	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-		if (c_ptr->nodes[n_num]) {
-			tipc_node_remove_router(c_ptr->nodes[n_num], router);
-		}
-	}
-}
-
-/**
- * tipc_cltr_multicast - multicast message to local nodes
- */
-
-static void tipc_cltr_multicast(struct cluster *c_ptr, struct sk_buff *buf,
-			 u32 lower, u32 upper)
-{
-	struct sk_buff *buf_copy;
-	struct tipc_node *n_ptr;
-	u32 n_num;
-	u32 tstop;
-
-	assert(lower <= upper);
-	assert((lower >= 1) && (lower <= tipc_max_nodes));
-	assert((upper >= 1) && (upper <= tipc_max_nodes));
-	assert(in_own_cluster(c_ptr->addr));
-
-	tstop = c_ptr->highest_node;
-	if (tstop > upper)
-		tstop = upper;
-	for (n_num = lower; n_num <= tstop; n_num++) {
-		n_ptr = c_ptr->nodes[n_num];
-		if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-			buf_copy = skb_copy(buf, GFP_ATOMIC);
-			if (buf_copy == NULL)
-				break;
-			msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
-			tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
-		}
-	}
-	buf_discard(buf);
-}
-
 /**
  * tipc_cltr_broadcast - broadcast message to all nodes within cluster
  */
@@ -385,7 +109,6 @@ void tipc_cltr_broadcast(struct sk_buff *buf)
 
 	if (tipc_mode == TIPC_NET_MODE) {
 		c_ptr = tipc_cltr_find(tipc_own_addr);
-		assert(in_own_cluster(c_ptr->addr));	/* For now */
 
 		/* Send to nodes */
 		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index aa1fd6ab4d11..e4b6e4e27371 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -57,20 +57,12 @@ struct cluster {
 extern struct tipc_node **tipc_local_nodes;
 extern struct tipc_node_map tipc_cltr_bcast_nodes;
 
-void tipc_cltr_remove_as_router(struct cluster *c_ptr, u32 router);
-void tipc_cltr_send_ext_routes(struct cluster *c_ptr, u32 dest);
-struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector);
-u32 tipc_cltr_select_router(struct cluster *c_ptr, u32 ref);
-void tipc_cltr_recv_routing_table(struct sk_buff *buf);
 struct cluster *tipc_cltr_create(u32 addr);
 void tipc_cltr_delete(struct cluster *c_ptr);
 void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
 void tipc_cltr_broadcast(struct sk_buff *buf);
 int tipc_cltr_init(void);
 
-void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
-
 static inline struct cluster *tipc_cltr_find(u32 addr)
 {
 	if (!in_own_cluster(addr))
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cf414cf05e72..671ffd3c0e53 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1061,7 +1061,7 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
 	int res = -ELINKCONG;
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(dest, selector);
+	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
@@ -1137,7 +1137,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
 		return tipc_port_recv_msg(buf);
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(destnode, selector);
+	n_ptr = tipc_node_find(destnode);
 	if (likely(n_ptr)) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector];
@@ -1186,7 +1186,7 @@ again:
 			!sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
-	node = tipc_node_select(destaddr, selector);
+	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
 		l_ptr = node->active_links[selector];
@@ -1376,7 +1376,7 @@ error:
 	 * Now we have a buffer chain. Select a link and check
 	 * that packet size is still OK
 	 */
-	node = tipc_node_select(destaddr, sender->publ.ref & 1);
+	node = tipc_node_find(destaddr);
 	if (likely(node)) {
 		tipc_node_lock(node);
 		l_ptr = node->active_links[sender->publ.ref & 1];
@@ -1893,7 +1893,7 @@ deliver:
 						continue;
 					case ROUTE_DISTRIBUTOR:
 						tipc_node_unlock(n_ptr);
-						tipc_cltr_recv_routing_table(buf);
+						buf_discard(buf);
 						continue;
 					case NAME_DISTRIBUTOR:
 						tipc_node_unlock(n_ptr);
@@ -2852,7 +2852,6 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
 	l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
 	l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;
 	l_ptr->queue_limit[CONN_MANAGER] = 1200;
-	l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200;
 	l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
 	l_ptr->queue_limit[NAME_DISTRIBUTOR] = 3000;
 	/* FRAGMENT and LAST_FRAGMENT packets */
@@ -3154,7 +3153,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 		return MAX_MSG_SIZE;
 
 	read_lock_bh(&tipc_net_lock);
-	n_ptr = tipc_node_select(dest, selector);
+	n_ptr = tipc_node_find(dest);
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c1b6217838e6..68b65ef3e74b 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -540,7 +540,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
 #define  MSG_BUNDLER          6
 #define  LINK_PROTOCOL        7
 #define  CONN_MANAGER         8
-#define  ROUTE_DISTRIBUTOR    9
+#define  ROUTE_DISTRIBUTOR    9		/* obsoleted */
 #define  CHANGEOVER_PROTOCOL  10
 #define  NAME_DISTRIBUTOR     11
 #define  MSG_FRAGMENTER       12
@@ -819,11 +819,6 @@ static inline void msg_set_remote_node(struct tipc_msg *m, u32 a)
 	msg_set_word(m, msg_hdr_sz(m)/4, a);
 }
 
-static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
-{
-	msg_data(m)[pos + 4] = 1;
-}
-
 /*
  * Segmentation message types
  */
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a25f8bb1e1d9..3967f1f6d97f 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,48 +112,6 @@
 DEFINE_RWLOCK(tipc_net_lock);
 struct network tipc_net;
 
-struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-
-	c_ptr = tipc_net.clusters[1];
-	if (!c_ptr)
-		return NULL;
-	return tipc_cltr_select_node(c_ptr, ref);
-}
-
-u32 tipc_net_select_router(u32 addr, u32 ref)
-{
-	struct cluster *c_ptr;
-
-	c_ptr = tipc_net.clusters[1];
-	if (!c_ptr)
-		return 0;
-	return tipc_cltr_select_router(c_ptr, ref);
-}
-
-void tipc_net_remove_as_router(u32 router)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (!tipc_net.clusters[c_num])
-			continue;
-		tipc_cltr_remove_as_router(tipc_net.clusters[c_num], router);
-	}
-}
-
-void tipc_net_send_external_routes(u32 dest)
-{
-	u32 c_num;
-
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++) {
-		if (tipc_net.clusters[c_num])
-			tipc_cltr_send_ext_routes(tipc_net.clusters[c_num],
-						  dest);
-	}
-}
-
 static void net_stop(void)
 {
 	u32 c_num;
@@ -225,9 +183,6 @@ void tipc_net_route_msg(struct sk_buff *buf)
 			return;
 		}
 		switch (msg_user(msg)) {
-		case ROUTE_DISTRIBUTOR:
-			tipc_cltr_recv_routing_table(buf);
-			break;
 		case NAME_DISTRIBUTOR:
 			tipc_named_recv(buf);
 			break;
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 786c94007470..6e402d9b33e0 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -54,11 +54,7 @@ struct network {
 extern struct network tipc_net;
 extern rwlock_t tipc_net_lock;
 
-void tipc_net_remove_as_router(u32 router);
-void tipc_net_send_external_routes(u32 dest);
 void tipc_net_route_msg(struct sk_buff *buf);
-struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref);
-u32 tipc_net_select_router(u32 addr, u32 ref);
 
 int tipc_net_start(u32 addr);
 void tipc_net_stop(void);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8ffbdb33b2cb..c47cc69eb575 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -95,11 +95,10 @@ struct tipc_node *tipc_node_create(u32 addr)
 	}
 
 	n_ptr->addr = addr;
-		spin_lock_init(&n_ptr->lock);
+	spin_lock_init(&n_ptr->lock);
 	INIT_LIST_HEAD(&n_ptr->nsub);
 	n_ptr->owner = c_ptr;
 	tipc_cltr_attach_node(c_ptr, n_ptr);
-	n_ptr->last_router = -1;
 
 	/* Insert node into ordered list */
 	for (curr_node = &tipc_nodes; *curr_node;
@@ -229,14 +228,9 @@ int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
 	return n_ptr->working_links > 1;
 }
 
-static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
-{
-	return n_ptr && (n_ptr->last_router >= 0);
-}
-
 int tipc_node_is_up(struct tipc_node *n_ptr)
 {
-	return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
+	return tipc_node_has_active_links(n_ptr);
 }
 
 struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -323,36 +317,17 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 
 static void node_established_contact(struct tipc_node *n_ptr)
 {
-	struct cluster *c_ptr;
-
 	dbg("node_established_contact:-> %x\n", n_ptr->addr);
-	if (!tipc_node_has_active_routes(n_ptr) && in_own_cluster(n_ptr->addr)) {
-		tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
-	}
+	tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
 
 	/* Syncronize broadcast acks */
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
-	if (!in_own_cluster(n_ptr->addr)) {
-		/* Usage case 1 (see above) */
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-		if (!c_ptr)
-			c_ptr = tipc_cltr_create(tipc_own_addr);
-		if (c_ptr)
-			tipc_cltr_bcast_new_route(c_ptr, n_ptr->addr, 1,
-						  tipc_max_nodes);
-		return;
-	}
-
-	c_ptr = n_ptr->owner;
 	if (n_ptr->bclink.supported) {
 		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag++;
 	}
-
-	/* Case 3 (see above) */
-	tipc_net_send_external_routes(n_ptr->addr);
 }
 
 static void node_cleanup_finished(unsigned long node_addr)
@@ -371,7 +346,6 @@ static void node_cleanup_finished(unsigned long node_addr)
 
 static void node_lost_contact(struct tipc_node *n_ptr)
 {
-	struct cluster *c_ptr;
 	struct tipc_node_subscr *ns, *tns;
 	char addr_string[16];
 	u32 i;
@@ -392,23 +366,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	}
 
 	/* Update routing tables */
-	if (!in_own_cluster(n_ptr->addr)) {
-		/* Case 4 (see above) */
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-		tipc_cltr_bcast_lost_route(c_ptr, n_ptr->addr, 1,
-					   tipc_max_nodes);
-	} else {
-		/* Case 5 (see above) */
-		c_ptr = tipc_cltr_find(n_ptr->addr);
-		if (n_ptr->bclink.supported) {
-			tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
-			if (n_ptr->addr < tipc_own_addr)
-				tipc_own_tag--;
-		}
-		tipc_net_remove_as_router(n_ptr->addr);
+	if (n_ptr->bclink.supported) {
+		tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		if (n_ptr->addr < tipc_own_addr)
+			tipc_own_tag--;
 	}
-	if (tipc_node_has_active_routes(n_ptr))
-		return;
 
 	info("Lost contact with %s\n",
 	     tipc_addr_string_fill(addr_string, n_ptr->addr));
@@ -437,120 +399,6 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
 }
 
-/**
- * tipc_node_select_next_hop - find the next-hop node for a message
- *
- * Called by when cluster local lookup has failed.
- */
-
-struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector)
-{
-	struct tipc_node *n_ptr;
-	u32 router_addr;
-
-	if (!tipc_addr_domain_valid(addr))
-		return NULL;
-
-	/* Look for direct link to destination processsor */
-	n_ptr = tipc_node_find(addr);
-	if (n_ptr && tipc_node_has_active_links(n_ptr))
-		return n_ptr;
-
-	/* Cluster local system nodes *must* have direct links */
-	if (in_own_cluster(addr))
-		return NULL;
-
-	/* Look for cluster local router with direct link to node */
-	router_addr = tipc_node_select_router(n_ptr, selector);
-	if (router_addr)
-		return tipc_node_select(router_addr, selector);
-
-	/* Inter zone/cluster -- find any direct link to remote cluster */
-	addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	n_ptr = tipc_net_select_remote_node(addr, selector);
-	if (n_ptr && tipc_node_has_active_links(n_ptr))
-		return n_ptr;
-
-	/* Last resort -- look for any router to anywhere in remote zone */
-	router_addr =  tipc_net_select_router(addr, selector);
-	if (router_addr)
-		return tipc_node_select(router_addr, selector);
-
-	return NULL;
-}
-
-/**
- * tipc_node_select_router - select router to reach specified node
- *
- * Uses a deterministic and fair algorithm for selecting router node.
- */
-
-u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref)
-{
-	u32 ulim;
-	u32 mask;
-	u32 start;
-	u32 r;
-
-	if (!n_ptr)
-		return 0;
-
-	if (n_ptr->last_router < 0)
-		return 0;
-	ulim = ((n_ptr->last_router + 1) * 32) - 1;
-
-	/* Start entry must be random */
-	mask = tipc_max_nodes;
-	while (mask > ulim)
-		mask >>= 1;
-	start = ref & mask;
-	r = start;
-
-	/* Lookup upwards with wrap-around */
-	do {
-		if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
-			break;
-	} while (++r <= ulim);
-	if (r > ulim) {
-		r = 1;
-		do {
-			if (((n_ptr->routers[r / 32]) >> (r % 32)) & 1)
-				break;
-		} while (++r < start);
-		assert(r != start);
-	}
-	assert(r && (r <= ulim));
-	return tipc_addr(own_zone(), own_cluster(), r);
-}
-
-void tipc_node_add_router(struct tipc_node *n_ptr, u32 router)
-{
-	u32 r_num = tipc_node(router);
-
-	n_ptr->routers[r_num / 32] =
-		((1 << (r_num % 32)) | n_ptr->routers[r_num / 32]);
-	n_ptr->last_router = tipc_max_nodes / 32;
-	while ((--n_ptr->last_router >= 0) &&
-	       !n_ptr->routers[n_ptr->last_router]);
-}
-
-void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
-{
-	u32 r_num = tipc_node(router);
-
-	if (n_ptr->last_router < 0)
-		return;		/* No routes */
-
-	n_ptr->routers[r_num / 32] =
-		((~(1 << (r_num % 32))) & (n_ptr->routers[r_num / 32]));
-	n_ptr->last_router = tipc_max_nodes / 32;
-	while ((--n_ptr->last_router >= 0) &&
-	       !n_ptr->routers[n_ptr->last_router]);
-
-	if (!tipc_node_is_up(n_ptr))
-		node_lost_contact(n_ptr);
-}
-
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 domain;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 7bfaf5e8c201..3abaaa24c77d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -54,8 +54,6 @@
  * @cleanup_required: non-zero if cleaning up after a prior loss of contact
  * @link_cnt: number of links to node
  * @permit_changeover: non-zero if node has redundant links to this system
- * @routers: bitmap (used for multicluster communication)
- * @last_router: (used for multicluster communication)
  * @bclink: broadcast-related info
  *    @supported: non-zero if node supports TIPC b'cast capability
  *    @acked: sequence # of last outbound b'cast message acknowledged by node
@@ -80,8 +78,6 @@ struct tipc_node {
 	int working_links;
 	int cleanup_required;
 	int permit_changeover;
-	u32 routers[512/32];
-	int last_router;
 	struct {
 		int supported;
 		u32 acked;
@@ -105,11 +101,7 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr);
 void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr);
 int tipc_node_has_active_links(struct tipc_node *n_ptr);
 int tipc_node_has_redundant_links(struct tipc_node *n_ptr);
-u32 tipc_node_select_router(struct tipc_node *n_ptr, u32 ref);
-struct tipc_node *tipc_node_select_next_hop(u32 addr, u32 selector);
 int tipc_node_is_up(struct tipc_node *n_ptr);
-void tipc_node_add_router(struct tipc_node *n_ptr, u32 router);
-void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
 
@@ -117,22 +109,9 @@ static inline struct tipc_node *tipc_node_find(u32 addr)
 {
 	if (likely(in_own_cluster(addr)))
 		return tipc_local_nodes[tipc_node(addr)];
-	else if (tipc_addr_domain_valid(addr)) {
-		struct cluster *c_ptr = tipc_cltr_find(addr);
-
-		if (c_ptr)
-			return c_ptr->nodes[tipc_node(addr)];
-	}
 	return NULL;
 }
 
-static inline struct tipc_node *tipc_node_select(u32 addr, u32 selector)
-{
-	if (likely(in_own_cluster(addr)))
-		return tipc_local_nodes[tipc_node(addr)];
-	return tipc_node_select_next_hop(addr, selector);
-}
-
 static inline void tipc_node_lock(struct tipc_node *n_ptr)
 {
 	spin_lock_bh(&n_ptr->lock);
-- 
cgit v1.2.3


From 8f92df6ad49da958d97e171762d0a97a3dc738f1 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:19 +0000
Subject: tipc: Remove prototype code for supporting multiple clusters

Eliminates routines, data structures, and files that were intended
to allow TIPC to support a network containing multiple clusters.
Currently, TIPC supports only networks consisting of a single cluster
within a single zone, so this code is unnecessary.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |   4 +-
 net/tipc/Kconfig            |  10 ----
 net/tipc/Makefile           |   2 +-
 net/tipc/addr.c             |   5 +-
 net/tipc/bcast.c            |   9 ++-
 net/tipc/bcast.h            |   1 +
 net/tipc/cluster.c          | 135 --------------------------------------------
 net/tipc/cluster.h          |  73 ------------------------
 net/tipc/config.c           |  21 +------
 net/tipc/core.c             |   6 --
 net/tipc/core.h             |   1 -
 net/tipc/name_distr.c       |  28 +++++++--
 net/tipc/net.c              |  25 ++++++--
 net/tipc/net.h              |   8 ++-
 net/tipc/node.c             |  37 ++++++------
 net/tipc/node.h             |   7 +--
 16 files changed, 83 insertions(+), 289 deletions(-)
 delete mode 100644 net/tipc/cluster.c
 delete mode 100644 net/tipc/cluster.h

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 1f38df1202ba..677aa13dc5d7 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -95,7 +95,7 @@
 #define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
-#define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_SLAVES    0x400A    /* obsoleted */
 #define  TIPC_CMD_GET_NETID         0x400B    /* tx none, rx unsigned */
@@ -131,7 +131,7 @@
 #define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
-#define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_SLAVES    0x800A    /* obsoleted */
 #define  TIPC_CMD_SET_NETID         0x800B    /* tx unsigned, rx none */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 06d32908fcfb..c02d3e9c156d 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -29,16 +29,6 @@ config TIPC_ADVANCED
 	  Saying Y here will open some advanced configuration for TIPC.
 	  Most users do not need to bother; if unsure, just say N.
 
-config TIPC_CLUSTERS
-	int "Maximum number of clusters in a zone"
-	depends on TIPC_ADVANCED
-	range 1 1
-	default "1"
-	help
-	  Specifies how many clusters can be supported in a TIPC zone.
-
-	  *** Currently TIPC only supports a single cluster per zone. ***
-
 config TIPC_NODES
 	int "Maximum number of nodes in a cluster"
 	depends on TIPC_ADVANCED
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 3d936f0560c7..849d819bfc74 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_TIPC) := tipc.o
 
-tipc-y	+= addr.o bcast.o bearer.o config.o cluster.o \
+tipc-y	+= addr.o bcast.o bearer.o config.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 8823e03e52e0..483868a75b88 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,7 +35,8 @@
  */
 
 #include "core.h"
-#include "cluster.h"
+#include "node.h"
+#include "addr.h"
 
 /**
  * tipc_addr_domain_valid - validates a network domain address
@@ -55,8 +56,6 @@ int tipc_addr_domain_valid(u32 addr)
 
 	if (n > max_nodes)
 		return 0;
-	if (c > tipc_max_clusters)
-		return 0;
 
 	if (n && (!z || !c))
 		return 0;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 6d828d9eda42..110829eab96a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -111,6 +111,9 @@ static struct bclink *bclink = NULL;
 static struct link *bcl = NULL;
 static DEFINE_SPINLOCK(bc_lock);
 
+/* broadcast-capable node map */
+struct tipc_node_map tipc_bcast_nmap;
+
 const char tipc_bclink_name[] = "broadcast-link";
 
 static void tipc_nmap_diff(struct tipc_node_map *nm_a,
@@ -566,8 +569,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 	if (likely(!msg_non_seq(buf_msg(buf)))) {
 		struct tipc_msg *msg;
 
-		assert(tipc_cltr_bcast_nodes.count != 0);
-		bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count);
+		assert(tipc_bcast_nmap.count != 0);
+		bcbuf_set_acks(buf, tipc_bcast_nmap.count);
 		msg = buf_msg(buf);
 		msg_set_non_seq(msg, 1);
 		msg_set_mc_netid(msg, tipc_net_id);
@@ -576,7 +579,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 
 	/* Send buffer over bearers until all targets reached */
 
-	bcbearer->remains = tipc_cltr_bcast_nodes;
+	bcbearer->remains = tipc_bcast_nmap;
 
 	for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
 		struct bearer *p = bcbearer->bpairs[bp_index].primary;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 011c03f0a4ab..51f8c5326ce6 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -51,6 +51,7 @@ struct tipc_node_map {
 	u32 map[MAX_NODES / WSIZE];
 };
 
+extern struct tipc_node_map tipc_bcast_nmap;
 
 #define PLSIZE 32
 
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
deleted file mode 100644
index ba6f5bfa0cdb..000000000000
--- a/net/tipc/cluster.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * net/tipc/cluster.c: TIPC cluster management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "cluster.h"
-#include "link.h"
-
-struct tipc_node **tipc_local_nodes = NULL;
-struct tipc_node_map tipc_cltr_bcast_nodes = {0,{0,}};
-
-struct cluster *tipc_cltr_create(u32 addr)
-{
-	struct cluster *c_ptr;
-	int max_nodes;
-
-	c_ptr = kzalloc(sizeof(*c_ptr), GFP_ATOMIC);
-	if (c_ptr == NULL) {
-		warn("Cluster creation failure, no memory\n");
-		return NULL;
-	}
-
-	c_ptr->addr = tipc_addr(tipc_zone(addr), tipc_cluster(addr), 0);
-	max_nodes = tipc_max_nodes + 1;
-
-	c_ptr->nodes = kcalloc(max_nodes + 1, sizeof(void*), GFP_ATOMIC);
-	if (c_ptr->nodes == NULL) {
-		warn("Cluster creation failure, no memory for node area\n");
-		kfree(c_ptr);
-		return NULL;
-	}
-
-	tipc_local_nodes = c_ptr->nodes;
-	c_ptr->highest_node = 0;
-
-	tipc_net.clusters[1] = c_ptr;
-	return c_ptr;
-}
-
-void tipc_cltr_delete(struct cluster *c_ptr)
-{
-	u32 n_num;
-
-	if (!c_ptr)
-		return;
-	for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-		tipc_node_delete(c_ptr->nodes[n_num]);
-	}
-	kfree(c_ptr->nodes);
-	kfree(c_ptr);
-}
-
-
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
-{
-	u32 n_num = tipc_node(n_ptr->addr);
-	u32 max_n_num = tipc_max_nodes;
-
-	assert(n_num > 0);
-	assert(n_num <= max_n_num);
-	assert(c_ptr->nodes[n_num] == NULL);
-	c_ptr->nodes[n_num] = n_ptr;
-	if (n_num > c_ptr->highest_node)
-		c_ptr->highest_node = n_num;
-}
-
-/**
- * tipc_cltr_broadcast - broadcast message to all nodes within cluster
- */
-
-void tipc_cltr_broadcast(struct sk_buff *buf)
-{
-	struct sk_buff *buf_copy;
-	struct cluster *c_ptr;
-	struct tipc_node *n_ptr;
-	u32 n_num;
-
-	if (tipc_mode == TIPC_NET_MODE) {
-		c_ptr = tipc_cltr_find(tipc_own_addr);
-
-		/* Send to nodes */
-		for (n_num = 1; n_num <= c_ptr->highest_node; n_num++) {
-			n_ptr = c_ptr->nodes[n_num];
-			if (n_ptr && tipc_node_has_active_links(n_ptr)) {
-				buf_copy = skb_copy(buf, GFP_ATOMIC);
-				if (buf_copy == NULL)
-					goto exit;
-				msg_set_destnode(buf_msg(buf_copy),
-						 n_ptr->addr);
-				tipc_link_send(buf_copy, n_ptr->addr,
-					       n_ptr->addr);
-			}
-		}
-	}
-exit:
-	buf_discard(buf);
-}
-
-int tipc_cltr_init(void)
-{
-	return tipc_cltr_create(tipc_own_addr) ? 0 : -ENOMEM;
-}
-
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
deleted file mode 100644
index e4b6e4e27371..000000000000
--- a/net/tipc/cluster.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * net/tipc/cluster.h: Include file for TIPC cluster management routines
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_CLUSTER_H
-#define _TIPC_CLUSTER_H
-
-#include "addr.h"
-#include "net.h"
-
-/**
- * struct cluster - TIPC cluster structure
- * @addr: network address of cluster
- * @nodes: array of pointers to all nodes within cluster
- * @highest_node: id of highest numbered node within cluster
- */
-
-struct cluster {
-	u32 addr;
-	struct tipc_node **nodes;
-	u32 highest_node;
-};
-
-
-extern struct tipc_node **tipc_local_nodes;
-extern struct tipc_node_map tipc_cltr_bcast_nodes;
-
-struct cluster *tipc_cltr_create(u32 addr);
-void tipc_cltr_delete(struct cluster *c_ptr);
-void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
-void tipc_cltr_broadcast(struct sk_buff *buf);
-int tipc_cltr_init(void);
-
-static inline struct cluster *tipc_cltr_find(u32 addr)
-{
-	if (!in_own_cluster(addr))
-		return NULL;
-	return tipc_net.clusters[1];
-}
-
-#endif
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 05dc102300ae..bc512102eebd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -269,19 +269,6 @@ static struct sk_buff *cfg_set_max_ports(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_clusters(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value != delimit(value, 1, 1))
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max clusters fixed at 1)");
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_nodes(void)
 {
 	u32 value;
@@ -420,9 +407,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_SUBSCR:
 		rep_tlv_buf = cfg_set_max_subscriptions();
 		break;
-	case TIPC_CMD_SET_MAX_CLUSTERS:
-		rep_tlv_buf = cfg_set_max_clusters();
-		break;
 	case TIPC_CMD_SET_MAX_NODES:
 		rep_tlv_buf = cfg_set_max_nodes();
 		break;
@@ -441,9 +425,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_SUBSCR:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
 		break;
-	case TIPC_CMD_GET_MAX_CLUSTERS:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_clusters);
-		break;
 	case TIPC_CMD_GET_MAX_NODES:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes);
 		break;
@@ -458,6 +439,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_ZONES:
 	case TIPC_CMD_SET_MAX_SLAVES:
 	case TIPC_CMD_GET_MAX_SLAVES:
+	case TIPC_CMD_SET_MAX_CLUSTERS:
+	case TIPC_CMD_GET_MAX_CLUSTERS:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
 							  " (obsolete command)");
 		break;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 8b7af893971e..b9a3ef13f7e7 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,10 +48,6 @@
 #include "config.h"
 
 
-#ifndef CONFIG_TIPC_CLUSTERS
-#define CONFIG_TIPC_CLUSTERS 1
-#endif
-
 #ifndef CONFIG_TIPC_NODES
 #define CONFIG_TIPC_NODES 255
 #endif
@@ -76,7 +72,6 @@ const char tipc_alphabet[] =
 /* configurable TIPC parameters */
 
 u32 tipc_own_addr;
-int tipc_max_clusters;
 int tipc_max_nodes;
 int tipc_max_ports;
 int tipc_max_subscriptions;
@@ -199,7 +194,6 @@ static int __init tipc_init(void)
 	tipc_max_publications = 10000;
 	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
-	tipc_max_clusters = CONFIG_TIPC_CLUSTERS;
 	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_net_id = 4711;
 
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8313a1689565..c44f955bd54a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -184,7 +184,6 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
  */
 
 extern u32 tipc_own_addr;
-extern int tipc_max_clusters;
 extern int tipc_max_nodes;
 extern int tipc_max_ports;
 extern int tipc_max_subscriptions;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 10ff48be3c01..c4583fe888d8 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -35,7 +35,7 @@
  */
 
 #include "core.h"
-#include "cluster.h"
+#include "addr.h"
 #include "link.h"
 #include "name_distr.h"
 
@@ -107,6 +107,26 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 	return buf;
 }
 
+static void named_cluster_distribute(struct sk_buff *buf)
+{
+	struct sk_buff *buf_copy;
+	struct tipc_node *n_ptr;
+	u32 n_num;
+
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
+		n_ptr = tipc_net.nodes[n_num];
+		if (n_ptr && tipc_node_has_active_links(n_ptr)) {
+			buf_copy = skb_copy(buf, GFP_ATOMIC);
+			if (!buf_copy)
+				break;
+			msg_set_destnode(buf_msg(buf_copy), n_ptr->addr);
+			tipc_link_send(buf_copy, n_ptr->addr, n_ptr->addr);
+		}
+	}
+
+	buf_discard(buf);
+}
+
 /**
  * tipc_named_publish - tell other nodes about a new publication by this node
  */
@@ -127,8 +147,8 @@ void tipc_named_publish(struct publication *publ)
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	dbg("tipc_named_withdraw: broadcasting publish msg\n");
-	tipc_cltr_broadcast(buf);
+	dbg("tipc_named_publish: broadcasting publish msg\n");
+	named_cluster_distribute(buf);
 }
 
 /**
@@ -152,7 +172,7 @@ void tipc_named_withdraw(struct publication *publ)
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
 	dbg("tipc_named_withdraw: broadcasting withdraw msg\n");
-	tipc_cltr_broadcast(buf);
+	named_cluster_distribute(buf);
 }
 
 /**
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 3967f1f6d97f..3baf55ee0985 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -112,12 +112,23 @@
 DEFINE_RWLOCK(tipc_net_lock);
 struct network tipc_net;
 
+static int net_start(void)
+{
+	tipc_net.nodes = kcalloc(tipc_max_nodes + 1,
+				 sizeof(*tipc_net.nodes), GFP_ATOMIC);
+	tipc_net.highest_node = 0;
+
+	return tipc_net.nodes ? 0 : -ENOMEM;
+}
+
 static void net_stop(void)
 {
-	u32 c_num;
+	u32 n_num;
 
-	for (c_num = 1; c_num <= tipc_max_clusters; c_num++)
-		tipc_cltr_delete(tipc_net.clusters[c_num]);
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++)
+		tipc_node_delete(tipc_net.nodes[n_num]);
+	kfree(tipc_net.nodes);
+	tipc_net.nodes = NULL;
 }
 
 static void net_route_named_msg(struct sk_buff *buf)
@@ -218,10 +229,12 @@ int tipc_net_start(u32 addr)
 	tipc_named_reinit();
 	tipc_port_reinit();
 
-	if ((res = tipc_cltr_init()) ||
-	    (res = tipc_bclink_init())) {
+	res = net_start();
+	if (res)
+		return res;
+	res = tipc_bclink_init();
+	if (res)
 		return res;
-	}
 
 	tipc_k_signal((Handler)tipc_subscr_start, 0);
 	tipc_k_signal((Handler)tipc_cfg_init, 0);
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 6e402d9b33e0..4ae59ad04893 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,16 +37,18 @@
 #ifndef _TIPC_NET_H
 #define _TIPC_NET_H
 
-struct cluster;
+struct tipc_node;
 
 /**
  * struct network - TIPC network structure
- * @clusters: array of pointers to all clusters within zone
+ * @nodes: array of pointers to all nodes within cluster
+ * @highest_node: id of highest numbered node within cluster
  * @links: number of (unicast) links to cluster
  */
 
 struct network {
-	struct cluster *clusters[2]; /* currently limited to just 1 cluster */
+	struct tipc_node **nodes;
+	u32 highest_node;
 	u32 links;
 };
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index c47cc69eb575..58e189bf5c96 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -62,9 +62,9 @@ u32 tipc_own_tag = 0;
 
 struct tipc_node *tipc_node_create(u32 addr)
 {
-	struct cluster *c_ptr;
 	struct tipc_node *n_ptr;
 	struct tipc_node **curr_node;
+	u32 n_num;
 
 	spin_lock_bh(&node_create_lock);
 
@@ -84,21 +84,14 @@ struct tipc_node *tipc_node_create(u32 addr)
 		return NULL;
 	}
 
-	c_ptr = tipc_cltr_find(addr);
-	if (!c_ptr) {
-		c_ptr = tipc_cltr_create(addr);
-	}
-	if (!c_ptr) {
-		spin_unlock_bh(&node_create_lock);
-		kfree(n_ptr);
-		return NULL;
-	}
-
 	n_ptr->addr = addr;
 	spin_lock_init(&n_ptr->lock);
 	INIT_LIST_HEAD(&n_ptr->nsub);
-	n_ptr->owner = c_ptr;
-	tipc_cltr_attach_node(c_ptr, n_ptr);
+
+	n_num = tipc_node(addr);
+	tipc_net.nodes[n_num] = n_ptr;
+	if (n_num > tipc_net.highest_node)
+		tipc_net.highest_node = n_num;
 
 	/* Insert node into ordered list */
 	for (curr_node = &tipc_nodes; *curr_node;
@@ -115,11 +108,19 @@ struct tipc_node *tipc_node_create(u32 addr)
 
 void tipc_node_delete(struct tipc_node *n_ptr)
 {
+	u32 n_num;
+
 	if (!n_ptr)
 		return;
 
 	dbg("node %x deleted\n", n_ptr->addr);
+	n_num = tipc_node(n_ptr->addr);
+	tipc_net.nodes[n_num] = NULL;
 	kfree(n_ptr);
+
+	while (!tipc_net.nodes[tipc_net.highest_node])
+		if (--tipc_net.highest_node == 0)
+			break;
 }
 
 
@@ -324,7 +325,7 @@ static void node_established_contact(struct tipc_node *n_ptr)
 	n_ptr->bclink.acked = tipc_bclink_get_last_sent();
 
 	if (n_ptr->bclink.supported) {
-		tipc_nmap_add(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		tipc_nmap_add(&tipc_bcast_nmap, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag++;
 	}
@@ -361,13 +362,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 		buf_discard(n_ptr->bclink.defragm);
 		n_ptr->bclink.defragm = NULL;
 	}
-	if (in_own_cluster(n_ptr->addr) && n_ptr->bclink.supported) {
-		tipc_bclink_acknowledge(n_ptr, mod(n_ptr->bclink.acked + 10000));
-	}
 
-	/* Update routing tables */
 	if (n_ptr->bclink.supported) {
-		tipc_nmap_remove(&tipc_cltr_bcast_nodes, n_ptr->addr);
+		tipc_bclink_acknowledge(n_ptr,
+					mod(n_ptr->bclink.acked + 10000));
+		tipc_nmap_remove(&tipc_bcast_nmap, n_ptr->addr);
 		if (n_ptr->addr < tipc_own_addr)
 			tipc_own_tag--;
 	}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 3abaaa24c77d..206a8efa410e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -38,14 +38,14 @@
 #define _TIPC_NODE_H
 
 #include "node_subscr.h"
-#include "cluster.h"
+#include "addr.h"
+#include "net.h"
 #include "bearer.h"
 
 /**
  * struct tipc_node - TIPC node structure
  * @addr: network address of node
  * @lock: spinlock governing access to structure
- * @owner: pointer to cluster that node belongs to
  * @next: pointer to next node in sorted list of cluster's nodes
  * @nsub: list of "node down" subscriptions monitoring node
  * @active_links: pointers to active links to node
@@ -69,7 +69,6 @@
 struct tipc_node {
 	u32 addr;
 	spinlock_t lock;
-	struct cluster *owner;
 	struct tipc_node *next;
 	struct list_head nsub;
 	struct link *active_links[2];
@@ -108,7 +107,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 static inline struct tipc_node *tipc_node_find(u32 addr)
 {
 	if (likely(in_own_cluster(addr)))
-		return tipc_local_nodes[tipc_node(addr)];
+		return tipc_net.nodes[tipc_node(addr)];
 	return NULL;
 }
 
-- 
cgit v1.2.3


From 7a488fd3d40a127d0d6057ecd2696f39e11e63c3 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:20 +0000
Subject: tipc: Eliminate use of user registry by configuration service

Simplifies TIPC's configuration service so that it no longer registers
its port with the user registry, since the service doesn't take advantage
of any of the registry's capabilities.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index bc512102eebd..322367b2618f 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -38,15 +38,9 @@
 #include "port.h"
 #include "link.h"
 #include "name_table.h"
-#include "user_reg.h"
 #include "config.h"
 
-struct manager {
-	u32 user_ref;
-	u32 port_ref;
-};
-
-static struct manager mng = { 0};
+static u32 config_port_ref;
 
 static DEFINE_SPINLOCK(config_lock);
 
@@ -506,20 +500,16 @@ int tipc_cfg_init(void)
 	struct tipc_name_seq seq;
 	int res;
 
-	res = tipc_attach(&mng.user_ref);
-	if (res)
-		goto failed;
-
-	res = tipc_createport(mng.user_ref, NULL, TIPC_CRITICAL_IMPORTANCE,
+	res = tipc_createport(0, NULL, TIPC_CRITICAL_IMPORTANCE,
 			      NULL, NULL, NULL,
 			      NULL, cfg_named_msg_event, NULL,
-			      NULL, &mng.port_ref);
+			      NULL, &config_port_ref);
 	if (res)
 		goto failed;
 
 	seq.type = TIPC_CFG_SRV;
 	seq.lower = seq.upper = tipc_own_addr;
-	res = tipc_nametbl_publish_rsv(mng.port_ref, TIPC_ZONE_SCOPE, &seq);
+	res = tipc_nametbl_publish_rsv(config_port_ref, TIPC_ZONE_SCOPE, &seq);
 	if (res)
 		goto failed;
 
@@ -527,15 +517,13 @@ int tipc_cfg_init(void)
 
 failed:
 	err("Unable to create configuration service\n");
-	tipc_detach(mng.user_ref);
-	mng.user_ref = 0;
 	return res;
 }
 
 void tipc_cfg_stop(void)
 {
-	if (mng.user_ref) {
-		tipc_detach(mng.user_ref);
-		mng.user_ref = 0;
+	if (config_port_ref) {
+		tipc_deleteport(config_port_ref);
+		config_port_ref = 0;
 	}
 }
-- 
cgit v1.2.3


From aa70200e001fc4d76552c974c94f65ab26020203 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:21 +0000
Subject: tipc: Eliminate use of user registry by topology service

Simplifies TIPC's network topology service so that it no longer registers
its ports with the user registry, since the service doesn't take advantage
of any of the registry's capabilities.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/subscr.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 23f43d03980c..21abf1765b02 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -36,7 +36,7 @@
 
 #include "core.h"
 #include "name_table.h"
-#include "user_reg.h"
+#include "port.h"
 #include "subscr.h"
 
 /**
@@ -64,7 +64,6 @@ struct subscriber {
  */
 
 struct top_srv {
-	u32 user_ref;
 	u32 setup_port;
 	atomic_t subscription_count;
 	struct list_head subscriber_list;
@@ -494,7 +493,7 @@ static void subscr_named_msg_event(void *usr_handle,
 
 	/* Create server port & establish connection to subscriber */
 
-	tipc_createport(topsrv.user_ref,
+	tipc_createport(0,
 			subscriber,
 			importance,
 			NULL,
@@ -549,13 +548,7 @@ int tipc_subscr_start(void)
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
 	spin_lock_bh(&topsrv.lock);
-	res = tipc_attach(&topsrv.user_ref);
-	if (res) {
-		spin_unlock_bh(&topsrv.lock);
-		return res;
-	}
-
-	res = tipc_createport(topsrv.user_ref,
+	res = tipc_createport(0,
 			      NULL,
 			      TIPC_CRITICAL_IMPORTANCE,
 			      NULL,
@@ -570,16 +563,17 @@ int tipc_subscr_start(void)
 		goto failed;
 
 	res = tipc_nametbl_publish_rsv(topsrv.setup_port, TIPC_NODE_SCOPE, &seq);
-	if (res)
+	if (res) {
+		tipc_deleteport(topsrv.setup_port);
+		topsrv.setup_port = 0;
 		goto failed;
+	}
 
 	spin_unlock_bh(&topsrv.lock);
 	return 0;
 
 failed:
 	err("Failed to create subscription service\n");
-	tipc_detach(topsrv.user_ref);
-	topsrv.user_ref = 0;
 	spin_unlock_bh(&topsrv.lock);
 	return res;
 }
@@ -590,8 +584,10 @@ void tipc_subscr_stop(void)
 	struct subscriber *subscriber_temp;
 	spinlock_t *subscriber_lock;
 
-	if (topsrv.user_ref) {
+	if (topsrv.setup_port) {
 		tipc_deleteport(topsrv.setup_port);
+		topsrv.setup_port = 0;
+
 		list_for_each_entry_safe(subscriber, subscriber_temp,
 					 &topsrv.subscriber_list,
 					 subscriber_list) {
@@ -600,7 +596,5 @@ void tipc_subscr_stop(void)
 			subscr_terminate(subscriber);
 			spin_unlock_bh(subscriber_lock);
 		}
-		tipc_detach(topsrv.user_ref);
-		topsrv.user_ref = 0;
 	}
 }
-- 
cgit v1.2.3


From b0c1e928c85023c73780b5d9873406ccf1cd8019 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:22 +0000
Subject: tipc: Remove user registry subsystem

Eliminates routines, data structures, and files that make up TIPC's
user registry. The user registry is no longer needed since the native
API routines that utilized it no longer exist and there are no longer
any internal TIPC services that use it.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/Makefile   |   2 +-
 net/tipc/config.c   |   2 +-
 net/tipc/core.c     |   3 -
 net/tipc/port.c     |  11 +--
 net/tipc/port.h     |   6 +-
 net/tipc/subscr.c   |   6 +-
 net/tipc/user_reg.c | 218 ----------------------------------------------------
 net/tipc/user_reg.h |  51 ------------
 8 files changed, 7 insertions(+), 292 deletions(-)
 delete mode 100644 net/tipc/user_reg.c
 delete mode 100644 net/tipc/user_reg.h

(limited to 'net')

diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 849d819bfc74..d41cd110fe7d 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,6 +8,6 @@ tipc-y	+= addr.o bcast.o bearer.o config.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
-	   socket.o user_reg.o dbg.o eth_media.o
+	   socket.o dbg.o eth_media.o
 
 # End of file
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 322367b2618f..afa6e853c04b 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -500,7 +500,7 @@ int tipc_cfg_init(void)
 	struct tipc_name_seq seq;
 	int res;
 
-	res = tipc_createport(0, NULL, TIPC_CRITICAL_IMPORTANCE,
+	res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE,
 			      NULL, NULL, NULL,
 			      NULL, cfg_named_msg_event, NULL,
 			      NULL, &config_port_ref);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index b9a3ef13f7e7..a02bc490caae 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -42,7 +42,6 @@
 #include "core.h"
 #include "ref.h"
 #include "net.h"
-#include "user_reg.h"
 #include "name_table.h"
 #include "subscr.h"
 #include "config.h"
@@ -144,7 +143,6 @@ static void tipc_core_stop(void)
 	tipc_handler_stop();
 	tipc_cfg_stop();
 	tipc_subscr_stop();
-	tipc_reg_stop();
 	tipc_nametbl_stop();
 	tipc_ref_table_stop();
 	tipc_socket_stop();
@@ -167,7 +165,6 @@ static int tipc_core_start(void)
 
 	if ((res = tipc_handler_start()) ||
 	    (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) ||
-	    (res = tipc_reg_start()) ||
 	    (res = tipc_nametbl_init()) ||
 	    (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) ||
 	    (res = tipc_k_signal((Handler)tipc_cfg_init, 0)) ||
diff --git a/net/tipc/port.c b/net/tipc/port.c
index c033cb87b964..33d0b3b7175f 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -38,7 +38,6 @@
 #include "config.h"
 #include "port.h"
 #include "name_table.h"
-#include "user_reg.h"
 
 /* Connection management: */
 #define PROBING_INTERVAL 3600000	/* [ms] => 1 h */
@@ -272,7 +271,6 @@ int tipc_deleteport(u32 ref)
 		tipc_nodesub_unsubscribe(&p_ptr->subscription);
 	}
 	if (p_ptr->user_port) {
-		tipc_reg_remove_port(p_ptr->user_port);
 		kfree(p_ptr->user_port);
 	}
 
@@ -934,12 +932,10 @@ void tipc_acknowledge(u32 ref, u32 ack)
 }
 
 /*
- * tipc_createport(): user level call. Will add port to
- *                    registry if non-zero user_ref.
+ * tipc_createport(): user level call.
  */
 
-int tipc_createport(u32 user_ref,
-		    void *usr_handle,
+int tipc_createport(void *usr_handle,
 		    unsigned int importance,
 		    tipc_msg_err_event error_cb,
 		    tipc_named_msg_err_event named_error_cb,
@@ -966,7 +962,6 @@ int tipc_createport(u32 user_ref,
 	}
 
 	p_ptr->user_port = up_ptr;
-	up_ptr->user_ref = user_ref;
 	up_ptr->usr_handle = usr_handle;
 	up_ptr->ref = p_ptr->publ.ref;
 	up_ptr->err_cb = error_cb;
@@ -976,8 +971,6 @@ int tipc_createport(u32 user_ref,
 	up_ptr->named_msg_cb = named_msg_cb;
 	up_ptr->conn_msg_cb = conn_msg_cb;
 	up_ptr->continue_event_cb = continue_event_cb;
-	INIT_LIST_HEAD(&up_ptr->uport_list);
-	tipc_reg_add_port(up_ptr);
 	*portref = p_ptr->publ.ref;
 	tipc_port_unlock(p_ptr);
 	return 0;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 3a807fcec2be..da607a8a2f35 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -77,15 +77,12 @@ typedef void (*tipc_continue_event) (void *usr_handle, u32 portref);
 
 /**
  * struct user_port - TIPC user port (used with native API)
- * @user_ref: id of user who created user port
  * @usr_handle: user-specified field
  * @ref: object reference to associated TIPC port
  * <various callback routines>
- * @uport_list: adjacent user ports in list of ports held by user
  */
 
 struct user_port {
-	u32 user_ref;
 	void *usr_handle;
 	u32 ref;
 	tipc_msg_err_event err_cb;
@@ -95,7 +92,6 @@ struct user_port {
 	tipc_named_msg_event named_msg_cb;
 	tipc_conn_msg_event conn_msg_cb;
 	tipc_continue_event continue_event_cb;
-	struct list_head uport_list;
 };
 
 /**
@@ -181,7 +177,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode);
 
 void tipc_acknowledge(u32 port_ref, u32 ack);
 
-int tipc_createport(unsigned int tipc_user, void *usr_handle,
+int tipc_createport(void *usr_handle,
 		unsigned int importance, tipc_msg_err_event error_cb,
 		tipc_named_msg_err_event named_error_cb,
 		tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb,
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 21abf1765b02..c5ba323dba47 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -493,8 +493,7 @@ static void subscr_named_msg_event(void *usr_handle,
 
 	/* Create server port & establish connection to subscriber */
 
-	tipc_createport(0,
-			subscriber,
+	tipc_createport(subscriber,
 			importance,
 			NULL,
 			NULL,
@@ -548,8 +547,7 @@ int tipc_subscr_start(void)
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
 	spin_lock_bh(&topsrv.lock);
-	res = tipc_createport(0,
-			      NULL,
+	res = tipc_createport(NULL,
 			      TIPC_CRITICAL_IMPORTANCE,
 			      NULL,
 			      NULL,
diff --git a/net/tipc/user_reg.c b/net/tipc/user_reg.c
deleted file mode 100644
index 2e2702e2049c..000000000000
--- a/net/tipc/user_reg.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * net/tipc/user_reg.c: TIPC user registry code
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2004-2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "user_reg.h"
-
-/*
- * TIPC user registry keeps track of users of the tipc_port interface.
- *
- * The registry utilizes an array of "TIPC user" entries;
- * a user's ID is the index of their associated array entry.
- * Array entry 0 is not used, so userid 0 is not valid;
- * TIPC sometimes uses this value to denote an anonymous user.
- * The list of free entries is initially chained from last entry to entry 1.
- */
-
-/**
- * struct tipc_user - registered TIPC user info
- * @next: index of next free registry entry (or -1 for an allocated entry)
- * @ports: list of user ports owned by the user
- */
-
-struct tipc_user {
-	int next;
-	struct list_head ports;
-};
-
-#define MAX_USERID 64
-#define USER_LIST_SIZE ((MAX_USERID + 1) * sizeof(struct tipc_user))
-
-static struct tipc_user *users = NULL;
-static u32 next_free_user = MAX_USERID + 1;
-static DEFINE_SPINLOCK(reg_lock);
-
-/**
- * reg_init - create TIPC user registry (but don't activate it)
- *
- * If registry has been pre-initialized it is left "as is".
- * NOTE: This routine may be called when TIPC is inactive.
- */
-
-static int reg_init(void)
-{
-	u32 i;
-
-	spin_lock_bh(&reg_lock);
-	if (!users) {
-		users = kzalloc(USER_LIST_SIZE, GFP_ATOMIC);
-		if (users) {
-			for (i = 1; i <= MAX_USERID; i++) {
-				users[i].next = i - 1;
-			}
-			next_free_user = MAX_USERID;
-		}
-	}
-	spin_unlock_bh(&reg_lock);
-	return users ? 0 : -ENOMEM;
-}
-
-/**
- * tipc_reg_start - activate TIPC user registry
- */
-
-int tipc_reg_start(void)
-{
-	return reg_init();
-}
-
-/**
- * tipc_reg_stop - shut down & delete TIPC user registry
- */
-
-void tipc_reg_stop(void)
-{
-	if (!users)
-		return;
-
-	kfree(users);
-	users = NULL;
-}
-
-/**
- * tipc_attach - register a TIPC user
- *
- * NOTE: This routine may be called when TIPC is inactive.
- */
-
-int tipc_attach(u32 *userid)
-{
-	struct tipc_user *user_ptr;
-
-	if (!users)
-		reg_init();
-
-	spin_lock_bh(&reg_lock);
-	if (!next_free_user) {
-		spin_unlock_bh(&reg_lock);
-		return -EBUSY;
-	}
-	user_ptr = &users[next_free_user];
-	*userid = next_free_user;
-	next_free_user = user_ptr->next;
-	user_ptr->next = -1;
-	spin_unlock_bh(&reg_lock);
-
-	INIT_LIST_HEAD(&user_ptr->ports);
-	atomic_inc(&tipc_user_count);
-
-	return 0;
-}
-
-/**
- * tipc_detach - deregister a TIPC user
- */
-
-void tipc_detach(u32 userid)
-{
-	struct tipc_user *user_ptr;
-	struct list_head ports_temp;
-	struct user_port *up_ptr, *temp_up_ptr;
-
-	if ((userid == 0) || (userid > MAX_USERID))
-		return;
-
-	spin_lock_bh(&reg_lock);
-	if ((!users) || (users[userid].next >= 0)) {
-		spin_unlock_bh(&reg_lock);
-		return;
-	}
-
-	user_ptr = &users[userid];
-	INIT_LIST_HEAD(&ports_temp);
-	list_splice(&user_ptr->ports, &ports_temp);
-	user_ptr->next = next_free_user;
-	next_free_user = userid;
-	spin_unlock_bh(&reg_lock);
-
-	atomic_dec(&tipc_user_count);
-
-	list_for_each_entry_safe(up_ptr, temp_up_ptr, &ports_temp, uport_list) {
-		tipc_deleteport(up_ptr->ref);
-	}
-}
-
-/**
- * tipc_reg_add_port - register a user's driver port
- */
-
-int tipc_reg_add_port(struct user_port *up_ptr)
-{
-	struct tipc_user *user_ptr;
-
-	if (up_ptr->user_ref == 0)
-		return 0;
-	if (up_ptr->user_ref > MAX_USERID)
-		return -EINVAL;
-	if ((tipc_mode == TIPC_NOT_RUNNING) || !users )
-		return -ENOPROTOOPT;
-
-	spin_lock_bh(&reg_lock);
-	user_ptr = &users[up_ptr->user_ref];
-	list_add(&up_ptr->uport_list, &user_ptr->ports);
-	spin_unlock_bh(&reg_lock);
-	return 0;
-}
-
-/**
- * tipc_reg_remove_port - deregister a user's driver port
- */
-
-int tipc_reg_remove_port(struct user_port *up_ptr)
-{
-	if (up_ptr->user_ref == 0)
-		return 0;
-	if (up_ptr->user_ref > MAX_USERID)
-		return -EINVAL;
-	if (!users )
-		return -ENOPROTOOPT;
-
-	spin_lock_bh(&reg_lock);
-	list_del_init(&up_ptr->uport_list);
-	spin_unlock_bh(&reg_lock);
-	return 0;
-}
-
diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h
deleted file mode 100644
index 109eed0d6de3..000000000000
--- a/net/tipc/user_reg.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * net/tipc/user_reg.h: Include file for TIPC user registry code
- *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_USER_REG_H
-#define _TIPC_USER_REG_H
-
-#include "port.h"
-
-int tipc_reg_start(void);
-void tipc_reg_stop(void);
-
-int tipc_attach(unsigned int *userref);
-void tipc_detach(unsigned int userref);
-
-int tipc_reg_add_port(struct user_port *up_ptr);
-int tipc_reg_remove_port(struct user_port *up_ptr);
-
-#endif
-- 
cgit v1.2.3


From 5af5479296fba0ace5d5cab84045de5b19bde3fe Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:23 +0000
Subject: tipc: Remove internal linked list of node objects

Eliminates a sorted list TIPC uses to keep track of the neighboring
nodes it has links to, since this duplicates information already present
in the internal array of node object pointers.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 43 ++++++++++++++++---------------------------
 1 file changed, 16 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 58e189bf5c96..31dcca98201a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -43,9 +43,6 @@
 static void node_lost_contact(struct tipc_node *n_ptr);
 static void node_established_contact(struct tipc_node *n_ptr);
 
-/* sorted list of nodes within cluster */
-static struct tipc_node *tipc_nodes = NULL;
-
 static DEFINE_SPINLOCK(node_create_lock);
 
 u32 tipc_own_tag = 0;
@@ -63,21 +60,17 @@ u32 tipc_own_tag = 0;
 struct tipc_node *tipc_node_create(u32 addr)
 {
 	struct tipc_node *n_ptr;
-	struct tipc_node **curr_node;
 	u32 n_num;
 
 	spin_lock_bh(&node_create_lock);
 
-	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (addr < n_ptr->addr)
-			break;
-		if (addr == n_ptr->addr) {
-			spin_unlock_bh(&node_create_lock);
-			return n_ptr;
-		}
+	n_ptr = tipc_node_find(addr);
+	if (n_ptr) {
+		spin_unlock_bh(&node_create_lock);
+		return n_ptr;
 	}
 
-	n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC);
+	n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
 	if (!n_ptr) {
 		spin_unlock_bh(&node_create_lock);
 		warn("Node creation failed, no memory\n");
@@ -93,15 +86,6 @@ struct tipc_node *tipc_node_create(u32 addr)
 	if (n_num > tipc_net.highest_node)
 		tipc_net.highest_node = n_num;
 
-	/* Insert node into ordered list */
-	for (curr_node = &tipc_nodes; *curr_node;
-	     curr_node = &(*curr_node)->next) {
-		if (addr < (*curr_node)->addr) {
-			n_ptr->next = *curr_node;
-			break;
-		}
-	}
-	(*curr_node) = n_ptr;
 	spin_unlock_bh(&node_create_lock);
 	return n_ptr;
 }
@@ -405,6 +389,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 	struct tipc_node *n_ptr;
 	struct tipc_node_info node_info;
 	u32 payload_size;
+	u32 n_num;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
 		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -415,14 +400,15 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 						   " (network address)");
 
 	read_lock_bh(&tipc_net_lock);
-	if (!tipc_nodes) {
+	if (!tipc_net.nodes) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_none();
 	}
 
 	/* For now, get space for all other nodes */
 
-	payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
+	payload_size = TLV_SPACE(sizeof(node_info)) *
+		(tipc_net.highest_node - 1);
 	if (payload_size > 32768u) {
 		read_unlock_bh(&tipc_net_lock);
 		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
@@ -436,8 +422,9 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 
 	/* Add TLVs for all nodes in scope */
 
-	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!tipc_in_scope(domain, n_ptr->addr))
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
+		n_ptr = tipc_net.nodes[n_num];
+		if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		node_info.addr = htonl(n_ptr->addr);
 		node_info.up = htonl(tipc_node_is_up(n_ptr));
@@ -456,6 +443,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	struct tipc_node *n_ptr;
 	struct tipc_link_info link_info;
 	u32 payload_size;
+	u32 n_num;
 
 	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR))
 		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
@@ -493,10 +481,11 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 
 	/* Add TLVs for any other links in scope */
 
-	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
+	for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) {
 		u32 i;
 
-		if (!tipc_in_scope(domain, n_ptr->addr))
+		n_ptr = tipc_net.nodes[n_num];
+		if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		tipc_node_lock(n_ptr);
 		for (i = 0; i < MAX_BEARERS; i++) {
-- 
cgit v1.2.3


From f5e75269f59f7c3816f23314b924895e4ecf8409 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:24 +0000
Subject: tipc: rename dbg.[ch] to log.[ch]

As the first step in removing obsolete debugging code from TIPC the
files that implement TIPC's non-debug-related log buffer subsystem
are renamed to better reflect their true nature.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/Makefile |   2 +-
 net/tipc/core.h   |   2 +-
 net/tipc/dbg.c    | 432 ------------------------------------------------------
 net/tipc/dbg.h    |  67 ---------
 net/tipc/link.h   |   2 +-
 net/tipc/log.c    | 428 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/tipc/log.h    |  67 +++++++++
 7 files changed, 498 insertions(+), 502 deletions(-)
 delete mode 100644 net/tipc/dbg.c
 delete mode 100644 net/tipc/dbg.h
 create mode 100644 net/tipc/log.c
 create mode 100644 net/tipc/log.h

(limited to 'net')

diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index d41cd110fe7d..521d24d04ab2 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,6 +8,6 @@ tipc-y	+= addr.o bcast.o bearer.o config.o \
 	   core.o handler.o link.o discover.o msg.o  \
 	   name_distr.o  subscr.o name_table.o net.o  \
 	   netlink.o node.o node_subscr.o port.o ref.o  \
-	   socket.o dbg.o eth_media.o
+	   socket.o log.o eth_media.o
 
 # End of file
diff --git a/net/tipc/core.h b/net/tipc/core.h
index c44f955bd54a..17f3670ed95d 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,7 +59,7 @@
 #define TIPC_MOD_VER "2.0.0"
 
 struct tipc_msg;	/* msg.h */
-struct print_buf;	/* dbg.h */
+struct print_buf;	/* log.h */
 
 /*
  * TIPC sanity test macros
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
deleted file mode 100644
index 46f51d208e5e..000000000000
--- a/net/tipc/dbg.c
+++ /dev/null
@@ -1,432 +0,0 @@
-/*
- * net/tipc/dbg.c: TIPC print buffer routines for debugging
- *
- * Copyright (c) 1996-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core.h"
-#include "config.h"
-#include "dbg.h"
-
-/*
- * TIPC pre-defines the following print buffers:
- *
- * TIPC_NULL : null buffer (i.e. print nowhere)
- * TIPC_CONS : system console
- * TIPC_LOG  : TIPC log buffer
- *
- * Additional user-defined print buffers are also permitted.
- */
-
-static struct print_buf null_buf = { NULL, 0, NULL, 0 };
-struct print_buf *const TIPC_NULL = &null_buf;
-
-static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
-static struct print_buf *const TIPC_CONS = &cons_buf;
-
-static struct print_buf log_buf = { NULL, 0, NULL, 1 };
-struct print_buf *const TIPC_LOG = &log_buf;
-
-/*
- * Locking policy when using print buffers.
- *
- * 1) tipc_printf() uses 'print_lock' to protect against concurrent access to
- * 'print_string' when writing to a print buffer. This also protects against
- * concurrent writes to the print buffer being written to.
- *
- * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned
- * use of 'print_lock' to protect against all types of concurrent operations
- * on their associated print buffer (not just write operations).
- *
- * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely
- * on the caller to prevent simultaneous use of the print buffer(s) being
- * manipulated.
- */
-
-static char print_string[TIPC_PB_MAX_STR];
-static DEFINE_SPINLOCK(print_lock);
-
-static void tipc_printbuf_reset(struct print_buf *pb);
-static int  tipc_printbuf_empty(struct print_buf *pb);
-static void tipc_printbuf_move(struct print_buf *pb_to,
-			       struct print_buf *pb_from);
-
-#define FORMAT(PTR,LEN,FMT) \
-{\
-       va_list args;\
-       va_start(args, FMT);\
-       LEN = vsprintf(PTR, FMT, args);\
-       va_end(args);\
-       *(PTR + LEN) = '\0';\
-}
-
-/**
- * tipc_printbuf_init - initialize print buffer to empty
- * @pb: pointer to print buffer structure
- * @raw: pointer to character array used by print buffer
- * @size: size of character array
- *
- * Note: If the character array is too small (or absent), the print buffer
- * becomes a null device that discards anything written to it.
- */
-
-void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
-{
-	pb->buf = raw;
-	pb->crs = raw;
-	pb->size = size;
-	pb->echo = 0;
-
-	if (size < TIPC_PB_MIN_SIZE) {
-		pb->buf = NULL;
-	} else if (raw) {
-		pb->buf[0] = 0;
-		pb->buf[size - 1] = ~0;
-	}
-}
-
-/**
- * tipc_printbuf_reset - reinitialize print buffer to empty state
- * @pb: pointer to print buffer structure
- */
-
-static void tipc_printbuf_reset(struct print_buf *pb)
-{
-	if (pb->buf) {
-		pb->crs = pb->buf;
-		pb->buf[0] = 0;
-		pb->buf[pb->size - 1] = ~0;
-	}
-}
-
-/**
- * tipc_printbuf_empty - test if print buffer is in empty state
- * @pb: pointer to print buffer structure
- *
- * Returns non-zero if print buffer is empty.
- */
-
-static int tipc_printbuf_empty(struct print_buf *pb)
-{
-	return !pb->buf || (pb->crs == pb->buf);
-}
-
-/**
- * tipc_printbuf_validate - check for print buffer overflow
- * @pb: pointer to print buffer structure
- *
- * Verifies that a print buffer has captured all data written to it.
- * If data has been lost, linearize buffer and prepend an error message
- *
- * Returns length of print buffer data string (including trailing NUL)
- */
-
-int tipc_printbuf_validate(struct print_buf *pb)
-{
-	char *err = "\n\n*** PRINT BUFFER OVERFLOW ***\n\n";
-	char *cp_buf;
-	struct print_buf cb;
-
-	if (!pb->buf)
-		return 0;
-
-	if (pb->buf[pb->size - 1] == 0) {
-		cp_buf = kmalloc(pb->size, GFP_ATOMIC);
-		if (cp_buf) {
-			tipc_printbuf_init(&cb, cp_buf, pb->size);
-			tipc_printbuf_move(&cb, pb);
-			tipc_printbuf_move(pb, &cb);
-			kfree(cp_buf);
-			memcpy(pb->buf, err, strlen(err));
-		} else {
-			tipc_printbuf_reset(pb);
-			tipc_printf(pb, err);
-		}
-	}
-	return pb->crs - pb->buf + 1;
-}
-
-/**
- * tipc_printbuf_move - move print buffer contents to another print buffer
- * @pb_to: pointer to destination print buffer structure
- * @pb_from: pointer to source print buffer structure
- *
- * Current contents of destination print buffer (if any) are discarded.
- * Source print buffer becomes empty if a successful move occurs.
- */
-
-static void tipc_printbuf_move(struct print_buf *pb_to,
-			       struct print_buf *pb_from)
-{
-	int len;
-
-	/* Handle the cases where contents can't be moved */
-
-	if (!pb_to->buf)
-		return;
-
-	if (!pb_from->buf) {
-		tipc_printbuf_reset(pb_to);
-		return;
-	}
-
-	if (pb_to->size < pb_from->size) {
-		strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***");
-		pb_to->buf[pb_to->size - 1] = ~0;
-		pb_to->crs = strchr(pb_to->buf, 0);
-		return;
-	}
-
-	/* Copy data from char after cursor to end (if used) */
-
-	len = pb_from->buf + pb_from->size - pb_from->crs - 2;
-	if ((pb_from->buf[pb_from->size - 1] == 0) && (len > 0)) {
-		strcpy(pb_to->buf, pb_from->crs + 1);
-		pb_to->crs = pb_to->buf + len;
-	} else
-		pb_to->crs = pb_to->buf;
-
-	/* Copy data from start to cursor (always) */
-
-	len = pb_from->crs - pb_from->buf;
-	strcpy(pb_to->crs, pb_from->buf);
-	pb_to->crs += len;
-
-	tipc_printbuf_reset(pb_from);
-}
-
-/**
- * tipc_printf - append formatted output to print buffer
- * @pb: pointer to print buffer
- * @fmt: formatted info to be printed
- */
-
-void tipc_printf(struct print_buf *pb, const char *fmt, ...)
-{
-	int chars_to_add;
-	int chars_left;
-	char save_char;
-
-	spin_lock_bh(&print_lock);
-
-	FORMAT(print_string, chars_to_add, fmt);
-	if (chars_to_add >= TIPC_PB_MAX_STR)
-		strcpy(print_string, "*** PRINT BUFFER STRING TOO LONG ***");
-
-	if (pb->buf) {
-		chars_left = pb->buf + pb->size - pb->crs - 1;
-		if (chars_to_add <= chars_left) {
-			strcpy(pb->crs, print_string);
-			pb->crs += chars_to_add;
-		} else if (chars_to_add >= (pb->size - 1)) {
-			strcpy(pb->buf, print_string + chars_to_add + 1
-			       - pb->size);
-			pb->crs = pb->buf + pb->size - 1;
-		} else {
-			strcpy(pb->buf, print_string + chars_left);
-			save_char = print_string[chars_left];
-			print_string[chars_left] = 0;
-			strcpy(pb->crs, print_string);
-			print_string[chars_left] = save_char;
-			pb->crs = pb->buf + chars_to_add - chars_left;
-		}
-	}
-
-	if (pb->echo)
-		printk("%s", print_string);
-
-	spin_unlock_bh(&print_lock);
-}
-
-#ifdef CONFIG_TIPC_DEBUG
-
-/**
- * print_to_console - write string of bytes to console in multiple chunks
- */
-
-static void print_to_console(char *crs, int len)
-{
-	int rest = len;
-
-	while (rest > 0) {
-		int sz = rest < TIPC_PB_MAX_STR ? rest : TIPC_PB_MAX_STR;
-		char c = crs[sz];
-
-		crs[sz] = 0;
-		printk((const char *)crs);
-		crs[sz] = c;
-		rest -= sz;
-		crs += sz;
-	}
-}
-
-/**
- * printbuf_dump - write print buffer contents to console
- */
-
-static void printbuf_dump(struct print_buf *pb)
-{
-	int len;
-
-	if (!pb->buf) {
-		printk("*** PRINT BUFFER NOT ALLOCATED ***");
-		return;
-	}
-
-	/* Dump print buffer from char after cursor to end (if used) */
-
-	len = pb->buf + pb->size - pb->crs - 2;
-	if ((pb->buf[pb->size - 1] == 0) && (len > 0))
-		print_to_console(pb->crs + 1, len);
-
-	/* Dump print buffer from start to cursor (always) */
-
-	len = pb->crs - pb->buf;
-	print_to_console(pb->buf, len);
-}
-
-/**
- * tipc_dump_dbg - dump (non-console) print buffer to console
- * @pb: pointer to print buffer
- */
-
-void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...)
-{
-	int len;
-
-	if (pb == TIPC_CONS)
-		return;
-
-	spin_lock_bh(&print_lock);
-
-	FORMAT(print_string, len, fmt);
-	printk(print_string);
-
-	printk("\n---- Start of %s log dump ----\n\n",
-	       (pb == TIPC_LOG) ? "global" : "local");
-	printbuf_dump(pb);
-	tipc_printbuf_reset(pb);
-	printk("\n---- End of dump ----\n");
-
-	spin_unlock_bh(&print_lock);
-}
-
-#endif
-
-/**
- * tipc_log_resize - change the size of the TIPC log buffer
- * @log_size: print buffer size to use
- */
-
-int tipc_log_resize(int log_size)
-{
-	int res = 0;
-
-	spin_lock_bh(&print_lock);
-	if (TIPC_LOG->buf) {
-		kfree(TIPC_LOG->buf);
-		TIPC_LOG->buf = NULL;
-	}
-	if (log_size) {
-		if (log_size < TIPC_PB_MIN_SIZE)
-			log_size = TIPC_PB_MIN_SIZE;
-		res = TIPC_LOG->echo;
-		tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC),
-				   log_size);
-		TIPC_LOG->echo = res;
-		res = !TIPC_LOG->buf;
-	}
-	spin_unlock_bh(&print_lock);
-
-	return res;
-}
-
-/**
- * tipc_log_resize_cmd - reconfigure size of TIPC log buffer
- */
-
-struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value != delimit(value, 0, 32768))
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (log size must be 0-32768)");
-	if (tipc_log_resize(value))
-		return tipc_cfg_reply_error_string(
-			"unable to create specified log (log size is now 0)");
-	return tipc_cfg_reply_none();
-}
-
-/**
- * tipc_log_dump - capture TIPC log buffer contents in configuration message
- */
-
-struct sk_buff *tipc_log_dump(void)
-{
-	struct sk_buff *reply;
-
-	spin_lock_bh(&print_lock);
-	if (!TIPC_LOG->buf) {
-		spin_unlock_bh(&print_lock);
-		reply = tipc_cfg_reply_ultra_string("log not activated\n");
-	} else if (tipc_printbuf_empty(TIPC_LOG)) {
-		spin_unlock_bh(&print_lock);
-		reply = tipc_cfg_reply_ultra_string("log is empty\n");
-	}
-	else {
-		struct tlv_desc *rep_tlv;
-		struct print_buf pb;
-		int str_len;
-
-		str_len = min(TIPC_LOG->size, 32768u);
-		spin_unlock_bh(&print_lock);
-		reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len));
-		if (reply) {
-			rep_tlv = (struct tlv_desc *)reply->data;
-			tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len);
-			spin_lock_bh(&print_lock);
-			tipc_printbuf_move(&pb, TIPC_LOG);
-			spin_unlock_bh(&print_lock);
-			str_len = strlen(TLV_DATA(rep_tlv)) + 1;
-			skb_put(reply, TLV_SPACE(str_len));
-			TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-		}
-	}
-	return reply;
-}
-
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
deleted file mode 100644
index 3ba6ba8b434a..000000000000
--- a/net/tipc/dbg.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * net/tipc/dbg.h: Include file for TIPC print buffer routines
- *
- * Copyright (c) 1997-2006, Ericsson AB
- * Copyright (c) 2005-2007, Wind River Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the names of the copyright holders nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _TIPC_DBG_H
-#define _TIPC_DBG_H
-
-/**
- * struct print_buf - TIPC print buffer structure
- * @buf: pointer to character array containing print buffer contents
- * @size: size of character array
- * @crs: pointer to first unused space in character array (i.e. final NUL)
- * @echo: echo output to system console if non-zero
- */
-
-struct print_buf {
-	char *buf;
-	u32 size;
-	char *crs;
-	int echo;
-};
-
-#define TIPC_PB_MIN_SIZE 64	/* minimum size for a print buffer's array */
-#define TIPC_PB_MAX_STR 512	/* max printable string (with trailing NUL) */
-
-void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
-int  tipc_printbuf_validate(struct print_buf *pb);
-
-int tipc_log_resize(int log_size);
-
-struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
-				    int req_tlv_space);
-struct sk_buff *tipc_log_dump(void);
-
-#endif
diff --git a/net/tipc/link.h b/net/tipc/link.h
index c562888d25da..eeb0c015dba9 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -37,7 +37,7 @@
 #ifndef _TIPC_LINK_H
 #define _TIPC_LINK_H
 
-#include "dbg.h"
+#include "log.h"
 #include "msg.h"
 #include "node.h"
 
diff --git a/net/tipc/log.c b/net/tipc/log.c
new file mode 100644
index 000000000000..9d99f7097d2d
--- /dev/null
+++ b/net/tipc/log.c
@@ -0,0 +1,428 @@
+/*
+ * net/tipc/log.c: TIPC print buffer routines for debugging
+ *
+ * Copyright (c) 1996-2006, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "config.h"
+#include "log.h"
+
+/*
+ * TIPC pre-defines the following print buffers:
+ *
+ * TIPC_NULL : null buffer (i.e. print nowhere)
+ * TIPC_CONS : system console
+ * TIPC_LOG  : TIPC log buffer
+ *
+ * Additional user-defined print buffers are also permitted.
+ */
+
+static struct print_buf null_buf = { NULL, 0, NULL, 0 };
+struct print_buf *const TIPC_NULL = &null_buf;
+
+static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
+static struct print_buf *const TIPC_CONS = &cons_buf;
+
+static struct print_buf log_buf = { NULL, 0, NULL, 1 };
+struct print_buf *const TIPC_LOG = &log_buf;
+
+/*
+ * Locking policy when using print buffers.
+ *
+ * 1) tipc_printf() uses 'print_lock' to protect against concurrent access to
+ * 'print_string' when writing to a print buffer. This also protects against
+ * concurrent writes to the print buffer being written to.
+ *
+ * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned
+ * use of 'print_lock' to protect against all types of concurrent operations
+ * on their associated print buffer (not just write operations).
+ *
+ * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely
+ * on the caller to prevent simultaneous use of the print buffer(s) being
+ * manipulated.
+ */
+
+static char print_string[TIPC_PB_MAX_STR];
+static DEFINE_SPINLOCK(print_lock);
+
+static void tipc_printbuf_reset(struct print_buf *pb);
+static int  tipc_printbuf_empty(struct print_buf *pb);
+static void tipc_printbuf_move(struct print_buf *pb_to,
+			       struct print_buf *pb_from);
+
+#define FORMAT(PTR, LEN, FMT) \
+{\
+	va_list args;\
+	va_start(args, FMT);\
+	LEN = vsprintf(PTR, FMT, args);\
+	va_end(args);\
+	*(PTR + LEN) = '\0';\
+}
+
+/**
+ * tipc_printbuf_init - initialize print buffer to empty
+ * @pb: pointer to print buffer structure
+ * @raw: pointer to character array used by print buffer
+ * @size: size of character array
+ *
+ * Note: If the character array is too small (or absent), the print buffer
+ * becomes a null device that discards anything written to it.
+ */
+
+void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
+{
+	pb->buf = raw;
+	pb->crs = raw;
+	pb->size = size;
+	pb->echo = 0;
+
+	if (size < TIPC_PB_MIN_SIZE) {
+		pb->buf = NULL;
+	} else if (raw) {
+		pb->buf[0] = 0;
+		pb->buf[size - 1] = ~0;
+	}
+}
+
+/**
+ * tipc_printbuf_reset - reinitialize print buffer to empty state
+ * @pb: pointer to print buffer structure
+ */
+
+static void tipc_printbuf_reset(struct print_buf *pb)
+{
+	if (pb->buf) {
+		pb->crs = pb->buf;
+		pb->buf[0] = 0;
+		pb->buf[pb->size - 1] = ~0;
+	}
+}
+
+/**
+ * tipc_printbuf_empty - test if print buffer is in empty state
+ * @pb: pointer to print buffer structure
+ *
+ * Returns non-zero if print buffer is empty.
+ */
+
+static int tipc_printbuf_empty(struct print_buf *pb)
+{
+	return !pb->buf || (pb->crs == pb->buf);
+}
+
+/**
+ * tipc_printbuf_validate - check for print buffer overflow
+ * @pb: pointer to print buffer structure
+ *
+ * Verifies that a print buffer has captured all data written to it.
+ * If data has been lost, linearize buffer and prepend an error message
+ *
+ * Returns length of print buffer data string (including trailing NUL)
+ */
+
+int tipc_printbuf_validate(struct print_buf *pb)
+{
+	char *err = "\n\n*** PRINT BUFFER OVERFLOW ***\n\n";
+	char *cp_buf;
+	struct print_buf cb;
+
+	if (!pb->buf)
+		return 0;
+
+	if (pb->buf[pb->size - 1] == 0) {
+		cp_buf = kmalloc(pb->size, GFP_ATOMIC);
+		if (cp_buf) {
+			tipc_printbuf_init(&cb, cp_buf, pb->size);
+			tipc_printbuf_move(&cb, pb);
+			tipc_printbuf_move(pb, &cb);
+			kfree(cp_buf);
+			memcpy(pb->buf, err, strlen(err));
+		} else {
+			tipc_printbuf_reset(pb);
+			tipc_printf(pb, err);
+		}
+	}
+	return pb->crs - pb->buf + 1;
+}
+
+/**
+ * tipc_printbuf_move - move print buffer contents to another print buffer
+ * @pb_to: pointer to destination print buffer structure
+ * @pb_from: pointer to source print buffer structure
+ *
+ * Current contents of destination print buffer (if any) are discarded.
+ * Source print buffer becomes empty if a successful move occurs.
+ */
+
+static void tipc_printbuf_move(struct print_buf *pb_to,
+			       struct print_buf *pb_from)
+{
+	int len;
+
+	/* Handle the cases where contents can't be moved */
+
+	if (!pb_to->buf)
+		return;
+
+	if (!pb_from->buf) {
+		tipc_printbuf_reset(pb_to);
+		return;
+	}
+
+	if (pb_to->size < pb_from->size) {
+		strcpy(pb_to->buf, "*** PRINT BUFFER MOVE ERROR ***");
+		pb_to->buf[pb_to->size - 1] = ~0;
+		pb_to->crs = strchr(pb_to->buf, 0);
+		return;
+	}
+
+	/* Copy data from char after cursor to end (if used) */
+
+	len = pb_from->buf + pb_from->size - pb_from->crs - 2;
+	if ((pb_from->buf[pb_from->size - 1] == 0) && (len > 0)) {
+		strcpy(pb_to->buf, pb_from->crs + 1);
+		pb_to->crs = pb_to->buf + len;
+	} else
+		pb_to->crs = pb_to->buf;
+
+	/* Copy data from start to cursor (always) */
+
+	len = pb_from->crs - pb_from->buf;
+	strcpy(pb_to->crs, pb_from->buf);
+	pb_to->crs += len;
+
+	tipc_printbuf_reset(pb_from);
+}
+
+/**
+ * tipc_printf - append formatted output to print buffer
+ * @pb: pointer to print buffer
+ * @fmt: formatted info to be printed
+ */
+
+void tipc_printf(struct print_buf *pb, const char *fmt, ...)
+{
+	int chars_to_add;
+	int chars_left;
+	char save_char;
+
+	spin_lock_bh(&print_lock);
+
+	FORMAT(print_string, chars_to_add, fmt);
+	if (chars_to_add >= TIPC_PB_MAX_STR)
+		strcpy(print_string, "*** PRINT BUFFER STRING TOO LONG ***");
+
+	if (pb->buf) {
+		chars_left = pb->buf + pb->size - pb->crs - 1;
+		if (chars_to_add <= chars_left) {
+			strcpy(pb->crs, print_string);
+			pb->crs += chars_to_add;
+		} else if (chars_to_add >= (pb->size - 1)) {
+			strcpy(pb->buf, print_string + chars_to_add + 1
+			       - pb->size);
+			pb->crs = pb->buf + pb->size - 1;
+		} else {
+			strcpy(pb->buf, print_string + chars_left);
+			save_char = print_string[chars_left];
+			print_string[chars_left] = 0;
+			strcpy(pb->crs, print_string);
+			print_string[chars_left] = save_char;
+			pb->crs = pb->buf + chars_to_add - chars_left;
+		}
+	}
+
+	if (pb->echo)
+		printk("%s", print_string);
+
+	spin_unlock_bh(&print_lock);
+}
+
+#ifdef CONFIG_TIPC_DEBUG
+
+/**
+ * print_to_console - write string of bytes to console in multiple chunks
+ */
+
+static void print_to_console(char *crs, int len)
+{
+	int rest = len;
+
+	while (rest > 0) {
+		int sz = rest < TIPC_PB_MAX_STR ? rest : TIPC_PB_MAX_STR;
+		char c = crs[sz];
+
+		crs[sz] = 0;
+		printk((const char *)crs);
+		crs[sz] = c;
+		rest -= sz;
+		crs += sz;
+	}
+}
+
+/**
+ * printbuf_dump - write print buffer contents to console
+ */
+
+static void printbuf_dump(struct print_buf *pb)
+{
+	int len;
+
+	if (!pb->buf) {
+		printk("*** PRINT BUFFER NOT ALLOCATED ***");
+		return;
+	}
+
+	/* Dump print buffer from char after cursor to end (if used) */
+
+	len = pb->buf + pb->size - pb->crs - 2;
+	if ((pb->buf[pb->size - 1] == 0) && (len > 0))
+		print_to_console(pb->crs + 1, len);
+
+	/* Dump print buffer from start to cursor (always) */
+
+	len = pb->crs - pb->buf;
+	print_to_console(pb->buf, len);
+}
+
+/**
+ * tipc_dump_dbg - dump (non-console) print buffer to console
+ * @pb: pointer to print buffer
+ */
+
+void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...)
+{
+	int len;
+
+	if (pb == TIPC_CONS)
+		return;
+
+	spin_lock_bh(&print_lock);
+
+	FORMAT(print_string, len, fmt);
+	printk(print_string);
+
+	printk("\n---- Start of %s log dump ----\n\n",
+	       (pb == TIPC_LOG) ? "global" : "local");
+	printbuf_dump(pb);
+	tipc_printbuf_reset(pb);
+	printk("\n---- End of dump ----\n");
+
+	spin_unlock_bh(&print_lock);
+}
+
+#endif
+
+/**
+ * tipc_log_resize - change the size of the TIPC log buffer
+ * @log_size: print buffer size to use
+ */
+
+int tipc_log_resize(int log_size)
+{
+	int res = 0;
+
+	spin_lock_bh(&print_lock);
+	kfree(TIPC_LOG->buf);
+	TIPC_LOG->buf = NULL;
+	if (log_size) {
+		if (log_size < TIPC_PB_MIN_SIZE)
+			log_size = TIPC_PB_MIN_SIZE;
+		res = TIPC_LOG->echo;
+		tipc_printbuf_init(TIPC_LOG, kmalloc(log_size, GFP_ATOMIC),
+				   log_size);
+		TIPC_LOG->echo = res;
+		res = !TIPC_LOG->buf;
+	}
+	spin_unlock_bh(&print_lock);
+
+	return res;
+}
+
+/**
+ * tipc_log_resize_cmd - reconfigure size of TIPC log buffer
+ */
+
+struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area, int req_tlv_space)
+{
+	u32 value;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
+	if (value != delimit(value, 0, 32768))
+		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
+						   " (log size must be 0-32768)");
+	if (tipc_log_resize(value))
+		return tipc_cfg_reply_error_string(
+			"unable to create specified log (log size is now 0)");
+	return tipc_cfg_reply_none();
+}
+
+/**
+ * tipc_log_dump - capture TIPC log buffer contents in configuration message
+ */
+
+struct sk_buff *tipc_log_dump(void)
+{
+	struct sk_buff *reply;
+
+	spin_lock_bh(&print_lock);
+	if (!TIPC_LOG->buf) {
+		spin_unlock_bh(&print_lock);
+		reply = tipc_cfg_reply_ultra_string("log not activated\n");
+	} else if (tipc_printbuf_empty(TIPC_LOG)) {
+		spin_unlock_bh(&print_lock);
+		reply = tipc_cfg_reply_ultra_string("log is empty\n");
+	} else {
+		struct tlv_desc *rep_tlv;
+		struct print_buf pb;
+		int str_len;
+
+		str_len = min(TIPC_LOG->size, 32768u);
+		spin_unlock_bh(&print_lock);
+		reply = tipc_cfg_reply_alloc(TLV_SPACE(str_len));
+		if (reply) {
+			rep_tlv = (struct tlv_desc *)reply->data;
+			tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), str_len);
+			spin_lock_bh(&print_lock);
+			tipc_printbuf_move(&pb, TIPC_LOG);
+			spin_unlock_bh(&print_lock);
+			str_len = strlen(TLV_DATA(rep_tlv)) + 1;
+			skb_put(reply, TLV_SPACE(str_len));
+			TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+		}
+	}
+	return reply;
+}
diff --git a/net/tipc/log.h b/net/tipc/log.h
new file mode 100644
index 000000000000..f4343bb9e429
--- /dev/null
+++ b/net/tipc/log.h
@@ -0,0 +1,67 @@
+/*
+ * net/tipc/log.h: Include file for TIPC print buffer routines
+ *
+ * Copyright (c) 1997-2006, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_DBG_H
+#define _TIPC_DBG_H
+
+/**
+ * struct print_buf - TIPC print buffer structure
+ * @buf: pointer to character array containing print buffer contents
+ * @size: size of character array
+ * @crs: pointer to first unused space in character array (i.e. final NUL)
+ * @echo: echo output to system console if non-zero
+ */
+
+struct print_buf {
+	char *buf;
+	u32 size;
+	char *crs;
+	int echo;
+};
+
+#define TIPC_PB_MIN_SIZE 64	/* minimum size for a print buffer's array */
+#define TIPC_PB_MAX_STR 512	/* max printable string (with trailing NUL) */
+
+void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
+int  tipc_printbuf_validate(struct print_buf *pb);
+
+int tipc_log_resize(int log_size);
+
+struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
+				    int req_tlv_space);
+struct sk_buff *tipc_log_dump(void);
+
+#endif
-- 
cgit v1.2.3


From b29f14284989b3d0b3a5ce268b5b1fc4df9c5795 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:25 +0000
Subject: tipc: remove calls to dbg() and msg_dbg()

Eliminates obsolete calls to two of TIPC's main debugging macros, as well
as a pair of associated debugging routines that are no longer required.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c      |   3 --
 net/tipc/bearer.c     |   1 -
 net/tipc/config.c     |   4 +-
 net/tipc/core.h       |   4 --
 net/tipc/discover.c   |   4 --
 net/tipc/link.c       | 119 ++------------------------------------------------
 net/tipc/name_distr.c |  14 ------
 net/tipc/name_table.c |  33 +-------------
 net/tipc/net.c        |  10 -----
 net/tipc/node.c       |   3 --
 net/tipc/port.c       |  10 -----
 net/tipc/port.h       |   1 -
 net/tipc/socket.c     |  39 +++++------------
 net/tipc/subscr.c     |   4 --
 14 files changed, 16 insertions(+), 233 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 110829eab96a..cb817d503c14 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -436,8 +436,6 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 	u32 seqno;
 	struct sk_buff *deferred;
 
-	msg_dbg(msg, "<BC<<<");
-
 	if (unlikely(!node || !tipc_node_is_up(node) || !node->bclink.supported ||
 		     (msg_mc_netid(msg) != tipc_net_id))) {
 		buf_discard(buf);
@@ -445,7 +443,6 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 	}
 
 	if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) {
-		msg_dbg(msg, "<BCNACK<<<");
 		if (msg_destnode(msg) == tipc_own_addr) {
 			tipc_node_lock(node);
 			tipc_bclink_acknowledge(node, msg_bcast_ack(msg));
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 885da94be4ac..24dc6c2c75ad 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -164,7 +164,6 @@ int  tipc_register_media(u32 media_type,
 	m_ptr->priority = bearer_priority;
 	m_ptr->tolerance = link_tolerance;
 	m_ptr->window = send_window_limit;
-	dbg("Media <%s> registered\n", name);
 	res = 0;
 exit:
 	write_unlock_bh(&tipc_net_lock);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index afa6e853c04b..a7894ff77ae9 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -65,10 +65,8 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
 	struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
 	int new_tlv_space = TLV_SPACE(tlv_data_size);
 
-	if (skb_tailroom(buf) < new_tlv_space) {
-		dbg("tipc_cfg_append_tlv unable to append TLV\n");
+	if (skb_tailroom(buf) < new_tlv_space)
 		return 0;
-	}
 	skb_put(buf, new_tlv_space);
 	tlv->tlv_type = htons(tlv_type);
 	tlv->tlv_len  = htons(TLV_LENGTH(tlv_data_size));
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 17f3670ed95d..b4e54f8dd43b 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -243,7 +243,6 @@ u32 tipc_k_signal(Handler routine, unsigned long argument);
 static inline void k_init_timer(struct timer_list *timer, Handler routine,
 				unsigned long argument)
 {
-	dbg("initializing timer %p\n", timer);
 	setup_timer(timer, routine, argument);
 }
 
@@ -263,7 +262,6 @@ static inline void k_init_timer(struct timer_list *timer, Handler routine,
 
 static inline void k_start_timer(struct timer_list *timer, unsigned long msec)
 {
-	dbg("starting timer %p for %u\n", timer, msec);
 	mod_timer(timer, jiffies + msecs_to_jiffies(msec) + 1);
 }
 
@@ -280,7 +278,6 @@ static inline void k_start_timer(struct timer_list *timer, unsigned long msec)
 
 static inline void k_cancel_timer(struct timer_list *timer)
 {
-	dbg("cancelling timer %p\n", timer);
 	del_timer_sync(timer);
 }
 
@@ -298,7 +295,6 @@ static inline void k_cancel_timer(struct timer_list *timer)
 
 static inline void k_term_timer(struct timer_list *timer)
 {
-	dbg("terminating timer %p\n", timer);
 }
 
 
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 80799f6ba892..e7223789d150 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -133,7 +133,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	u32 type = msg_type(msg);
 
 	msg_get_media_addr(msg,&media_addr);
-	msg_dbg(msg, "RECV:");
 	buf_discard(buf);
 
 	if (net_id != tipc_net_id)
@@ -156,7 +155,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 		struct tipc_node *n_ptr = tipc_node_find(orig);
 		int link_fully_up;
 
-		dbg(" in own cluster\n");
 		if (n_ptr == NULL) {
 			n_ptr = tipc_node_create(orig);
 			if (!n_ptr)
@@ -173,7 +171,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 
 		link = n_ptr->links[b_ptr->identity];
 		if (!link) {
-			dbg("creating link\n");
 			link = tipc_link_create(b_ptr, orig, &media_addr);
 			if (!link) {
 				spin_unlock_bh(&n_ptr->lock);
@@ -198,7 +195,6 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 			return;
 		rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr);
 		if (rbuf != NULL) {
-			msg_dbg(buf_msg(rbuf),"SEND:");
 			b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr);
 			buf_discard(rbuf);
 		}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 671ffd3c0e53..cb10d20caef3 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -153,18 +153,6 @@ static void dbg_print_link(struct link *l_ptr, const char *str)
 		link_print(l_ptr, DBG_OUTPUT, str);
 }
 
-static void dbg_print_buf_chain(struct sk_buff *root_buf)
-{
-	if (DBG_OUTPUT != TIPC_NULL) {
-		struct sk_buff *buf = root_buf;
-
-		while (buf) {
-			msg_dbg(buf_msg(buf), "In chain: ");
-			buf = buf->next;
-		}
-	}
-}
-
 /*
  *  Simple link routines
  */
@@ -433,9 +421,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 	list_add_tail(&l_ptr->link_list, &b_ptr->links);
 	tipc_k_signal((Handler)link_start, (unsigned long)l_ptr);
 
-	dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n",
-	    l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit);
-
 	return l_ptr;
 }
 
@@ -455,8 +440,6 @@ void tipc_link_delete(struct link *l_ptr)
 		return;
 	}
 
-	dbg("tipc_link_delete()\n");
-
 	k_cancel_timer(&l_ptr->timer);
 
 	tipc_node_lock(l_ptr->owner);
@@ -473,7 +456,6 @@ void tipc_link_delete(struct link *l_ptr)
 
 static void link_start(struct link *l_ptr)
 {
-	dbg("link_start %x\n", l_ptr);
 	link_state_event(l_ptr, STARTING_EVT);
 }
 
@@ -926,9 +908,6 @@ static int link_bundle_buf(struct link *l_ptr,
 	skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
 	msg_set_size(bundler_msg, to_pos + size);
 	msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
-	dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
-	    msg_msgcnt(bundler_msg), size, to_pos, msg_seqno(bundler_msg));
-	msg_dbg(msg, "PACKD:");
 	buf_discard(buf);
 	l_ptr->stats.sent_bundled++;
 	return 1;
@@ -977,7 +956,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 			return link_schedule_port(l_ptr, msg_origport(msg),
 						  size);
 		}
-		msg_dbg(msg, "TIPC: Congestion, throwing away\n");
 		buf_discard(buf);
 		if (imp > CONN_MANAGER) {
 			warn("Resetting link <%s>, send queue full", l_ptr->name);
@@ -1066,17 +1044,12 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
 		if (l_ptr) {
-			dbg("tipc_link_send: found link %x for dest %x\n", l_ptr, dest);
 			res = tipc_link_send_buf(l_ptr, buf);
 		} else {
-			dbg("Attempt to send msg to unreachable node:\n");
-			msg_dbg(buf_msg(buf),">>>");
 			buf_discard(buf);
 		}
 		tipc_node_unlock(n_ptr);
 	} else {
-		dbg("Attempt to send msg to unknown node:\n");
-		msg_dbg(buf_msg(buf),">>>");
 		buf_discard(buf);
 	}
 	read_unlock_bh(&tipc_net_lock);
@@ -1103,10 +1076,8 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 				if (likely(tipc_bearer_send(l_ptr->b_ptr, buf,
 							    &l_ptr->media_addr))) {
 					l_ptr->unacked_window = 0;
-					msg_dbg(msg,"SENT_FAST:");
 					return res;
 				}
-				dbg("failed sent fast...\n");
 				tipc_bearer_schedule(l_ptr->b_ptr, l_ptr);
 				l_ptr->stats.bearer_congs++;
 				l_ptr->next_out = buf;
@@ -1141,8 +1112,6 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode)
 	if (likely(n_ptr)) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector];
-		dbg("send_fast: buf %x selected %x, destnode = %x\n",
-		    buf, l_ptr, destnode);
 		if (likely(l_ptr)) {
 			res = link_send_buf_fast(l_ptr, buf, &dummy);
 			tipc_node_unlock(n_ptr);
@@ -1292,7 +1261,6 @@ again:
 
 	/* Prepare reusable fragment header: */
 
-	msg_dbg(hdr, ">FRAGMENTING>");
 	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
 		 INT_H_SIZE, msg_destnode(hdr));
 	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
@@ -1308,7 +1276,6 @@ again:
 	skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 	hsz = msg_hdr_sz(hdr);
 	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
-	msg_dbg(buf_msg(buf), ">BUILD>");
 
 	/* Chop up message: */
 
@@ -1367,7 +1334,6 @@ error:
 			skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 			fragm_crs = INT_H_SIZE;
 			fragm_rest = fragm_sz;
-			msg_dbg(buf_msg(buf),"  >BUILD>");
 		}
 	}
 	while (rest > 0);
@@ -1417,7 +1383,6 @@ reject:
 		l_ptr->stats.sent_fragments++;
 		msg_set_long_msgno(msg, l_ptr->long_msg_seq_no);
 		link_add_to_outqueue(l_ptr, buf, msg);
-		msg_dbg(msg, ">ADD>");
 		buf = next;
 	}
 
@@ -1459,14 +1424,12 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
-			msg_dbg(buf_msg(buf), ">DEF-RETR>");
 			l_ptr->retransm_queue_head = mod(++r_q_head);
 			l_ptr->retransm_queue_size = --r_q_size;
 			l_ptr->stats.retransmitted++;
 			return 0;
 		} else {
 			l_ptr->stats.bearer_congs++;
-			msg_dbg(buf_msg(buf), "|>DEF-RETR>");
 			return PUSH_FAILED;
 		}
 	}
@@ -1478,13 +1441,11 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(buf_msg(buf),l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
-			msg_dbg(buf_msg(buf), ">DEF-PROT>");
 			l_ptr->unacked_window = 0;
 			buf_discard(buf);
 			l_ptr->proto_msg_queue = NULL;
 			return 0;
 		} else {
-			msg_dbg(buf_msg(buf), "|>DEF-PROT>");
 			l_ptr->stats.bearer_congs++;
 			return PUSH_FAILED;
 		}
@@ -1504,11 +1465,9 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 			if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
 				if (msg_user(msg) == MSG_BUNDLER)
 					msg_set_type(msg, CLOSED_MSG);
-				msg_dbg(msg, ">PUSH-DATA>");
 				l_ptr->next_out = buf->next;
 				return 0;
 			} else {
-				msg_dbg(msg, "|PUSH-DATA|");
 				l_ptr->stats.bearer_congs++;
 				return PUSH_FAILED;
 			}
@@ -1571,7 +1530,6 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 	struct tipc_msg *msg = buf_msg(buf);
 
 	warn("Retransmission failure on link <%s>\n", l_ptr->name);
-	tipc_msg_dbg(TIPC_OUTPUT, msg, ">RETR-FAIL>");
 
 	if (l_ptr->addr) {
 
@@ -1621,11 +1579,8 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 
 	msg = buf_msg(buf);
 
-	dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
-
 	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
 		if (l_ptr->retransm_queue_size == 0) {
-			msg_dbg(msg, ">NO_RETR->BCONG>");
 			dbg_print_link(l_ptr, "   ");
 			l_ptr->retransm_queue_head = msg_seqno(msg);
 			l_ptr->retransm_queue_size = retransmits;
@@ -1653,7 +1608,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
-			msg_dbg(buf_msg(buf), ">RETR>");
 			buf = buf->next;
 			retransmits--;
 			l_ptr->stats.retransmitted++;
@@ -1939,12 +1893,10 @@ deliver:
 			tipc_node_unlock(n_ptr);
 			continue;
 		}
-		msg_dbg(msg,"NSEQ<REC<");
 		link_state_event(l_ptr, TRAFFIC_MSG_EVT);
 
 		if (link_working_working(l_ptr)) {
 			/* Re-insert in front of queue */
-			msg_dbg(msg,"RECV-REINS:");
 			buf->next = head;
 			head = buf;
 			tipc_node_unlock(n_ptr);
@@ -2026,9 +1978,6 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr,
 		return;
 	}
 
-	dbg("rx OOS msg: seq_no %u, expecting %u (%u)\n",
-	    seq_no, mod(l_ptr->next_in_no), l_ptr->next_in_no);
-
 	/* Record OOS packet arrival (force mismatch on next timeout) */
 
 	l_ptr->checkpoint--;
@@ -2146,8 +2095,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 
 	/* Message can be sent */
 
-	msg_dbg(msg, ">>");
-
 	buf = tipc_buf_acquire(msg_size);
 	if (!buf)
 		return;
@@ -2181,8 +2128,6 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 	u32 msg_tol;
 	struct tipc_msg *msg = buf_msg(buf);
 
-	dbg("AT(%u):", jiffies_to_msecs(jiffies));
-	msg_dbg(msg, "<<");
 	if (link_blocked(l_ptr))
 		goto exit;
 
@@ -2201,11 +2146,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 	case RESET_MSG:
 		if (!link_working_unknown(l_ptr) &&
 		    (l_ptr->peer_session != INVALID_SESSION)) {
-			if (msg_session(msg) == l_ptr->peer_session) {
-				dbg("Duplicate RESET: %u<->%u\n",
-				    msg_session(msg), l_ptr->peer_session);
+			if (msg_session(msg) == l_ptr->peer_session)
 				break; /* duplicate: ignore */
-			}
 		}
 		/* fall thru' */
 	case ACTIVATE_MSG:
@@ -2266,8 +2208,6 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 
 		max_pkt_ack = msg_max_pkt(msg);
 		if (max_pkt_ack > l_ptr->max_pkt) {
-			dbg("Link <%s> updated MTU %u -> %u\n",
-			    l_ptr->name, l_ptr->max_pkt, max_pkt_ack);
 			l_ptr->max_pkt = max_pkt_ack;
 			l_ptr->max_pkt_probes = 0;
 		}
@@ -2289,14 +2229,11 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 						 0, rec_gap, 0, 0, max_pkt_ack);
 		}
 		if (msg_seq_gap(msg)) {
-			msg_dbg(msg, "With Gap:");
 			l_ptr->stats.recv_nacks++;
 			tipc_link_retransmit(l_ptr, l_ptr->first_out,
 					     msg_seq_gap(msg));
 		}
 		break;
-	default:
-		msg_dbg(buf_msg(buf), "<DISCARDING UNKNOWN<");
 	}
 exit:
 	buf_discard(buf);
@@ -2331,8 +2268,6 @@ static void tipc_link_tunnel(struct link *l_ptr,
 	}
 	skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
 	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
-	dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
-	msg_dbg(buf_msg(buf), ">SEND>");
 	tipc_link_send_buf(tunnel, buf);
 }
 
@@ -2364,7 +2299,6 @@ void tipc_link_changeover(struct link *l_ptr)
 		 ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
 	msg_set_msgcnt(&tunnel_hdr, msgcount);
-	dbg("Link changeover requires %u tunnel messages\n", msgcount);
 
 	if (!l_ptr->first_out) {
 		struct sk_buff *buf;
@@ -2373,9 +2307,6 @@ void tipc_link_changeover(struct link *l_ptr)
 		if (buf) {
 			skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
 			msg_set_size(&tunnel_hdr, INT_H_SIZE);
-			dbg("%c->%c:", l_ptr->b_ptr->net_plane,
-			    tunnel->b_ptr->net_plane);
-			msg_dbg(&tunnel_hdr, "EMPTY>SEND>");
 			tipc_link_send_buf(tunnel, buf);
 		} else {
 			warn("Link changeover error, "
@@ -2439,9 +2370,6 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
 		skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
 		skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
 					       length);
-		dbg("%c->%c:", l_ptr->b_ptr->net_plane,
-		    tunnel->b_ptr->net_plane);
-		msg_dbg(buf_msg(outbuf), ">SEND>");
 		tipc_link_send_buf(tunnel, outbuf);
 		if (!tipc_link_is_up(l_ptr))
 			return;
@@ -2488,31 +2416,24 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	u32 msg_count = msg_msgcnt(tunnel_msg);
 
 	dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)];
-	if (!dest_link) {
-		msg_dbg(tunnel_msg, "NOLINK/<REC<");
+	if (!dest_link)
 		goto exit;
-	}
 	if (dest_link == *l_ptr) {
 		err("Unexpected changeover message on link <%s>\n",
 		    (*l_ptr)->name);
 		goto exit;
 	}
-	dbg("%c<-%c:", dest_link->b_ptr->net_plane,
-	    (*l_ptr)->b_ptr->net_plane);
 	*l_ptr = dest_link;
 	msg = msg_get_wrapped(tunnel_msg);
 
 	if (msg_typ == DUPLICATE_MSG) {
-		if (less(msg_seqno(msg), mod(dest_link->next_in_no))) {
-			msg_dbg(tunnel_msg, "DROP/<REC<");
+		if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
 			goto exit;
-		}
 		*buf = buf_extract(tunnel_buf,INT_H_SIZE);
 		if (*buf == NULL) {
 			warn("Link changeover error, duplicate msg dropped\n");
 			goto exit;
 		}
-		msg_dbg(tunnel_msg, "TNL<REC<");
 		buf_discard(tunnel_buf);
 		return 1;
 	}
@@ -2520,18 +2441,14 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	/* First original message ?: */
 
 	if (tipc_link_is_up(dest_link)) {
-		msg_dbg(tunnel_msg, "UP/FIRST/<REC<");
 		info("Resetting link <%s>, changeover initiated by peer\n",
 		     dest_link->name);
 		tipc_link_reset(dest_link);
 		dest_link->exp_msg_count = msg_count;
-		dbg("Expecting %u tunnelled messages\n", msg_count);
 		if (!msg_count)
 			goto exit;
 	} else if (dest_link->exp_msg_count == START_CHANGEOVER) {
-		msg_dbg(tunnel_msg, "BLK/FIRST/<REC<");
 		dest_link->exp_msg_count = msg_count;
-		dbg("Expecting %u tunnelled messages\n", msg_count);
 		if (!msg_count)
 			goto exit;
 	}
@@ -2541,18 +2458,15 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	if (dest_link->exp_msg_count == 0) {
 		warn("Link switchover error, "
 		     "got too many tunnelled messages\n");
-		msg_dbg(tunnel_msg, "OVERDUE/DROP/<REC<");
 		dbg_print_link(dest_link, "LINK:");
 		goto exit;
 	}
 	dest_link->exp_msg_count--;
 	if (less(msg_seqno(msg), dest_link->reset_checkpoint)) {
-		msg_dbg(tunnel_msg, "DROP/DUPL/<REC<");
 		goto exit;
 	} else {
 		*buf = buf_extract(tunnel_buf, INT_H_SIZE);
 		if (*buf != NULL) {
-			msg_dbg(tunnel_msg, "TNL<REC<");
 			buf_discard(tunnel_buf);
 			return 1;
 		} else {
@@ -2574,7 +2488,6 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
 	u32 pos = INT_H_SIZE;
 	struct sk_buff *obuf;
 
-	msg_dbg(buf_msg(buf), "<BNDL<: ");
 	while (msgcount--) {
 		obuf = buf_extract(buf, pos);
 		if (obuf == NULL) {
@@ -2582,7 +2495,6 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
 			break;
 		}
 		pos += align(msg_size(buf_msg(obuf)));
-		msg_dbg(buf_msg(obuf), "     /");
 		tipc_net_route_msg(obuf);
 	}
 	buf_discard(buf);
@@ -2719,7 +2631,6 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 	u32 long_msg_seq_no = msg_long_msgno(fragm);
 
 	*fb = NULL;
-	msg_dbg(fragm,"FRG<REC<");
 
 	/* Is there an incomplete message waiting for this fragment? */
 
@@ -2738,7 +2649,6 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		if (msg_type(imsg) == TIPC_MCAST_MSG)
 			max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
 		if (msg_size(imsg) > max) {
-			msg_dbg(fragm,"<REC<Oversized: ");
 			buf_discard(fbuf);
 			return 0;
 		}
@@ -2782,10 +2692,6 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		set_expected_frags(pbuf,exp_frags);
 		return 0;
 	}
-	dbg(" Discarding orphan fragment %x\n",fbuf);
-	msg_dbg(fragm,"ORPHAN:");
-	dbg("Pending long buffers:\n");
-	dbg_print_buf_chain(*pending);
 	buf_discard(fbuf);
 	return 0;
 }
@@ -2813,11 +2719,6 @@ static void link_check_defragm_bufs(struct link *l_ptr)
 			incr_timer_cnt(buf);
 			prev = buf;
 		} else {
-			dbg(" Discarding incomplete long buffer\n");
-			msg_dbg(buf_msg(buf), "LONG:");
-			dbg_print_link(l_ptr, "curr:");
-			dbg("Pending long buffers:\n");
-			dbg_print_buf_chain(l_ptr->defragm_buf);
 			if (prev)
 				prev->next = buf->next;
 			else
@@ -3165,19 +3066,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 	return res;
 }
 
-static void link_dump_send_queue(struct link *l_ptr)
-{
-	if (l_ptr->next_out) {
-		info("\nContents of unsent queue:\n");
-		dbg_print_buf_chain(l_ptr->next_out);
-	}
-	info("\nContents of send queue:\n");
-	if (l_ptr->first_out) {
-		dbg_print_buf_chain(l_ptr->first_out);
-	}
-	info("Empty send queue\n");
-}
-
 static void link_print(struct link *l_ptr, struct print_buf *buf,
 		       const char *str)
 {
@@ -3203,7 +3091,6 @@ static void link_print(struct link *l_ptr, struct print_buf *buf,
 			tipc_printf(buf, "first_out= %x ", l_ptr->first_out);
 			tipc_printf(buf, "next_out= %x ", l_ptr->next_out);
 			tipc_printf(buf, "last_out= %x ", l_ptr->last_out);
-			link_dump_send_queue(l_ptr);
 		}
 	} else
 		tipc_printf(buf, "[]");
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c4583fe888d8..0dd648ec0809 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -87,7 +87,6 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 	i->upper = htonl(p->upper);
 	i->ref = htonl(p->ref);
 	i->key = htonl(p->key);
-	dbg("publ_to_item: %u, %u, %u\n", p->type, p->lower, p->upper);
 }
 
 /**
@@ -147,7 +146,6 @@ void tipc_named_publish(struct publication *publ)
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	dbg("tipc_named_publish: broadcasting publish msg\n");
 	named_cluster_distribute(buf);
 }
 
@@ -171,7 +169,6 @@ void tipc_named_withdraw(struct publication *publ)
 
 	item = (struct distr_item *)msg_data(buf_msg(buf));
 	publ_to_item(item, publ);
-	dbg("tipc_named_withdraw: broadcasting withdraw msg\n");
 	named_cluster_distribute(buf);
 }
 
@@ -209,9 +206,6 @@ void tipc_named_node_up(unsigned long node)
 		left -= ITEM_SIZE;
 		if (!left) {
 			msg_set_link_selector(buf_msg(buf), node);
-			dbg("tipc_named_node_up: sending publish msg to "
-			    "<%u.%u.%u>\n", tipc_zone(node),
-			    tipc_cluster(node), tipc_node(node));
 			tipc_link_send(buf, node, node);
 			buf = NULL;
 		}
@@ -236,8 +230,6 @@ static void node_is_down(struct publication *publ)
 	struct publication *p;
 
 	write_lock_bh(&tipc_nametbl_lock);
-	dbg("node_is_down: withdrawing %u, %u, %u\n",
-	    publ->type, publ->lower, publ->upper);
 	publ->key += 1222345;
 	p = tipc_nametbl_remove_publ(publ->type, publ->lower,
 				     publ->node, publ->ref, publ->key);
@@ -268,9 +260,6 @@ void tipc_named_recv(struct sk_buff *buf)
 	write_lock_bh(&tipc_nametbl_lock);
 	while (count--) {
 		if (msg_type(msg) == PUBLICATION) {
-			dbg("tipc_named_recv: got publication for %u, %u, %u\n",
-			    ntohl(item->type), ntohl(item->lower),
-			    ntohl(item->upper));
 			publ = tipc_nametbl_insert_publ(ntohl(item->type),
 							ntohl(item->lower),
 							ntohl(item->upper),
@@ -285,9 +274,6 @@ void tipc_named_recv(struct sk_buff *buf)
 						       (net_ev_handler)node_is_down);
 			}
 		} else if (msg_type(msg) == WITHDRAWAL) {
-			dbg("tipc_named_recv: got withdrawl for %u, %u, %u\n",
-			    ntohl(item->type), ntohl(item->lower),
-			    ntohl(item->upper));
 			publ = tipc_nametbl_remove_publ(ntohl(item->type),
 							ntohl(item->lower),
 							msg_orignode(msg),
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index d5adb0456746..ddc2ad4c2d32 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -172,8 +172,6 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
 	spin_lock_init(&nseq->lock);
 	nseq->type = type;
 	nseq->sseqs = sseq;
-	dbg("tipc_nameseq_create(): nseq = %p, type %u, ssseqs %p, ff: %u\n",
-	    nseq, type, nseq->sseqs, nseq->first_free);
 	nseq->alloc = 1;
 	INIT_HLIST_NODE(&nseq->ns_list);
 	INIT_LIST_HEAD(&nseq->subscriptions);
@@ -251,8 +249,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 	int created_subseq = 0;
 
 	sseq = nameseq_find_subseq(nseq, lower);
-	dbg("nameseq_ins: for seq %p, {%u,%u}, found sseq %p\n",
-	    nseq, type, lower, sseq);
 	if (sseq) {
 
 		/* Lower end overlaps existing entry => need an exact match */
@@ -289,18 +285,15 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 				     type, lower, upper);
 				return NULL;
 			}
-			dbg("Allocated %u more sseqs\n", nseq->alloc);
 			memcpy(sseqs, nseq->sseqs,
 			       nseq->alloc * sizeof(struct sub_seq));
 			kfree(nseq->sseqs);
 			nseq->sseqs = sseqs;
 			nseq->alloc *= 2;
 		}
-		dbg("Have %u sseqs for type %u\n", nseq->alloc, type);
 
 		/* Insert new sub-sequence */
 
-		dbg("ins in pos %u, ff = %u\n", inspos, nseq->first_free);
 		sseq = &nseq->sseqs[inspos];
 		freesseq = &nseq->sseqs[nseq->first_free];
 		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof (*sseq));
@@ -310,17 +303,12 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 		sseq->upper = upper;
 		created_subseq = 1;
 	}
-	dbg("inserting {%u,%u,%u} from <0x%x:%u> into sseq %p(%u,%u) of seq %p\n",
-	    type, lower, upper, node, port, sseq,
-	    sseq->lower, sseq->upper, nseq);
 
 	/* Insert a publication: */
 
 	publ = publ_create(type, lower, upper, scope, node, port, key);
 	if (!publ)
 		return NULL;
-	dbg("inserting publ %p, node=0x%x publ->node=0x%x, subscr->node=%p\n",
-	    publ, node, publ->node, publ->subscr.node);
 
 	sseq->zone_list_size++;
 	if (!sseq->zone_list)
@@ -355,7 +343,6 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 	 * Any subscriptions waiting for notification?
 	 */
 	list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) {
-		dbg("calling report_overlap()\n");
 		tipc_subscr_report_overlap(s,
 					   publ->lower,
 					   publ->upper,
@@ -393,9 +380,6 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i
 	if (!sseq)
 		return NULL;
 
-	dbg("tipc_nameseq_remove_publ: seq: %p, sseq %p, {%u,%u}, key %u\n",
-	    nseq, sseq, nseq->type, inst, key);
-
 	/* Remove publication from zone scope list */
 
 	prev = sseq->zone_list;
@@ -549,15 +533,10 @@ static struct name_seq *nametbl_find_seq(u32 type)
 	struct hlist_node *seq_node;
 	struct name_seq *ns;
 
-	dbg("find_seq %u,(%u,0x%x) table = %p, hash[type] = %u\n",
-	    type, htonl(type), type, table.types, hash(type));
-
 	seq_head = &table.types[hash(type)];
 	hlist_for_each_entry(ns, seq_node, seq_head, ns_list) {
-		if (ns->type == type) {
-			dbg("found %p\n", ns);
+		if (ns->type == type)
 			return ns;
-		}
 	}
 
 	return NULL;
@@ -568,18 +547,14 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
 {
 	struct name_seq *seq = nametbl_find_seq(type);
 
-	dbg("tipc_nametbl_insert_publ: {%u,%u,%u} found %p\n", type, lower, upper, seq);
 	if (lower > upper) {
 		warn("Failed to publish illegal {%u,%u,%u}\n",
 		     type, lower, upper);
 		return NULL;
 	}
 
-	dbg("Publishing {%u,%u,%u} from 0x%x\n", type, lower, upper, node);
-	if (!seq) {
+	if (!seq)
 		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
-		dbg("tipc_nametbl_insert_publ: created %p\n", seq);
-	}
 	if (!seq)
 		return NULL;
 
@@ -596,7 +571,6 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
 	if (!seq)
 		return NULL;
 
-	dbg("Withdrawing {%u,%u} from 0x%x\n", type, lower, node);
 	publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key);
 
 	if (!seq->first_free && list_empty(&seq->subscriptions)) {
@@ -792,7 +766,6 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
 {
 	struct publication *publ;
 
-	dbg("tipc_nametbl_withdraw: {%u,%u}, key=%u\n", type, lower, key);
 	write_lock_bh(&tipc_nametbl_lock);
 	publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key);
 	if (likely(publ)) {
@@ -827,8 +800,6 @@ void tipc_nametbl_subscribe(struct subscription *s)
 	}
 	if (seq){
 		spin_lock_bh(&seq->lock);
-		dbg("tipc_nametbl_subscribe:found %p for {%u,%u,%u}\n",
-		    seq, type, s->seq.lower, s->seq.upper);
 		tipc_nameseq_subscribe(seq, s);
 		spin_unlock_bh(&seq->lock);
 	} else {
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 3baf55ee0985..6290becd35be 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -138,22 +138,18 @@ static void net_route_named_msg(struct sk_buff *buf)
 	u32 dport;
 
 	if (!msg_named(msg)) {
-		msg_dbg(msg, "tipc_net->drop_nam:");
 		buf_discard(buf);
 		return;
 	}
 
 	dnode = addr_domain(msg_lookup_scope(msg));
 	dport = tipc_nametbl_translate(msg_nametype(msg), msg_nameinst(msg), &dnode);
-	dbg("tipc_net->lookup<%u,%u>-><%u,%x>\n",
-	    msg_nametype(msg), msg_nameinst(msg), dport, dnode);
 	if (dport) {
 		msg_set_destnode(msg, dnode);
 		msg_set_destport(msg, dport);
 		tipc_net_route_msg(buf);
 		return;
 	}
-	msg_dbg(msg, "tipc_net->rej:NO NAME: ");
 	tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
 }
 
@@ -169,18 +165,14 @@ void tipc_net_route_msg(struct sk_buff *buf)
 	msg_incr_reroute_cnt(msg);
 	if (msg_reroute_cnt(msg) > 6) {
 		if (msg_errcode(msg)) {
-			msg_dbg(msg, "NET>DISC>:");
 			buf_discard(buf);
 		} else {
-			msg_dbg(msg, "NET>REJ>:");
 			tipc_reject_msg(buf, msg_destport(msg) ?
 					TIPC_ERR_NO_PORT : TIPC_ERR_NO_NAME);
 		}
 		return;
 	}
 
-	msg_dbg(msg, "tipc_net->rout: ");
-
 	/* Handle message for this node */
 	dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
 	if (tipc_in_scope(dnode, tipc_own_addr)) {
@@ -201,14 +193,12 @@ void tipc_net_route_msg(struct sk_buff *buf)
 			tipc_port_recv_proto_msg(buf);
 			break;
 		default:
-			msg_dbg(msg,"DROP/NET/<REC<");
 			buf_discard(buf);
 		}
 		return;
 	}
 
 	/* Handle message for another node */
-	msg_dbg(msg, "NET>SEND>: ");
 	skb_trim(buf, msg_size(msg));
 	tipc_link_send(buf, dnode, msg_link_selector(msg));
 }
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 31dcca98201a..fb54719679a6 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -97,7 +97,6 @@ void tipc_node_delete(struct tipc_node *n_ptr)
 	if (!n_ptr)
 		return;
 
-	dbg("node %x deleted\n", n_ptr->addr);
 	n_num = tipc_node(n_ptr->addr);
 	tipc_net.nodes[n_num] = NULL;
 	kfree(n_ptr);
@@ -124,7 +123,6 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr)
 	     l_ptr->name, l_ptr->b_ptr->net_plane);
 
 	if (!active[0]) {
-		dbg(" link %x into %x/%x\n", l_ptr, &active[0], &active[1]);
 		active[0] = active[1] = l_ptr;
 		node_established_contact(n_ptr);
 		return;
@@ -302,7 +300,6 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
 
 static void node_established_contact(struct tipc_node *n_ptr)
 {
-	dbg("node_established_contact:-> %x\n", n_ptr->addr);
 	tipc_k_signal((Handler)tipc_named_node_up, n_ptr->addr);
 
 	/* Syncronize broadcast acks */
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 33d0b3b7175f..8bacd572a9fb 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -188,7 +188,6 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 
 			if (b == NULL) {
 				warn("Unable to deliver multicast message(s)\n");
-				msg_dbg(msg, "LOST:");
 				goto exit;
 			}
 			if ((index == 0) && (cnt != 0)) {
@@ -280,7 +279,6 @@ int tipc_deleteport(u32 ref)
 	spin_unlock_bh(&tipc_port_list_lock);
 	k_term_timer(&p_ptr->timer);
 	kfree(p_ptr);
-	dbg("Deleted port %u\n", ref);
 	tipc_net_route_msg(buf);
 	return 0;
 }
@@ -366,7 +364,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 		msg_set_orignode(msg, orignode);
 		msg_set_transp_seqno(msg, seqno);
 		msg_set_msgcnt(msg, ack);
-		msg_dbg(msg, "PORT>SEND>:");
 	}
 	return buf;
 }
@@ -384,7 +381,6 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 		data_sz = MAX_REJECT_SIZE;
 	if (msg_connected(msg) && (imp < TIPC_CRITICAL_IMPORTANCE))
 		imp++;
-	msg_dbg(msg, "port->rej: ");
 
 	/* discard rejected message if it shouldn't be returned to sender */
 	if (msg_errcode(msg) || msg_dest_droppable(msg)) {
@@ -547,8 +543,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
 	struct sk_buff *r_buf = NULL;
 	struct sk_buff *abort_buf = NULL;
 
-	msg_dbg(msg, "PORT<RECV<:");
-
 	if (!p_ptr) {
 		err = TIPC_ERR_NO_PORT;
 	} else if (p_ptr->publ.connected) {
@@ -1015,9 +1009,6 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq)
 	if (!p_ptr)
 		return -EINVAL;
 
-	dbg("tipc_publ %u, p_ptr = %x, conn = %x, scope = %x, "
-	    "lower = %u, upper = %u\n",
-	    ref, p_ptr, p_ptr->publ.connected, scope, seq->lower, seq->upper);
 	if (p_ptr->publ.connected)
 		goto exit;
 	if (seq->lower > seq->upper)
@@ -1357,7 +1348,6 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest,
 
 	skb_push(buf, DIR_MSG_H_SIZE);
 	skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
-	msg_dbg(msg, "buf2port: ");
 	p_ptr->sent++;
 	if (dest->node == tipc_own_addr)
 		return tipc_port_recv_msg(buf);
diff --git a/net/tipc/port.h b/net/tipc/port.h
index da607a8a2f35..a9c528799f2f 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -316,7 +316,6 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf)
 		err = TIPC_ERR_NO_PORT;
 	}
 reject:
-	dbg("port->rejecting, err = %x..\n",err);
 	return tipc_reject_msg(buf, err);
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cd0bb77f2673..7a21a5ee43e8 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1226,42 +1226,25 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 	 */
 
 	if (sock->state == SS_READY) {
-		if (msg_connected(msg)) {
-			msg_dbg(msg, "dispatch filter 1\n");
+		if (msg_connected(msg))
 			return TIPC_ERR_NO_PORT;
-		}
 	} else {
-		if (msg_mcast(msg)) {
-			msg_dbg(msg, "dispatch filter 2\n");
+		if (msg_mcast(msg))
 			return TIPC_ERR_NO_PORT;
-		}
 		if (sock->state == SS_CONNECTED) {
-			if (!msg_connected(msg)) {
-				msg_dbg(msg, "dispatch filter 3\n");
+			if (!msg_connected(msg))
 				return TIPC_ERR_NO_PORT;
-			}
-		}
-		else if (sock->state == SS_CONNECTING) {
-			if (!msg_connected(msg) && (msg_errcode(msg) == 0)) {
-				msg_dbg(msg, "dispatch filter 4\n");
+		} else if (sock->state == SS_CONNECTING) {
+			if (!msg_connected(msg) && (msg_errcode(msg) == 0))
 				return TIPC_ERR_NO_PORT;
-			}
-		}
-		else if (sock->state == SS_LISTENING) {
-			if (msg_connected(msg) || msg_errcode(msg)) {
-				msg_dbg(msg, "dispatch filter 5\n");
+		} else if (sock->state == SS_LISTENING) {
+			if (msg_connected(msg) || msg_errcode(msg))
 				return TIPC_ERR_NO_PORT;
-			}
-		}
-		else if (sock->state == SS_DISCONNECTING) {
-			msg_dbg(msg, "dispatch filter 6\n");
+		} else if (sock->state == SS_DISCONNECTING) {
 			return TIPC_ERR_NO_PORT;
-		}
-		else /* (sock->state == SS_UNCONNECTED) */ {
-			if (msg_connected(msg) || msg_errcode(msg)) {
-				msg_dbg(msg, "dispatch filter 7\n");
+		} else /* (sock->state == SS_UNCONNECTED) */ {
+			if (msg_connected(msg) || msg_errcode(msg))
 				return TIPC_ERR_NO_PORT;
-			}
 		}
 	}
 
@@ -1280,7 +1263,6 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 
 	/* Enqueue message (finally!) */
 
-	msg_dbg(msg, "<DISP<: ");
 	TIPC_SKB_CB(buf)->handle = msg_data(msg);
 	atomic_inc(&tipc_queue_size);
 	__skb_queue_tail(&sk->sk_receive_queue, buf);
@@ -1588,7 +1570,6 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 		 * Respond to 'SYN+' by queuing it on new socket.
 		 */
 
-		msg_dbg(msg,"<ACC<: ");
 		if (!msg_data_sz(msg)) {
 			struct msghdr m = {NULL,};
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index c5ba323dba47..510271152165 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -249,8 +249,6 @@ static void subscr_terminate(struct subscriber *subscriber)
 			k_cancel_timer(&sub->timer);
 			k_term_timer(&sub->timer);
 		}
-		dbg("Term: Removing sub %u,%u,%u from subscriber %x list\n",
-		    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 		subscr_del(sub);
 	}
 
@@ -307,8 +305,6 @@ static void subscr_cancel(struct tipc_subscr *s,
 		k_term_timer(&sub->timer);
 		spin_lock_bh(subscriber->lock);
 	}
-	dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
-	    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 	subscr_del(sub);
 }
 
-- 
cgit v1.2.3


From 7ced6890bf81d311ab2ea846f92d5f3d0951c08c Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:26 +0000
Subject: tipc: remove dump() and tipc_dump_dbg()

Eliminates calls to two debugging macros that are being completely obsoleted,
as well as any associated debugging routines that are no longer required.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.h |  8 ------
 net/tipc/log.c  | 81 +++------------------------------------------------------
 2 files changed, 3 insertions(+), 86 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.h b/net/tipc/core.h
index b4e54f8dd43b..3af0b36e3f1a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -145,23 +145,15 @@ void tipc_printf(struct print_buf *, const char *fmt, ...);
 		if (DBG_OUTPUT != TIPC_NULL) \
 			tipc_msg_dbg(DBG_OUTPUT, msg, txt); \
 	} while (0)
-#define dump(fmt, arg...) \
-	do { \
-		if (DBG_OUTPUT != TIPC_NULL) \
-			tipc_dump_dbg(DBG_OUTPUT, fmt, ##arg); \
-	} while (0)
 
 void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
-void tipc_dump_dbg(struct print_buf *, const char *fmt, ...);
 
 #else
 
 #define dbg(fmt, arg...)	do {} while (0)
 #define msg_dbg(msg, txt)	do {} while (0)
-#define dump(fmt, arg...)	do {} while (0)
 
 #define tipc_msg_dbg(...)	do {} while (0)
-#define tipc_dump_dbg(...)	do {} while (0)
 
 #endif
 
diff --git a/net/tipc/log.c b/net/tipc/log.c
index 9d99f7097d2d..2796044f9941 100644
--- a/net/tipc/log.c
+++ b/net/tipc/log.c
@@ -64,9 +64,9 @@ struct print_buf *const TIPC_LOG = &log_buf;
  * 'print_string' when writing to a print buffer. This also protects against
  * concurrent writes to the print buffer being written to.
  *
- * 2) tipc_dump() and tipc_log_XXX() leverage the aforementioned
- * use of 'print_lock' to protect against all types of concurrent operations
- * on their associated print buffer (not just write operations).
+ * 2) tipc_log_XXX() leverages the aforementioned use of 'print_lock' to
+ * protect against all types of concurrent operations on their associated
+ * print buffer (not just write operations).
  *
  * Note: All routines of the form tipc_printbuf_XXX() are lock-free, and rely
  * on the caller to prevent simultaneous use of the print buffer(s) being
@@ -268,81 +268,6 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...)
 	spin_unlock_bh(&print_lock);
 }
 
-#ifdef CONFIG_TIPC_DEBUG
-
-/**
- * print_to_console - write string of bytes to console in multiple chunks
- */
-
-static void print_to_console(char *crs, int len)
-{
-	int rest = len;
-
-	while (rest > 0) {
-		int sz = rest < TIPC_PB_MAX_STR ? rest : TIPC_PB_MAX_STR;
-		char c = crs[sz];
-
-		crs[sz] = 0;
-		printk((const char *)crs);
-		crs[sz] = c;
-		rest -= sz;
-		crs += sz;
-	}
-}
-
-/**
- * printbuf_dump - write print buffer contents to console
- */
-
-static void printbuf_dump(struct print_buf *pb)
-{
-	int len;
-
-	if (!pb->buf) {
-		printk("*** PRINT BUFFER NOT ALLOCATED ***");
-		return;
-	}
-
-	/* Dump print buffer from char after cursor to end (if used) */
-
-	len = pb->buf + pb->size - pb->crs - 2;
-	if ((pb->buf[pb->size - 1] == 0) && (len > 0))
-		print_to_console(pb->crs + 1, len);
-
-	/* Dump print buffer from start to cursor (always) */
-
-	len = pb->crs - pb->buf;
-	print_to_console(pb->buf, len);
-}
-
-/**
- * tipc_dump_dbg - dump (non-console) print buffer to console
- * @pb: pointer to print buffer
- */
-
-void tipc_dump_dbg(struct print_buf *pb, const char *fmt, ...)
-{
-	int len;
-
-	if (pb == TIPC_CONS)
-		return;
-
-	spin_lock_bh(&print_lock);
-
-	FORMAT(print_string, len, fmt);
-	printk(print_string);
-
-	printk("\n---- Start of %s log dump ----\n\n",
-	       (pb == TIPC_LOG) ? "global" : "local");
-	printbuf_dump(pb);
-	tipc_printbuf_reset(pb);
-	printk("\n---- End of dump ----\n");
-
-	spin_unlock_bh(&print_lock);
-}
-
-#endif
-
 /**
  * tipc_log_resize - change the size of the TIPC log buffer
  * @log_size: print buffer size to use
-- 
cgit v1.2.3


From 8d64a5ba58157dedc61f3f1f51e1c5d66f32a484 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:27 +0000
Subject: tipc: Prune down link-specific debugging code

Eliminates most link-specific debugging code in TIPC, which is now
largely unnecessary. All calls to the link-specific debugging macros
have been removed, as are the macros themselves; in addition, the optional
allocation of print buffers to hold debugging information for each link
endpoint has been removed. The ability for TIPC to print out helpful
diagnostic information when link retransmit failures occur has been
retained for the time being, as an aid in tracking down the cause of
such failures.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c |  13 -----
 net/tipc/link.c  | 167 ++++++++++++-------------------------------------------
 net/tipc/link.h  |   3 -
 3 files changed, 36 insertions(+), 147 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index cb817d503c14..22a10fadc8c6 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -45,8 +45,6 @@
 
 #define BCLINK_WIN_DEFAULT 20		/* bcast link window size (default) */
 
-#define BCLINK_LOG_BUF_SIZE 0
-
 /*
  * Loss rate for incoming broadcast frames; used to test retransmission code.
  * Set to N to cause every N'th frame to be discarded; 0 => don't discard any.
@@ -774,7 +772,6 @@ int tipc_bclink_init(void)
 	bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC);
 	bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC);
 	if (!bcbearer || !bclink) {
- nomem:
 		warn("Multicast link creation failed, no memory\n");
 		kfree(bcbearer);
 		bcbearer = NULL;
@@ -799,14 +796,6 @@ int tipc_bclink_init(void)
 	bcl->state = WORKING_WORKING;
 	strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
 
-	if (BCLINK_LOG_BUF_SIZE) {
-		char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC);
-
-		if (!pb)
-			goto nomem;
-		tipc_printbuf_init(&bcl->print_buf, pb, BCLINK_LOG_BUF_SIZE);
-	}
-
 	return 0;
 }
 
@@ -815,8 +804,6 @@ void tipc_bclink_stop(void)
 	spin_lock_bh(&bc_lock);
 	if (bcbearer) {
 		tipc_link_stop(bcl);
-		if (BCLINK_LOG_BUF_SIZE)
-			kfree(bcl->print_buf.buf);
 		bcl = NULL;
 		kfree(bclink);
 		bclink = NULL;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cb10d20caef3..647f2ecfde50 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -96,63 +96,10 @@ static int  link_send_sections_long(struct port *sender,
 static void link_check_defragm_bufs(struct link *l_ptr);
 static void link_state_event(struct link *l_ptr, u32 event);
 static void link_reset_statistics(struct link *l_ptr);
-static void link_print(struct link *l_ptr, struct print_buf *buf,
-		       const char *str);
+static void link_print(struct link *l_ptr, const char *str);
 static void link_start(struct link *l_ptr);
 static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
 
-
-/*
- * Debugging code used by link routines only
- *
- * When debugging link problems on a system that has multiple links,
- * the standard TIPC debugging routines may not be useful since they
- * allow the output from multiple links to be intermixed.  For this reason
- * routines of the form "dbg_link_XXX()" have been created that will capture
- * debug info into a link's personal print buffer, which can then be dumped
- * into the TIPC system log (TIPC_LOG) upon request.
- *
- * To enable per-link debugging, use LINK_LOG_BUF_SIZE to specify the size
- * of the print buffer used by each link.  If LINK_LOG_BUF_SIZE is set to 0,
- * the dbg_link_XXX() routines simply send their output to the standard
- * debug print buffer (DBG_OUTPUT), if it has been defined; this can be useful
- * when there is only a single link in the system being debugged.
- *
- * Notes:
- * - When enabled, LINK_LOG_BUF_SIZE should be set to at least TIPC_PB_MIN_SIZE
- * - "l_ptr" must be valid when using dbg_link_XXX() macros
- */
-
-#define LINK_LOG_BUF_SIZE 0
-
-#define dbg_link(fmt, arg...) \
-	do { \
-		if (LINK_LOG_BUF_SIZE) \
-			tipc_printf(&l_ptr->print_buf, fmt, ## arg); \
-	} while (0)
-#define dbg_link_msg(msg, txt) \
-	do { \
-		if (LINK_LOG_BUF_SIZE) \
-			tipc_msg_dbg(&l_ptr->print_buf, msg, txt); \
-	} while (0)
-#define dbg_link_state(txt) \
-	do { \
-		if (LINK_LOG_BUF_SIZE) \
-			link_print(l_ptr, &l_ptr->print_buf, txt); \
-	} while (0)
-#define dbg_link_dump() do { \
-	if (LINK_LOG_BUF_SIZE) { \
-		tipc_printf(LOG, "\n\nDumping link <%s>:\n", l_ptr->name); \
-		tipc_printbuf_move(LOG, &l_ptr->print_buf); \
-	} \
-} while (0)
-
-static void dbg_print_link(struct link *l_ptr, const char *str)
-{
-	if (DBG_OUTPUT != TIPC_NULL)
-		link_print(l_ptr, DBG_OUTPUT, str);
-}
-
 /*
  *  Simple link routines
  */
@@ -366,17 +313,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 		return NULL;
 	}
 
-	if (LINK_LOG_BUF_SIZE) {
-		char *pb = kmalloc(LINK_LOG_BUF_SIZE, GFP_ATOMIC);
-
-		if (!pb) {
-			kfree(l_ptr);
-			warn("Link creation failed, no memory for print buffer\n");
-			return NULL;
-		}
-		tipc_printbuf_init(&l_ptr->print_buf, pb, LINK_LOG_BUF_SIZE);
-	}
-
 	l_ptr->addr = peer;
 	if_name = strchr(b_ptr->publ.name, ':') + 1;
 	sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:",
@@ -411,8 +347,6 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	l_ptr->owner = tipc_node_attach_link(l_ptr);
 	if (!l_ptr->owner) {
-		if (LINK_LOG_BUF_SIZE)
-			kfree(l_ptr->print_buf.buf);
 		kfree(l_ptr);
 		return NULL;
 	}
@@ -447,8 +381,6 @@ void tipc_link_delete(struct link *l_ptr)
 	tipc_node_detach_link(l_ptr->owner, l_ptr);
 	tipc_link_stop(l_ptr);
 	list_del_init(&l_ptr->link_list);
-	if (LINK_LOG_BUF_SIZE)
-		kfree(l_ptr->print_buf.buf);
 	tipc_node_unlock(l_ptr->owner);
 	k_term_timer(&l_ptr->timer);
 	kfree(l_ptr);
@@ -607,7 +539,6 @@ void tipc_link_reset(struct link *l_ptr)
 	link_init_max_pkt(l_ptr);
 
 	l_ptr->state = RESET_UNKNOWN;
-	dbg_link_state("Resetting Link\n");
 
 	if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET))
 		return;
@@ -686,20 +617,14 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		}
 		return;	  /* Changeover going on */
 	}
-	dbg_link("STATE_EV: <%s> ", l_ptr->name);
 
 	switch (l_ptr->state) {
 	case WORKING_WORKING:
-		dbg_link("WW/");
 		switch (event) {
 		case TRAFFIC_MSG_EVT:
-			dbg_link("TRF-");
-			/* fall through */
 		case ACTIVATE_MSG:
-			dbg_link("ACT\n");
 			break;
 		case TIMEOUT_EVT:
-			dbg_link("TIM ");
 			if (l_ptr->next_in_no != l_ptr->checkpoint) {
 				l_ptr->checkpoint = l_ptr->next_in_no;
 				if (tipc_bclink_acks_missing(l_ptr->owner)) {
@@ -714,7 +639,6 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 				link_set_timer(l_ptr, cont_intv);
 				break;
 			}
-			dbg_link(" -> WU\n");
 			l_ptr->state = WORKING_UNKNOWN;
 			l_ptr->fsm_msg_cnt = 0;
 			tipc_link_send_proto_msg(l_ptr, STATE_MSG, 1, 0, 0, 0, 0);
@@ -722,7 +646,6 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv / 4);
 			break;
 		case RESET_MSG:
-			dbg_link("RES -> RR\n");
 			info("Resetting link <%s>, requested by peer\n",
 			     l_ptr->name);
 			tipc_link_reset(l_ptr);
@@ -737,18 +660,14 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		}
 		break;
 	case WORKING_UNKNOWN:
-		dbg_link("WU/");
 		switch (event) {
 		case TRAFFIC_MSG_EVT:
-			dbg_link("TRF-");
 		case ACTIVATE_MSG:
-			dbg_link("ACT -> WW\n");
 			l_ptr->state = WORKING_WORKING;
 			l_ptr->fsm_msg_cnt = 0;
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case RESET_MSG:
-			dbg_link("RES -> RR\n");
 			info("Resetting link <%s>, requested by peer "
 			     "while probing\n", l_ptr->name);
 			tipc_link_reset(l_ptr);
@@ -759,9 +678,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case TIMEOUT_EVT:
-			dbg_link("TIM ");
 			if (l_ptr->next_in_no != l_ptr->checkpoint) {
-				dbg_link("-> WW\n");
 				l_ptr->state = WORKING_WORKING;
 				l_ptr->fsm_msg_cnt = 0;
 				l_ptr->checkpoint = l_ptr->next_in_no;
@@ -772,16 +689,11 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 				}
 				link_set_timer(l_ptr, cont_intv);
 			} else if (l_ptr->fsm_msg_cnt < l_ptr->abort_limit) {
-				dbg_link("Probing %u/%u,timer = %u ms)\n",
-					 l_ptr->fsm_msg_cnt, l_ptr->abort_limit,
-					 cont_intv / 4);
 				tipc_link_send_proto_msg(l_ptr, STATE_MSG,
 							 1, 0, 0, 0, 0);
 				l_ptr->fsm_msg_cnt++;
 				link_set_timer(l_ptr, cont_intv / 4);
 			} else {	/* Link has failed */
-				dbg_link("-> RU (%u probes unanswered)\n",
-					 l_ptr->fsm_msg_cnt);
 				warn("Resetting link <%s>, peer not responding\n",
 				     l_ptr->name);
 				tipc_link_reset(l_ptr);
@@ -798,18 +710,13 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		}
 		break;
 	case RESET_UNKNOWN:
-		dbg_link("RU/");
 		switch (event) {
 		case TRAFFIC_MSG_EVT:
-			dbg_link("TRF-\n");
 			break;
 		case ACTIVATE_MSG:
 			other = l_ptr->owner->active_links[0];
-			if (other && link_working_unknown(other)) {
-				dbg_link("ACT\n");
+			if (other && link_working_unknown(other))
 				break;
-			}
-			dbg_link("ACT -> WW\n");
 			l_ptr->state = WORKING_WORKING;
 			l_ptr->fsm_msg_cnt = 0;
 			link_activate(l_ptr);
@@ -818,8 +725,6 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case RESET_MSG:
-			dbg_link("RES\n");
-			dbg_link(" -> RR\n");
 			l_ptr->state = RESET_RESET;
 			l_ptr->fsm_msg_cnt = 0;
 			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 1, 0, 0, 0, 0);
@@ -827,11 +732,9 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case STARTING_EVT:
-			dbg_link("START-");
 			l_ptr->started = 1;
 			/* fall through */
 		case TIMEOUT_EVT:
-			dbg_link("TIM\n");
 			tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
 			l_ptr->fsm_msg_cnt++;
 			link_set_timer(l_ptr, cont_intv);
@@ -841,18 +744,12 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		}
 		break;
 	case RESET_RESET:
-		dbg_link("RR/ ");
 		switch (event) {
 		case TRAFFIC_MSG_EVT:
-			dbg_link("TRF-");
-			/* fall through */
 		case ACTIVATE_MSG:
 			other = l_ptr->owner->active_links[0];
-			if (other && link_working_unknown(other)) {
-				dbg_link("ACT\n");
+			if (other && link_working_unknown(other))
 				break;
-			}
-			dbg_link("ACT -> WW\n");
 			l_ptr->state = WORKING_WORKING;
 			l_ptr->fsm_msg_cnt = 0;
 			link_activate(l_ptr);
@@ -861,14 +758,11 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case RESET_MSG:
-			dbg_link("RES\n");
 			break;
 		case TIMEOUT_EVT:
-			dbg_link("TIM\n");
 			tipc_link_send_proto_msg(l_ptr, ACTIVATE_MSG, 0, 0, 0, 0, 0);
 			l_ptr->fsm_msg_cnt++;
 			link_set_timer(l_ptr, cont_intv);
-			dbg_link("fsm_msg_cnt %u\n", l_ptr->fsm_msg_cnt);
 			break;
 		default:
 			err("Unknown link event %u in RR state\n", event);
@@ -1515,8 +1409,7 @@ static void link_reset_all(unsigned long addr)
 
 	for (i = 0; i < MAX_BEARERS; i++) {
 		if (n_ptr->links[i]) {
-			link_print(n_ptr->links[i], TIPC_OUTPUT,
-				   "Resetting link\n");
+			link_print(n_ptr->links[i], "Resetting link\n");
 			tipc_link_reset(n_ptr->links[i]);
 		}
 	}
@@ -1535,7 +1428,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 
 		/* Handle failure on standard link */
 
-		link_print(l_ptr, TIPC_OUTPUT, "Resetting link\n");
+		link_print(l_ptr, "Resetting link\n");
 		tipc_link_reset(l_ptr);
 
 	} else {
@@ -1545,21 +1438,21 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 		struct tipc_node *n_ptr;
 		char addr_string[16];
 
-		tipc_printf(TIPC_OUTPUT, "Msg seq number: %u,  ", msg_seqno(msg));
-		tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
-				     (unsigned long) TIPC_SKB_CB(buf)->handle);
+		info("Msg seq number: %u,  ", msg_seqno(msg));
+		info("Outstanding acks: %lu\n",
+		     (unsigned long) TIPC_SKB_CB(buf)->handle);
 
 		n_ptr = l_ptr->owner->next;
 		tipc_node_lock(n_ptr);
 
 		tipc_addr_string_fill(addr_string, n_ptr->addr);
-		tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string);
-		tipc_printf(TIPC_OUTPUT, "Supported: %d,  ", n_ptr->bclink.supported);
-		tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked);
-		tipc_printf(TIPC_OUTPUT, "Last in: %u,  ", n_ptr->bclink.last_in);
-		tipc_printf(TIPC_OUTPUT, "Gap after: %u,  ", n_ptr->bclink.gap_after);
-		tipc_printf(TIPC_OUTPUT, "Gap to: %u\n", n_ptr->bclink.gap_to);
-		tipc_printf(TIPC_OUTPUT, "Nack sync: %u\n\n", n_ptr->bclink.nack_sync);
+		info("Multicast link info for %s\n", addr_string);
+		info("Supported: %d,  ", n_ptr->bclink.supported);
+		info("Acked: %u\n", n_ptr->bclink.acked);
+		info("Last in: %u,  ", n_ptr->bclink.last_in);
+		info("Gap after: %u,  ", n_ptr->bclink.gap_after);
+		info("Gap to: %u\n", n_ptr->bclink.gap_to);
+		info("Nack sync: %u\n\n", n_ptr->bclink.nack_sync);
 
 		tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
 
@@ -1581,7 +1474,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 
 	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
 		if (l_ptr->retransm_queue_size == 0) {
-			dbg_print_link(l_ptr, "   ");
 			l_ptr->retransm_queue_head = msg_seqno(msg);
 			l_ptr->retransm_queue_size = retransmits;
 		} else {
@@ -2458,7 +2350,6 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	if (dest_link->exp_msg_count == 0) {
 		warn("Link switchover error, "
 		     "got too many tunnelled messages\n");
-		dbg_print_link(dest_link, "LINK:");
 		goto exit;
 	}
 	dest_link->exp_msg_count--;
@@ -3066,14 +2957,22 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 	return res;
 }
 
-static void link_print(struct link *l_ptr, struct print_buf *buf,
-		       const char *str)
+static void link_print(struct link *l_ptr, const char *str)
 {
+	char print_area[256];
+	struct print_buf pb;
+	struct print_buf *buf = &pb;
+
+	tipc_printbuf_init(buf, print_area, sizeof(print_area));
+
 	tipc_printf(buf, str);
-	if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
-		return;
 	tipc_printf(buf, "Link %x<%s>:",
 		    l_ptr->addr, l_ptr->b_ptr->publ.name);
+
+#ifdef CONFIG_TIPC_DEBUG
+	if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr))
+		goto print_state;
+
 	tipc_printf(buf, ": NXO(%u):", mod(l_ptr->next_out_no));
 	tipc_printf(buf, "NXI(%u):", mod(l_ptr->next_in_no));
 	tipc_printf(buf, "SQUE");
@@ -3104,14 +3003,20 @@ static void link_print(struct link *l_ptr, struct print_buf *buf,
 				    l_ptr->deferred_inqueue_sz);
 		}
 	}
+print_state:
+#endif
+
 	if (link_working_unknown(l_ptr))
 		tipc_printf(buf, ":WU");
-	if (link_reset_reset(l_ptr))
+	else if (link_reset_reset(l_ptr))
 		tipc_printf(buf, ":RR");
-	if (link_reset_unknown(l_ptr))
+	else if (link_reset_unknown(l_ptr))
 		tipc_printf(buf, ":RU");
-	if (link_working_working(l_ptr))
+	else if (link_working_working(l_ptr))
 		tipc_printf(buf, ":WW");
 	tipc_printf(buf, "\n");
+
+	tipc_printbuf_validate(buf);
+	info("%s", print_area);
 }
 
diff --git a/net/tipc/link.h b/net/tipc/link.h
index eeb0c015dba9..eca19c247b79 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -107,7 +107,6 @@
  * @long_msg_seq_no: next identifier to use for outbound fragmented messages
  * @defragm_buf: list of partially reassembled inbound message fragments
  * @stats: collects statistics regarding link activity
- * @print_buf: print buffer used to log link activity
  */
 
 struct link {
@@ -210,8 +209,6 @@ struct link {
 		u32 msg_lengths_total;
 		u32 msg_length_profile[7];
 	} stats;
-
-	struct print_buf print_buf;
 };
 
 struct port;
-- 
cgit v1.2.3


From 6e7e309c62ab584348e0fef90c8e3e48f634dba1 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:28 +0000
Subject: tipc: Finish streamlining of debugging code

Completes the simplification of TIPC's debugging capabilities. By default
TIPC includes no debugging code, and any debugging code added by developers
that calls the dbg() and dbg_macros() is compiled out. If debugging support
is enabled, TIPC prints out some additional data about its internal state
when certain abnormal conditions occur, and any developer-added calls to the
TIPC debug macros are compiled in.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/Kconfig | 11 +++++++----
 net/tipc/core.h  | 47 +++++++++--------------------------------------
 net/tipc/log.c   |  4 +---
 net/tipc/msg.c   |  1 +
 4 files changed, 18 insertions(+), 45 deletions(-)

(limited to 'net')

diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index c02d3e9c156d..0e7ce30fd567 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -67,12 +67,15 @@ config TIPC_LOG
 	  managed remotely via TIPC.
 
 config TIPC_DEBUG
-	bool "Enable debug messages"
+	bool "Enable debugging support"
 	default n
 	help
-	  This enables debugging of TIPC.
+	  Saying Y here enables TIPC debugging capabilities used by developers.
+	  Most users do not need to bother; if unsure, just say N.
 
-	  Only say Y here if you are having trouble with TIPC.  It will
-	  enable the display of detailed information about what is going on.
+	  Enabling debugging support causes TIPC to display data about its
+	  internal state when certain abnormal conditions occur. It also
+	  makes it easy for developers to capture additional information of
+	  interest using the dbg() or msg_dbg() macros.
 
 endif # TIPC
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 3af0b36e3f1a..997158546e25 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -83,6 +83,7 @@ struct print_buf;	/* log.h */
  *       user-defined buffers can be configured to do the same thing.
  */
 extern struct print_buf *const TIPC_NULL;
+extern struct print_buf *const TIPC_CONS;
 extern struct print_buf *const TIPC_LOG;
 
 void tipc_printf(struct print_buf *, const char *fmt, ...);
@@ -95,56 +96,26 @@ void tipc_printf(struct print_buf *, const char *fmt, ...);
 #define TIPC_OUTPUT TIPC_LOG
 #endif
 
-/*
- * TIPC can be configured to send system messages to TIPC_OUTPUT
- * or to the system console only.
- */
-
-#ifdef CONFIG_TIPC_DEBUG
-
 #define err(fmt, arg...)  tipc_printf(TIPC_OUTPUT, \
-					KERN_ERR "TIPC: " fmt, ## arg)
+				      KERN_ERR "TIPC: " fmt, ## arg)
 #define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
-					KERN_WARNING "TIPC: " fmt, ## arg)
+				      KERN_WARNING "TIPC: " fmt, ## arg)
 #define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
-					KERN_NOTICE "TIPC: " fmt, ## arg)
-
-#else
+				      KERN_NOTICE "TIPC: " fmt, ## arg)
 
-#define err(fmt, arg...)  printk(KERN_ERR "TIPC: " fmt , ## arg)
-#define info(fmt, arg...) printk(KERN_INFO "TIPC: " fmt , ## arg)
-#define warn(fmt, arg...) printk(KERN_WARNING "TIPC: " fmt , ## arg)
-
-#endif
+#ifdef CONFIG_TIPC_DEBUG
 
 /*
  * DBG_OUTPUT is the destination print buffer for debug messages.
- * It defaults to the the null print buffer, but can be redefined
- * (typically in the individual .c files being debugged) to allow
- * selected debug messages to be generated where needed.
  */
 
 #ifndef DBG_OUTPUT
-#define DBG_OUTPUT TIPC_NULL
+#define DBG_OUTPUT TIPC_LOG
 #endif
 
-/*
- * TIPC can be configured to send debug messages to the specified print buffer
- * (typically DBG_OUTPUT) or to suppress them entirely.
- */
-
-#ifdef CONFIG_TIPC_DEBUG
+#define dbg(fmt, arg...)  tipc_printf(DBG_OUTPUT, KERN_DEBUG fmt, ## arg);
 
-#define dbg(fmt, arg...)  \
-	do { \
-		if (DBG_OUTPUT != TIPC_NULL) \
-			tipc_printf(DBG_OUTPUT, fmt, ## arg); \
-	} while (0)
-#define msg_dbg(msg, txt) \
-	do { \
-		if (DBG_OUTPUT != TIPC_NULL) \
-			tipc_msg_dbg(DBG_OUTPUT, msg, txt); \
-	} while (0)
+#define msg_dbg(msg, txt) tipc_msg_dbg(DBG_OUTPUT, msg, txt);
 
 void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
 
@@ -153,7 +124,7 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
 #define dbg(fmt, arg...)	do {} while (0)
 #define msg_dbg(msg, txt)	do {} while (0)
 
-#define tipc_msg_dbg(...)	do {} while (0)
+#define tipc_msg_dbg(buf, msg, txt) do {} while (0)
 
 #endif
 
diff --git a/net/tipc/log.c b/net/tipc/log.c
index 2796044f9941..952c39f643e6 100644
--- a/net/tipc/log.c
+++ b/net/tipc/log.c
@@ -52,7 +52,7 @@ static struct print_buf null_buf = { NULL, 0, NULL, 0 };
 struct print_buf *const TIPC_NULL = &null_buf;
 
 static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
-static struct print_buf *const TIPC_CONS = &cons_buf;
+struct print_buf *const TIPC_CONS = &cons_buf;
 
 static struct print_buf log_buf = { NULL, 0, NULL, 1 };
 struct print_buf *const TIPC_LOG = &log_buf;
@@ -76,8 +76,6 @@ struct print_buf *const TIPC_LOG = &log_buf;
 static char print_string[TIPC_PB_MAX_STR];
 static DEFINE_SPINLOCK(print_lock);
 
-static void tipc_printbuf_reset(struct print_buf *pb);
-static int  tipc_printbuf_empty(struct print_buf *pb);
 static void tipc_printbuf_move(struct print_buf *pb_to,
 			       struct print_buf *pb_from);
 
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ee6b4c68d4a4..a029cdc2df6d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -138,6 +138,7 @@ int tipc_msg_build(struct tipc_msg *hdr,
 void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 {
 	u32 usr = msg_user(msg);
+	tipc_printf(buf, KERN_DEBUG);
 	tipc_printf(buf, str);
 
 	switch (usr) {
-- 
cgit v1.2.3


From 886ef52a8ce6930a9d0c58267d5b5038ac3e8d30 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:29 +0000
Subject: tipc: remove redundant #includes

Eliminates a number of #include statements that no longer serve any
useful purpose.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c       |  1 -
 net/tipc/bcast.c      |  1 -
 net/tipc/bearer.c     |  1 -
 net/tipc/config.c     |  1 -
 net/tipc/core.c       |  6 ------
 net/tipc/discover.c   |  2 --
 net/tipc/eth_media.c  |  4 ----
 net/tipc/msg.c        |  1 -
 net/tipc/name_distr.c |  1 -
 net/tipc/net.c        |  2 --
 net/tipc/node.c       |  1 -
 net/tipc/socket.c     | 11 -----------
 12 files changed, 32 deletions(-)

(limited to 'net')

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index 483868a75b88..88463d9a6f12 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -35,7 +35,6 @@
  */
 
 #include "core.h"
-#include "node.h"
 #include "addr.h"
 
 /**
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 22a10fadc8c6..c0f3b096e7f5 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -38,7 +38,6 @@
 #include "core.h"
 #include "link.h"
 #include "port.h"
-#include "name_distr.h"
 #include "bcast.h"
 
 #define MAX_PKT_DEFAULT_MCAST 1500	/* bcast link max packet size (fixed) */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 24dc6c2c75ad..040f3ed32ec8 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -37,7 +37,6 @@
 #include "core.h"
 #include "config.h"
 #include "bearer.h"
-#include "port.h"
 #include "discover.h"
 
 #define MAX_ADDR_STR 32
diff --git a/net/tipc/config.c b/net/tipc/config.c
index a7894ff77ae9..6c67132f8652 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -36,7 +36,6 @@
 
 #include "core.h"
 #include "port.h"
-#include "link.h"
 #include "name_table.h"
 #include "config.h"
 
diff --git a/net/tipc/core.c b/net/tipc/core.c
index a02bc490caae..60b85ec6d106 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -34,14 +34,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/random.h>
-
 #include "core.h"
 #include "ref.h"
-#include "net.h"
 #include "name_table.h"
 #include "subscr.h"
 #include "config.h"
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index e7223789d150..be28f5adc770 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -37,8 +37,6 @@
 #include "core.h"
 #include "link.h"
 #include "discover.h"
-#include "port.h"
-#include "name_table.h"
 
 #define TIPC_LINK_REQ_INIT	125	/* min delay during bearer start up */
 #define TIPC_LINK_REQ_FAST	2000	/* normal delay if bearer has no links */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index ee683cc8f4b1..101d9cb6a559 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -34,10 +34,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/netdevice.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-
 #include "core.h"
 #include "bearer.h"
 
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index a029cdc2df6d..2571ffb4d350 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -35,7 +35,6 @@
  */
 
 #include "core.h"
-#include "addr.h"
 #include "msg.h"
 
 u32 tipc_msg_tot_importance(struct tipc_msg *m)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 0dd648ec0809..376a30b9fd72 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -35,7 +35,6 @@
  */
 
 #include "core.h"
-#include "addr.h"
 #include "link.h"
 #include "name_distr.h"
 
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 6290becd35be..9bacfd00b91e 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -36,10 +36,8 @@
 
 #include "core.h"
 #include "net.h"
-#include "name_table.h"
 #include "name_distr.h"
 #include "subscr.h"
-#include "link.h"
 #include "port.h"
 #include "config.h"
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index fb54719679a6..126d774883dd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -37,7 +37,6 @@
 #include "core.h"
 #include "config.h"
 #include "node.h"
-#include "port.h"
 #include "name_distr.h"
 
 static void node_lost_contact(struct tipc_node *n_ptr);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7a21a5ee43e8..f972c0b4a719 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -34,17 +34,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/poll.h>
-#include <linux/fcntl.h>
-#include <linux/gfp.h>
-#include <asm/string.h>
-#include <asm/atomic.h>
 #include <net/sock.h>
 
 #include <linux/tipc.h>
-- 
cgit v1.2.3


From e83504f72456809cdbdbc91700d3ba6370c9da1c Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:30 +0000
Subject: tipc: remove pointless check for NULL prior to kfree

It is acceptable to call kfree() with NULL, so these checks are not
serving any useful purpose.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 4 +---
 net/tipc/port.c       | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 376a30b9fd72..a6c989fd4988 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -240,9 +240,7 @@ static void node_is_down(struct publication *publ)
 		    publ->type, publ->lower, publ->node, publ->ref, publ->key);
 	}
 
-	if (p) {
-		kfree(p);
-	}
+	kfree(p);
 }
 
 /**
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 8bacd572a9fb..db14b7e0afdc 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -269,9 +269,7 @@ int tipc_deleteport(u32 ref)
 		buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT);
 		tipc_nodesub_unsubscribe(&p_ptr->subscription);
 	}
-	if (p_ptr->user_port) {
-		kfree(p_ptr->user_port);
-	}
+	kfree(p_ptr->user_port);
 
 	spin_lock_bh(&tipc_port_list_lock);
 	list_del(&p_ptr->port_list);
-- 
cgit v1.2.3


From 25860c3bd5bd1db236d4fd5826d76127d677dc28 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Fri, 31 Dec 2010 18:59:31 +0000
Subject: tipc: recode getsockopt error handling for better readability

The existing code for the copy to user and error handling at the
end of getsockopt isn't easy to follow, due to the excessive use
of if/else.  By simply using return where appropriate, it can be
made smaller and easier to follow at the same time.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f972c0b4a719..1a2eb23c6223 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1755,20 +1755,16 @@ static int getsockopt(struct socket *sock,
 
 	release_sock(sk);
 
-	if (res) {
-		/* "get" failed */
-	}
-	else if (len < sizeof(value)) {
-		res = -EINVAL;
-	}
-	else if (copy_to_user(ov, &value, sizeof(value))) {
-		res = -EFAULT;
-	}
-	else {
-		res = put_user(sizeof(value), ol);
-	}
+	if (res)
+		return res;	/* "get" failed */
 
-	return res;
+	if (len < sizeof(value))
+		return -EINVAL;
+
+	if (copy_to_user(ov, &value, sizeof(value)))
+		return -EFAULT;
+
+	return put_user(sizeof(value), ol);
 }
 
 /**
-- 
cgit v1.2.3


From 0e65967e33be61e5f67727edd4ea829b47676fc0 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:32 +0000
Subject: tipc: cleanup various cosmetic whitespace issues

Cleans up TIPC's source code to eliminate deviations from generally
accepted coding conventions relating to leading/trailing white space
and white space around commas, braces, cases, and sizeof.

These changes are purely cosmetic and do not alter the operation of TIPC
in any way.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc.h        | 18 +++++++-------
 include/linux/tipc_config.h | 59 ++++++++++++++++++++++-----------------------
 net/tipc/Kconfig            |  2 +-
 net/tipc/bcast.c            |  2 +-
 net/tipc/config.c           |  3 +--
 net/tipc/discover.c         |  2 +-
 net/tipc/eth_media.c        |  5 ++--
 net/tipc/link.c             | 31 +++++++++++-------------
 net/tipc/link.h             |  4 +--
 net/tipc/msg.c              | 56 +++++++++++++++++++++---------------------
 net/tipc/msg.h              |  8 +++---
 net/tipc/name_table.c       | 14 +++++------
 net/tipc/name_table.h       |  2 +-
 net/tipc/node.c             |  2 +-
 net/tipc/port.c             | 15 ++++++------
 net/tipc/port.h             |  2 +-
 net/tipc/ref.c              |  6 ++---
 net/tipc/socket.c           | 24 +++++++++---------
 net/tipc/subscr.c           |  2 +-
 19 files changed, 124 insertions(+), 133 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc.h b/include/linux/tipc.h
index d10614b29d59..1eefa3f6d1f4 100644
--- a/include/linux/tipc.h
+++ b/include/linux/tipc.h
@@ -1,6 +1,6 @@
 /*
  * include/linux/tipc.h: Include file for TIPC socket interface
- * 
+ *
  * Copyright (c) 2003-2006, Ericsson AB
  * Copyright (c) 2005, Wind River Systems
  * All rights reserved.
@@ -42,7 +42,7 @@
 /*
  * TIPC addressing primitives
  */
- 
+
 struct tipc_portid {
 	__u32 ref;
 	__u32 node;
@@ -89,7 +89,7 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_TOP_SRV		1	/* topology service name type */
 #define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
 
-/* 
+/*
  * Publication scopes when binding port names and port name sequences
  */
 
@@ -112,7 +112,7 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_HIGH_IMPORTANCE		2
 #define TIPC_CRITICAL_IMPORTANCE	3
 
-/* 
+/*
  * Msg rejection/connection shutdown reasons
  */
 
@@ -127,9 +127,9 @@ static inline unsigned int tipc_node(__u32 addr)
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS     	0x01  	/* filter for port availability */
-#define TIPC_SUB_SERVICE     	0x02  	/* filter for service availability */
-#define TIPC_SUB_CANCEL         0x04    /* cancel a subscription */
+#define TIPC_SUB_PORTS		0x01	/* filter for port availability */
+#define TIPC_SUB_SERVICE	0x02	/* filter for service availability */
+#define TIPC_SUB_CANCEL		0x04	/* cancel a subscription */
 #if 0
 /* The following filter options are not currently implemented */
 #define TIPC_SUB_NO_BIND_EVTS	0x04	/* filter out "publish" events */
@@ -137,12 +137,12 @@ static inline unsigned int tipc_node(__u32 addr)
 #define TIPC_SUB_SINGLE_EVT	0x10	/* expire after first event */
 #endif
 
-#define TIPC_WAIT_FOREVER	~0	/* timeout for permanent subscription */
+#define TIPC_WAIT_FOREVER	(~0)	/* timeout for permanent subscription */
 
 struct tipc_subscr {
 	struct tipc_name_seq seq;	/* name sequence of interest */
 	__u32 timeout;			/* subscription duration (in ms) */
-        __u32 filter;   		/* bitmask of filter options */
+	__u32 filter;			/* bitmask of filter options */
 	char usr_handle[8];		/* available for subscriber use */
 };
 
diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 677aa13dc5d7..7d42460a5e3c 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -1,6 +1,6 @@
 /*
  * include/linux/tipc_config.h: Include file for TIPC configuration interface
- * 
+ *
  * Copyright (c) 2003-2006, Ericsson AB
  * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
@@ -54,19 +54,19 @@
  * which specify parameters or results for the operation.
  *
  * For many operations, the request and reply messages have a fixed number
- * of TLVs (usually zero or one); however, some reply messages may return 
+ * of TLVs (usually zero or one); however, some reply messages may return
  * a variable number of TLVs.  A failed request is denoted by the presence
  * of an "error string" TLV in the reply message instead of the TLV(s) the
  * reply should contain if the request succeeds.
  */
- 
-/* 
+
+/*
  * Public commands:
  * May be issued by any process.
- * Accepted by own node, or by remote node only if remote management enabled.                       
+ * Accepted by own node, or by remote node only if remote management enabled.
  */
- 
-#define  TIPC_CMD_NOOP   	    0x0000    /* tx none, rx none */
+
+#define  TIPC_CMD_NOOP              0x0000    /* tx none, rx none */
 #define  TIPC_CMD_GET_NODES         0x0001    /* tx net_addr, rx node_info(s) */
 #define  TIPC_CMD_GET_MEDIA_NAMES   0x0002    /* tx none, rx media_name(s) */
 #define  TIPC_CMD_GET_BEARER_NAMES  0x0003    /* tx none, rx bearer_name(s) */
@@ -83,11 +83,11 @@
 #define  TIPC_CMD_GET_LINK_PEER     0x000D    /* tx link_name, rx ? */
 #endif
 
-/* 
+/*
  * Protected commands:
  * May only be issued by "network administration capable" process.
  * Accepted by own node, or by remote node only if remote management enabled
- * and this node is zone manager.                       
+ * and this node is zone manager.
  */
 
 #define  TIPC_CMD_GET_REMOTE_MNG    0x4003    /* tx none, rx unsigned */
@@ -116,10 +116,10 @@
 #define  TIPC_CMD_UNBLOCK_LINK      0x4106    /* tx link_name, rx none */
 #endif
 
-/* 
+/*
  * Private commands:
  * May only be issued by "network administration capable" process.
- * Accepted by own node only; cannot be used on a remote node.                       
+ * Accepted by own node only; cannot be used on a remote node.
  */
 
 #define  TIPC_CMD_SET_NODE_ADDR     0x8001    /* tx net_addr, rx none */
@@ -156,20 +156,20 @@
 #define TIPC_TLV_ULTRA_STRING	5	/* char[32768] (max) */
 
 #define TIPC_TLV_ERROR_STRING	16	/* char[128] containing "error code" */
-#define TIPC_TLV_NET_ADDR   	17	/* 32-bit integer denoting <Z.C.N> */
+#define TIPC_TLV_NET_ADDR	17	/* 32-bit integer denoting <Z.C.N> */
 #define TIPC_TLV_MEDIA_NAME	18	/* char[TIPC_MAX_MEDIA_NAME] */
 #define TIPC_TLV_BEARER_NAME	19	/* char[TIPC_MAX_BEARER_NAME] */
 #define TIPC_TLV_LINK_NAME	20	/* char[TIPC_MAX_LINK_NAME] */
 #define TIPC_TLV_NODE_INFO	21	/* struct tipc_node_info */
 #define TIPC_TLV_LINK_INFO	22	/* struct tipc_link_info */
-#define TIPC_TLV_BEARER_CONFIG  23	/* struct tipc_bearer_config */
-#define TIPC_TLV_LINK_CONFIG    24	/* struct tipc_link_config */
+#define TIPC_TLV_BEARER_CONFIG	23	/* struct tipc_bearer_config */
+#define TIPC_TLV_LINK_CONFIG	24	/* struct tipc_link_config */
 #define TIPC_TLV_NAME_TBL_QUERY	25	/* struct tipc_name_table_query */
-#define TIPC_TLV_PORT_REF   	26	/* 32-bit port reference */
+#define TIPC_TLV_PORT_REF	26	/* 32-bit port reference */
 
 /*
  * Maximum sizes of TIPC bearer-related names (including terminating NUL)
- */ 
+ */
 
 #define TIPC_MAX_MEDIA_NAME	16	/* format = media */
 #define TIPC_MAX_IF_NAME	16	/* format = interface */
@@ -234,7 +234,7 @@ struct tipc_name_table_query {
 };
 
 /*
- * The error string TLV is a null-terminated string describing the cause 
+ * The error string TLV is a null-terminated string describing the cause
  * of the request failure.  To simplify error processing (and to save space)
  * the first character of the string can be a special error code character
  * (lying by the range 0x80 to 0xFF) which represents a pre-defined reason.
@@ -258,7 +258,7 @@ struct tipc_link_create {
 
 /*
  * A TLV consists of a descriptor, followed by the TLV value.
- * TLV descriptor fields are stored in network byte order; 
+ * TLV descriptor fields are stored in network byte order;
  * TLV values must also be stored in network byte order (where applicable).
  * TLV descriptors must be aligned to addresses which are multiple of 4,
  * so up to 3 bytes of padding may exist at the end of the TLV value area.
@@ -294,7 +294,7 @@ static inline int TLV_OK(const void *tlv, __u16 space)
 
 static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type)
 {
-	return TLV_OK(tlv, space) && 
+	return TLV_OK(tlv, space) &&
 		(ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
 }
 
@@ -313,7 +313,7 @@ static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 }
 
 /*
- * A TLV list descriptor simplifies processing of messages 
+ * A TLV list descriptor simplifies processing of messages
  * containing multiple TLVs.
  */
 
@@ -322,15 +322,15 @@ struct tlv_list_desc {
 	__u32 tlv_space;		/* # bytes from curr TLV to list end */
 };
 
-static inline void TLV_LIST_INIT(struct tlv_list_desc *list, 
+static inline void TLV_LIST_INIT(struct tlv_list_desc *list,
 				 void *data, __u32 space)
 {
 	list->tlv_ptr = (struct tlv_desc *)data;
 	list->tlv_space = space;
 }
-	     
+
 static inline int TLV_LIST_EMPTY(struct tlv_list_desc *list)
-{ 
+{
 	return (list->tlv_space == 0);
 }
 
@@ -348,7 +348,7 @@ static inline void TLV_LIST_STEP(struct tlv_list_desc *list)
 {
 	__u16 tlv_space = TLV_ALIGN(ntohs(list->tlv_ptr->tlv_len));
 
-        list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
+	list->tlv_ptr = (struct tlv_desc *)((char *)list->tlv_ptr + tlv_space);
 	list->tlv_space -= tlv_space;
 }
 
@@ -372,15 +372,14 @@ struct tipc_genlmsghdr {
 #define TIPC_GENL_HDRLEN	NLMSG_ALIGN(sizeof(struct tipc_genlmsghdr))
 
 /*
- * Configuration messages exchanged via TIPC sockets use the TIPC configuration 
- * message header, which is defined below.  This structure is analogous 
- * to the Netlink message header, but fields are stored in network byte order 
- * and no padding is permitted between the header and the message data 
+ * Configuration messages exchanged via TIPC sockets use the TIPC configuration
+ * message header, which is defined below.  This structure is analogous
+ * to the Netlink message header, but fields are stored in network byte order
+ * and no padding is permitted between the header and the message data
  * that follows.
  */
 
-struct tipc_cfg_msg_hdr
-{
+struct tipc_cfg_msg_hdr {
 	__be32 tcm_len;		/* Message length (including header) */
 	__be16 tcm_type;	/* Command type */
 	__be16 tcm_flags;	/* Additional flags */
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 0e7ce30fd567..0436927369f3 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -50,7 +50,7 @@ config TIPC_PORTS
 	  Specifies how many ports can be supported by a node.
 	  Can range from 127 to 65535 ports; default is 8191.
 
-	  Setting this to a smaller value saves some memory, 
+	  Setting this to a smaller value saves some memory,
 	  setting it to higher allows for more ports.
 
 config TIPC_LOG
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index c0f3b096e7f5..9de32564984a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -428,7 +428,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
 	static int rx_count = 0;
 #endif
 	struct tipc_msg *msg = buf_msg(buf);
-	struct tipc_node* node = tipc_node_find(msg_prevnode(msg));
+	struct tipc_node *node = tipc_node_find(msg_prevnode(msg));
 	u32 next_in;
 	u32 seqno;
 	struct sk_buff *deferred;
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 6c67132f8652..e16750dcf3c1 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -322,8 +322,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	} else if (!tipc_remote_management) {
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NO_REMOTE);
 		goto exit;
-	}
-	else if (cmd >= 0x4000) {
+	} else if (cmd >= 0x4000) {
 		u32 domain = 0;
 
 		if ((tipc_nametbl_translate(TIPC_ZM_SRV, 0, &domain) == 0) ||
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index be28f5adc770..fa026bd91a68 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -130,7 +130,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 	u32 net_id = msg_bc_netid(msg);
 	u32 type = msg_type(msg);
 
-	msg_get_media_addr(msg,&media_addr);
+	msg_get_media_addr(msg, &media_addr);
 	buf_discard(buf);
 
 	if (net_id != tipc_net_id)
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 101d9cb6a559..5dfe66357794 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -144,7 +144,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 
 	/* Find device with specified name */
 
-	for_each_netdev(&init_net, pdev){
+	for_each_netdev(&init_net, pdev) {
 		if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
 			dev = pdev;
 			break;
@@ -155,7 +155,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 
 	/* Find Ethernet bearer for device (or create one) */
 
-	for (;(eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev); eb_ptr++);
+	while ((eb_ptr != stop) && eb_ptr->dev && (eb_ptr->dev != dev))
+		eb_ptr++;
 	if (eb_ptr == stop)
 		return -EDQUOT;
 	if (!eb_ptr->dev) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 647f2ecfde50..ef203a1581cd 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -977,8 +977,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 				l_ptr->next_out = buf;
 				return res;
 			}
-		}
-		else
+		} else
 			*used_max_pkt = l_ptr->max_pkt;
 	}
 	return tipc_link_send_buf(l_ptr, buf);  /* All other cases */
@@ -1132,10 +1131,10 @@ static int link_send_sections_long(struct port *sender,
 	struct tipc_node *node;
 	struct tipc_msg *hdr = &sender->publ.phdr;
 	u32 dsz = msg_data_sz(hdr);
-	u32 max_pkt,fragm_sz,rest;
+	u32 max_pkt, fragm_sz, rest;
 	struct tipc_msg fragm_hdr;
-	struct sk_buff *buf,*buf_chain,*prev;
-	u32 fragm_crs,fragm_rest,hsz,sect_rest;
+	struct sk_buff *buf, *buf_chain, *prev;
+	u32 fragm_crs, fragm_rest, hsz, sect_rest;
 	const unchar *sect_crs;
 	int curr_sect;
 	u32 fragm_no;
@@ -1212,7 +1211,7 @@ error:
 			/* Initiate new fragment: */
 			if (rest <= fragm_sz) {
 				fragm_sz = rest;
-				msg_set_type(&fragm_hdr,LAST_FRAGMENT);
+				msg_set_type(&fragm_hdr, LAST_FRAGMENT);
 			} else {
 				msg_set_type(&fragm_hdr, FRAGMENT);
 			}
@@ -1229,8 +1228,7 @@ error:
 			fragm_crs = INT_H_SIZE;
 			fragm_rest = fragm_sz;
 		}
-	}
-	while (rest > 0);
+	} while (rest > 0);
 
 	/*
 	 * Now we have a buffer chain. Select a link and check
@@ -1333,7 +1331,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 	buf = l_ptr->proto_msg_queue;
 	if (buf) {
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
-		msg_set_bcast_ack(buf_msg(buf),l_ptr->owner->bclink.last_in);
+		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
 			l_ptr->unacked_window = 0;
 			buf_discard(buf);
@@ -1847,8 +1845,7 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
 		}
 		prev = crs;
 		crs = crs->next;
-	}
-	while (crs);
+	} while (crs);
 
 	/* Message is a duplicate of an existing message */
 
@@ -2215,11 +2212,11 @@ void tipc_link_changeover(struct link *l_ptr)
 
 		if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) {
 			struct tipc_msg *m = msg_get_wrapped(msg);
-			unchar* pos = (unchar*)m;
+			unchar *pos = (unchar *)m;
 
 			msgcount = msg_msgcnt(msg);
 			while (msgcount--) {
-				msg_set_seqno(m,msg_seqno(msg));
+				msg_set_seqno(m, msg_seqno(msg));
 				tipc_link_tunnel(l_ptr, &tunnel_hdr, m,
 						 msg_link_selector(m));
 				pos += align(msg_size(m));
@@ -2321,7 +2318,7 @@ static int link_recv_changeover_msg(struct link **l_ptr,
 	if (msg_typ == DUPLICATE_MSG) {
 		if (less(msg_seqno(msg), mod(dest_link->next_in_no)))
 			goto exit;
-		*buf = buf_extract(tunnel_buf,INT_H_SIZE);
+		*buf = buf_extract(tunnel_buf, INT_H_SIZE);
 		if (*buf == NULL) {
 			warn("Link changeover error, duplicate msg dropped\n");
 			goto exit;
@@ -2552,8 +2549,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 			/*  Prepare buffer for subsequent fragments. */
 
 			set_long_msg_seqno(pbuf, long_msg_seq_no);
-			set_fragm_size(pbuf,fragm_sz);
-			set_expected_frags(pbuf,exp_fragm_cnt - 1);
+			set_fragm_size(pbuf, fragm_sz);
+			set_expected_frags(pbuf, exp_fragm_cnt - 1);
 		} else {
 			warn("Link unable to reassemble fragmented message\n");
 		}
@@ -2580,7 +2577,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 			*m = buf_msg(pbuf);
 			return 1;
 		}
-		set_expected_frags(pbuf,exp_frags);
+		set_expected_frags(pbuf, exp_frags);
 		return 0;
 	}
 	buf_discard(fbuf);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index eca19c247b79..70967e637027 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -229,8 +229,8 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
 void tipc_link_reset(struct link *l_ptr);
 int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector);
 int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf);
-u32 tipc_link_get_max_pkt(u32 dest,u32 selector);
-int tipc_link_send_sections_fast(struct port* sender,
+u32 tipc_link_get_max_pkt(u32 dest, u32 selector);
+int tipc_link_send_sections_fast(struct port *sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destnode);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 2571ffb4d350..e81d43a8ea3d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -91,7 +91,7 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
 
 int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf)
+			    int max_size, int usrmem, struct sk_buff **buf)
 {
 	int dsz, sz, hsz, pos, res, cnt;
 
@@ -161,10 +161,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "LAST:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
 
 		}
-		tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg),
+		tipc_printf(buf, "NO(%u/%u):", msg_long_msgno(msg),
 			    msg_fragm_no(msg));
 		break;
 	case TIPC_LOW_IMPORTANCE:
@@ -190,7 +190,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "DIR:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE %u",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg));
 		}
 		if (msg_routed(msg) && !msg_non_seq(msg))
 			tipc_printf(buf, "ROUT:");
@@ -208,7 +208,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "WDRW:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN:%x", msg_type(msg));
 		}
 		if (msg_routed(msg))
 			tipc_printf(buf, "ROUT:");
@@ -227,39 +227,39 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			break;
 		case CONN_ACK:
 			tipc_printf(buf, "CONN_ACK:");
-			tipc_printf(buf, "ACK(%u):",msg_msgcnt(msg));
+			tipc_printf(buf, "ACK(%u):", msg_msgcnt(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		if (msg_routed(msg))
 			tipc_printf(buf, "ROUT:");
 		if (msg_reroute_cnt(msg))
-			tipc_printf(buf, "REROUTED(%u):",msg_reroute_cnt(msg));
+			tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg));
 		break;
 	case LINK_PROTOCOL:
-		tipc_printf(buf, "PROT:TIM(%u):",msg_timestamp(msg));
+		tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg));
 		switch (msg_type(msg)) {
 		case STATE_MSG:
 			tipc_printf(buf, "STATE:");
-			tipc_printf(buf, "%s:",msg_probe(msg) ? "PRB" :"");
-			tipc_printf(buf, "NXS(%u):",msg_next_sent(msg));
-			tipc_printf(buf, "GAP(%u):",msg_seq_gap(msg));
-			tipc_printf(buf, "LSTBC(%u):",msg_last_bcast(msg));
+			tipc_printf(buf, "%s:", msg_probe(msg) ? "PRB" : "");
+			tipc_printf(buf, "NXS(%u):", msg_next_sent(msg));
+			tipc_printf(buf, "GAP(%u):", msg_seq_gap(msg));
+			tipc_printf(buf, "LSTBC(%u):", msg_last_bcast(msg));
 			break;
 		case RESET_MSG:
 			tipc_printf(buf, "RESET:");
 			if (msg_size(msg) != msg_hdr_sz(msg))
-				tipc_printf(buf, "BEAR:%s:",msg_data(msg));
+				tipc_printf(buf, "BEAR:%s:", msg_data(msg));
 			break;
 		case ACTIVATE_MSG:
 			tipc_printf(buf, "ACTIVATE:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
-		tipc_printf(buf, "PLANE(%c):",msg_net_plane(msg));
-		tipc_printf(buf, "SESS(%u):",msg_session(msg));
+		tipc_printf(buf, "PLANE(%c):", msg_net_plane(msg));
+		tipc_printf(buf, "SESS(%u):", msg_session(msg));
 		break;
 	case CHANGEOVER_PROTOCOL:
 		tipc_printf(buf, "TUNL:");
@@ -269,10 +269,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			break;
 		case ORIGINAL_MSG:
 			tipc_printf(buf, "ORIG:");
-			tipc_printf(buf, "EXP(%u)",msg_msgcnt(msg));
+			tipc_printf(buf, "EXP(%u)", msg_msgcnt(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		break;
 	case ROUTE_DISTRIBUTOR:
@@ -280,26 +280,26 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		switch (msg_type(msg)) {
 		case EXT_ROUTING_TABLE:
 			tipc_printf(buf, "EXT_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case LOCAL_ROUTING_TABLE:
 			tipc_printf(buf, "LOCAL_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case SLAVE_ROUTING_TABLE:
 			tipc_printf(buf, "DP_TBL:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case ROUTE_ADDITION:
 			tipc_printf(buf, "ADD:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		case ROUTE_REMOVAL:
 			tipc_printf(buf, "REMOVE:");
-			tipc_printf(buf, "TO:%x:",msg_remote_node(msg));
+			tipc_printf(buf, "TO:%x:", msg_remote_node(msg));
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg));
 		}
 		break;
 	case LINK_CONFIG:
@@ -312,7 +312,7 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "DSC_RESP:");
 			break;
 		default:
-			tipc_printf(buf, "UNKNOWN TYPE:%x:",msg_type(msg));
+			tipc_printf(buf, "UNKNOWN TYPE:%x:", msg_type(msg));
 			break;
 		}
 		break;
@@ -393,8 +393,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 	}
 
 	if (msg_user(msg) ==  LINK_CONFIG) {
-		u32* raw = (u32*)msg;
-		struct tipc_media_addr* orig = (struct tipc_media_addr*)&raw[5];
+		u32 *raw = (u32 *)msg;
+		struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5];
 		tipc_printf(buf, ":REQL(%u):", msg_req_links(msg));
 		tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg));
 		tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 68b65ef3e74b..92c4c4fd7b3f 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -174,7 +174,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m)
 	return msg_bits(m, 0, 21, 0xf) << 2;
 }
 
-static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n)
+static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n)
 {
 	msg_set_bits(m, 0, 21, 0xf, n>>2);
 }
@@ -425,7 +425,7 @@ static inline u32 msg_routed(struct tipc_msg *m)
 {
 	if (likely(msg_short(m)))
 		return 0;
-	return(msg_destnode(m) ^ msg_orignode(m)) >> 11;
+	return (msg_destnode(m) ^ msg_orignode(m)) >> 11;
 }
 
 static inline u32 msg_nametype(struct tipc_msg *m)
@@ -863,7 +863,7 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
 int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect);
 int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf);
+			    int max_size, int usrmem, struct sk_buff **buf);
 
 static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
@@ -872,7 +872,7 @@ static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr
 
 static inline void msg_get_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
-	memcpy(a, &((int*)m)[5], sizeof(*a));
+	memcpy(a, &((int *)m)[5], sizeof(*a));
 }
 
 #endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ddc2ad4c2d32..46b2f31f96a1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -296,8 +296,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
 
 		sseq = &nseq->sseqs[inspos];
 		freesseq = &nseq->sseqs[nseq->first_free];
-		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof (*sseq));
-		memset(sseq, 0, sizeof (*sseq));
+		memmove(sseq + 1, sseq, (freesseq - sseq) * sizeof(*sseq));
+		memset(sseq, 0, sizeof(*sseq));
 		nseq->first_free++;
 		sseq->lower = lower;
 		sseq->upper = upper;
@@ -471,7 +471,7 @@ end_node:
 
 	if (!sseq->zone_list) {
 		free = &nseq->sseqs[nseq->first_free--];
-		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof (*sseq));
+		memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq));
 		removed_subseq = 1;
 	}
 
@@ -507,7 +507,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s
 
 	while (sseq != &nseq->sseqs[nseq->first_free]) {
 		struct publication *zl = sseq->zone_list;
-		if (zl && tipc_subscr_overlap(s,sseq->lower,sseq->upper)) {
+		if (zl && tipc_subscr_overlap(s, sseq->lower, sseq->upper)) {
 			struct publication *crs = zl;
 			int must_report = 1;
 
@@ -798,7 +798,7 @@ void tipc_nametbl_subscribe(struct subscription *s)
 	if (!seq) {
 		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
 	}
-	if (seq){
+	if (seq) {
 		spin_lock_bh(&seq->lock);
 		tipc_nameseq_subscribe(seq, s);
 		spin_unlock_bh(&seq->lock);
@@ -819,7 +819,7 @@ void tipc_nametbl_unsubscribe(struct subscription *s)
 
 	write_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(s->seq.type);
-	if (seq != NULL){
+	if (seq != NULL) {
 		spin_lock_bh(&seq->lock);
 		list_del_init(&s->nameseq_list);
 		spin_unlock_bh(&seq->lock);
@@ -852,7 +852,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
 	}
 
 	do {
-		sprintf (portIdStr, "<%u.%u.%u:%u>",
+		sprintf(portIdStr, "<%u.%u.%u:%u>",
 			 tipc_zone(publ->node), tipc_cluster(publ->node),
 			 tipc_node(publ->node), publ->ref);
 		tipc_printf(buf, "%-26s ", portIdStr);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 139882d4ed00..d228bd682655 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -46,7 +46,7 @@ struct port_list;
  * TIPC name types reserved for internal TIPC use (both current and planned)
  */
 
-#define TIPC_ZM_SRV 3  		/* zone master service name type */
+#define TIPC_ZM_SRV 3		/* zone master service name type */
 
 
 /**
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 126d774883dd..2ed162f91a08 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -334,7 +334,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 	/* Clean up broadcast reception remains */
 	n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = 0;
 	while (n_ptr->bclink.deferred_head) {
-		struct sk_buff* buf = n_ptr->bclink.deferred_head;
+		struct sk_buff *buf = n_ptr->bclink.deferred_head;
 		n_ptr->bclink.deferred_head = buf->next;
 		buf_discard(buf);
 	}
diff --git a/net/tipc/port.c b/net/tipc/port.c
index db14b7e0afdc..78fcb75bb082 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -54,8 +54,8 @@ static DEFINE_SPINLOCK(queue_lock);
 
 static LIST_HEAD(ports);
 static void port_handle_node_down(unsigned long ref);
-static struct sk_buff* port_build_self_abort_msg(struct port *,u32 err);
-static struct sk_buff* port_build_peer_abort_msg(struct port *,u32 err);
+static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err);
+static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err);
 static void port_timeout(unsigned long ref);
 
 
@@ -155,7 +155,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 
 void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 {
-	struct tipc_msg* msg;
+	struct tipc_msg *msg;
 	struct port_list dports = {0, NULL, };
 	struct port_list *item = dp;
 	int cnt = 0;
@@ -193,7 +193,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 			if ((index == 0) && (cnt != 0)) {
 				item = item->next;
 			}
-			msg_set_destport(buf_msg(b),item->ports[index]);
+			msg_set_destport(buf_msg(b), item->ports[index]);
 			tipc_port_recv_msg(b);
 		}
 	}
@@ -484,7 +484,7 @@ static void port_timeout(unsigned long ref)
 static void port_handle_node_down(unsigned long ref)
 {
 	struct port *p_ptr = tipc_port_lock(ref);
-	struct sk_buff* buf = NULL;
+	struct sk_buff *buf = NULL;
 
 	if (!p_ptr)
 		return;
@@ -620,8 +620,7 @@ static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id)
 			tipc_printf(buf, " via {%u,%u}",
 				    p_ptr->publ.conn_type,
 				    p_ptr->publ.conn_instance);
-	}
-	else if (p_ptr->publ.published) {
+	} else if (p_ptr->publ.published) {
 		tipc_printf(buf, " bound to");
 		list_for_each_entry(publ, &p_ptr->publications, pport_list) {
 			if (publ->lower == publ->upper)
@@ -1099,7 +1098,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer)
 	p_ptr->publ.connected = 1;
 	k_start_timer(&p_ptr->timer, p_ptr->probing_interval);
 
-	tipc_nodesub_subscribe(&p_ptr->subscription,peer->node,
+	tipc_nodesub_subscribe(&p_ptr->subscription, peer->node,
 			  (void *)(unsigned long)ref,
 			  (net_ev_handler)port_handle_node_down);
 	res = 0;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index a9c528799f2f..8e84b989949c 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -258,7 +258,7 @@ static inline void tipc_port_unlock(struct port *p_ptr)
 	spin_unlock_bh(p_ptr->publ.lock);
 }
 
-static inline struct port* tipc_port_deref(u32 ref)
+static inline struct port *tipc_port_deref(u32 ref)
 {
 	return (struct port *)tipc_ref_deref(ref);
 }
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index ab8ad32d8c20..3974529a66e7 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -178,14 +178,12 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
 		next_plus_upper = entry->ref;
 		tipc_ref_table.first_free = next_plus_upper & index_mask;
 		ref = (next_plus_upper & ~index_mask) + index;
-	}
-	else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
+	} else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
 		index = tipc_ref_table.init_point++;
 		entry = &(tipc_ref_table.entries[index]);
 		spin_lock_init(&entry->lock);
 		ref = tipc_ref_table.start_mask + index;
-	}
-	else {
+	} else {
 		ref = 0;
 	}
 	write_unlock_bh(&ref_table_lock);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1a2eb23c6223..e9fc5df79eb0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -375,7 +375,7 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
  *
  * NOTE: This routine doesn't need to take the socket lock since it only
  *       accesses socket information that is unchanging (or which changes in
- * 	 a completely predictable manner).
+ *       a completely predictable manner).
  */
 
 static int get_name(struct socket *sock, struct sockaddr *uaddr,
@@ -570,14 +570,12 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 					     dest->addr.name.domain,
 					     m->msg_iovlen,
 					     m->msg_iov);
-		}
-		else if (dest->addrtype == TIPC_ADDR_ID) {
+		} else if (dest->addrtype == TIPC_ADDR_ID) {
 			res = tipc_send2port(tport->ref,
 					     &dest->addr.id,
 					     m->msg_iovlen,
 					     m->msg_iov);
-		}
-		else if (dest->addrtype == TIPC_ADDR_MCAST) {
+		} else if (dest->addrtype == TIPC_ADDR_MCAST) {
 			if (needs_conn) {
 				res = -EOPNOTSUPP;
 				break;
@@ -812,8 +810,8 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
 		addr->addrtype = TIPC_ADDR_ID;
 		addr->addr.id.ref = msg_origport(msg);
 		addr->addr.id.node = msg_orignode(msg);
-		addr->addr.name.domain = 0;   	/* could leave uninitialized */
-		addr->scope = 0;   		/* could leave uninitialized */
+		addr->addr.name.domain = 0;	/* could leave uninitialized */
+		addr->scope = 0;		/* could leave uninitialized */
 		m->msg_namelen = sizeof(struct sockaddr_tipc);
 	}
 }
@@ -1743,10 +1741,10 @@ static int getsockopt(struct socket *sock,
 		value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
 		/* no need to set "res", since already 0 at this point */
 		break;
-	 case TIPC_NODE_RECVQ_DEPTH:
+	case TIPC_NODE_RECVQ_DEPTH:
 		value = (u32)atomic_read(&tipc_queue_size);
 		break;
-	 case TIPC_SOCK_RECVQ_DEPTH:
+	case TIPC_SOCK_RECVQ_DEPTH:
 		value = skb_queue_len(&sk->sk_receive_queue);
 		break;
 	default:
@@ -1772,7 +1770,7 @@ static int getsockopt(struct socket *sock,
  */
 
 static const struct proto_ops msg_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1793,7 +1791,7 @@ static const struct proto_ops msg_ops = {
 };
 
 static const struct proto_ops packet_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1814,7 +1812,7 @@ static const struct proto_ops packet_ops = {
 };
 
 static const struct proto_ops stream_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.release	= release,
 	.bind		= bind,
@@ -1835,7 +1833,7 @@ static const struct proto_ops stream_ops = {
 };
 
 static const struct net_proto_family tipc_family_ops = {
-	.owner 		= THIS_MODULE,
+	.owner		= THIS_MODULE,
 	.family		= AF_TIPC,
 	.create		= tipc_create
 };
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 510271152165..acc30e1833c9 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -538,7 +538,7 @@ int tipc_subscr_start(void)
 	struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV};
 	int res;
 
-	memset(&topsrv, 0, sizeof (topsrv));
+	memset(&topsrv, 0, sizeof(topsrv));
 	spin_lock_init(&topsrv.lock);
 	INIT_LIST_HEAD(&topsrv.subscriber_list);
 
-- 
cgit v1.2.3


From 2db9983a4318818845193bd577879c0620705e82 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:33 +0000
Subject: tipc: split variable assignments out of conditional expressions

Cleans up TIPC's source code to eliminate assigning values to variables
within conditional expressions, improving code readability and reducing
warnings from various code checker tools.

These changes are purely cosmetic and do not alter the operation of TIPC
in any way.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c |  3 ++-
 net/tipc/core.c   | 33 +++++++++++++++++++++------------
 net/tipc/link.c   | 16 ++++++++++------
 net/tipc/socket.c | 36 +++++++++++++++++++++++-------------
 4 files changed, 56 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 040f3ed32ec8..e9136f0abe60 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -251,7 +251,8 @@ static int bearer_name_validate(const char *name,
 	/* ensure all component parts of bearer name are present */
 
 	media_name = name_copy;
-	if ((if_name = strchr(media_name, ':')) == NULL)
+	if_name = strchr(media_name, ':');
+	if (if_name == NULL)
 		return 0;
 	*(if_name++) = 0;
 	media_len = if_name - media_name;
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 60b85ec6d106..e071579e0850 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -115,10 +115,11 @@ int tipc_core_start_net(unsigned long addr)
 {
 	int res;
 
-	if ((res = tipc_net_start(addr)) ||
-	    (res = tipc_eth_media_start())) {
+	res = tipc_net_start(addr);
+	if (!res)
+		res = tipc_eth_media_start();
+	if (res)
 		tipc_core_stop_net();
-	}
 	return res;
 }
 
@@ -157,15 +158,22 @@ static int tipc_core_start(void)
 	get_random_bytes(&tipc_random, sizeof(tipc_random));
 	tipc_mode = TIPC_NODE_MODE;
 
-	if ((res = tipc_handler_start()) ||
-	    (res = tipc_ref_table_init(tipc_max_ports, tipc_random)) ||
-	    (res = tipc_nametbl_init()) ||
-	    (res = tipc_k_signal((Handler)tipc_subscr_start, 0)) ||
-	    (res = tipc_k_signal((Handler)tipc_cfg_init, 0)) ||
-	    (res = tipc_netlink_start()) ||
-	    (res = tipc_socket_init())) {
+	res = tipc_handler_start();
+	if (!res)
+		res = tipc_ref_table_init(tipc_max_ports, tipc_random);
+	if (!res)
+		res = tipc_nametbl_init();
+	if (!res)
+		res = tipc_k_signal((Handler)tipc_subscr_start, 0);
+	if (!res)
+		res = tipc_k_signal((Handler)tipc_cfg_init, 0);
+	if (!res)
+		res = tipc_netlink_start();
+	if (!res)
+		res = tipc_socket_init();
+	if (res)
 		tipc_core_stop();
-	}
+
 	return res;
 }
 
@@ -188,7 +196,8 @@ static int __init tipc_init(void)
 	tipc_max_nodes = CONFIG_TIPC_NODES;
 	tipc_net_id = 4711;
 
-	if ((res = tipc_core_start()))
+	res = tipc_core_start();
+	if (res)
 		err("Unable to start in single node mode\n");
 	else
 		info("Started in single node mode\n");
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ef203a1581cd..de9d49108d41 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -187,14 +187,17 @@ static int link_name_validate(const char *name, struct link_name *name_parts)
 	/* ensure all component parts of link name are present */
 
 	addr_local = name_copy;
-	if ((if_local = strchr(addr_local, ':')) == NULL)
+	if_local = strchr(addr_local, ':');
+	if (if_local == NULL)
 		return 0;
 	*(if_local++) = 0;
-	if ((addr_peer = strchr(if_local, '-')) == NULL)
+	addr_peer = strchr(if_local, '-');
+	if (addr_peer == NULL)
 		return 0;
 	*(addr_peer++) = 0;
 	if_local_len = addr_peer - if_local;
-	if ((if_peer = strchr(addr_peer, ':')) == NULL)
+	if_peer = strchr(addr_peer, ':');
+	if (if_peer == NULL)
 		return 0;
 	*(if_peer++) = 0;
 	if_peer_len = strlen(if_peer) + 1;
@@ -2044,8 +2047,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 
 		strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg));
 
-		if ((msg_tol = msg_link_tolerance(msg)) &&
-		    (msg_tol > l_ptr->tolerance))
+		msg_tol = msg_link_tolerance(msg);
+		if (msg_tol > l_ptr->tolerance)
 			link_set_supervision_props(l_ptr, msg_tol);
 
 		if (msg_linkprio(msg) > l_ptr->priority)
@@ -2074,7 +2077,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 		break;
 	case STATE_MSG:
 
-		if ((msg_tol = msg_link_tolerance(msg)))
+		msg_tol = msg_link_tolerance(msg);
+		if (msg_tol)
 			link_set_supervision_props(l_ptr, msg_tol);
 
 		if (msg_linkprio(msg) &&
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e9fc5df79eb0..0895dec2967c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -563,7 +563,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 
 	do {
 		if (dest->addrtype == TIPC_ADDR_NAME) {
-			if ((res = dest_name_check(dest, m)))
+			res = dest_name_check(dest, m);
+			if (res)
 				break;
 			res = tipc_send2name(tport->ref,
 					     &dest->addr.name.name,
@@ -580,7 +581,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 				res = -EOPNOTSUPP;
 				break;
 			}
-			if ((res = dest_name_check(dest, m)))
+			res = dest_name_check(dest, m);
+			if (res)
 				break;
 			res = tipc_multicast(tport->ref,
 					     &dest->addr.nameseq,
@@ -750,7 +752,8 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
 				bytes_to_send = curr_left;
 			my_iov.iov_base = curr_start;
 			my_iov.iov_len = bytes_to_send;
-			if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) {
+			res = send_packet(NULL, sock, &my_msg, 0);
+			if (res < 0) {
 				if (bytes_sent)
 					res = bytes_sent;
 				goto exit;
@@ -845,12 +848,15 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 	if (unlikely(err)) {
 		anc_data[0] = err;
 		anc_data[1] = msg_data_sz(msg);
-		if ((res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data)))
-			return res;
-		if (anc_data[1] &&
-		    (res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
-				    msg_data(msg))))
+		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
+		if (res)
 			return res;
+		if (anc_data[1]) {
+			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
+				       msg_data(msg));
+			if (res)
+				return res;
+		}
 	}
 
 	/* Optionally capture message destination object */
@@ -878,9 +884,11 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 	default:
 		has_name = 0;
 	}
-	if (has_name &&
-	    (res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data)))
-		return res;
+	if (has_name) {
+		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
+		if (res)
+			return res;
+	}
 
 	return 0;
 }
@@ -1664,7 +1672,8 @@ static int setsockopt(struct socket *sock,
 		return -ENOPROTOOPT;
 	if (ol < sizeof(value))
 		return -EINVAL;
-	if ((res = get_user(value, (u32 __user *)ov)))
+	res = get_user(value, (u32 __user *)ov);
+	if (res)
 		return res;
 
 	lock_sock(sk);
@@ -1722,7 +1731,8 @@ static int getsockopt(struct socket *sock,
 		return put_user(0, ol);
 	if (lvl != SOL_TIPC)
 		return -ENOPROTOOPT;
-	if ((res = get_user(len, ol)))
+	res = get_user(len, ol);
+	if (res)
 		return res;
 
 	lock_sock(sk);
-- 
cgit v1.2.3


From e3ec9c7d5eea9adf2c604c623c987360cc700b88 Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:34 +0000
Subject: tipc: remove zeroing assignments to static global variables

Cleans up TIPC's source code to eliminate the needless initialization
of static variables to zero.

These changes are purely cosmetic and do not alter the operation of TIPC
in any way.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c      | 8 ++++----
 net/tipc/bearer.c     | 2 +-
 net/tipc/eth_media.c  | 2 +-
 net/tipc/handler.c    | 2 +-
 net/tipc/name_distr.c | 2 +-
 net/tipc/name_table.c | 2 +-
 net/tipc/node.c       | 2 +-
 net/tipc/port.c       | 4 ++--
 net/tipc/ref.c        | 2 +-
 net/tipc/socket.c     | 2 +-
 net/tipc/subscr.c     | 2 +-
 11 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 9de32564984a..99a1469e300a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -103,9 +103,9 @@ struct bclink {
 };
 
 
-static struct bcbearer *bcbearer = NULL;
-static struct bclink *bclink = NULL;
-static struct link *bcl = NULL;
+static struct bcbearer *bcbearer;
+static struct bclink *bclink;
+static struct link *bcl;
 static DEFINE_SPINLOCK(bc_lock);
 
 /* broadcast-capable node map */
@@ -425,7 +425,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
 void tipc_bclink_recv_pkt(struct sk_buff *buf)
 {
 #if (TIPC_BCAST_LOSS_RATE)
-	static int rx_count = 0;
+	static int rx_count;
 #endif
 	struct tipc_msg *msg = buf_msg(buf);
 	struct tipc_node *node = tipc_node_find(msg_prevnode(msg));
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index e9136f0abe60..68e729045424 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -42,7 +42,7 @@
 #define MAX_ADDR_STR 32
 
 static struct media media_list[MAX_MEDIA];
-static u32 media_count = 0;
+static u32 media_count;
 
 struct bearer tipc_bearers[MAX_BEARERS];
 
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 5dfe66357794..b69092eb95d8 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -56,7 +56,7 @@ struct eth_bearer {
 };
 
 static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
-static int eth_started = 0;
+static int eth_started;
 static struct notifier_block notifier;
 
 /**
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 0c70010a7dfe..274c98e164b7 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -45,7 +45,7 @@ struct queue_item {
 static struct kmem_cache *tipc_queue_item_cache;
 static struct list_head signal_queue_head;
 static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled = 0;
+static int handler_enabled;
 
 static void process_signal_queue(unsigned long dummy);
 
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index a6c989fd4988..483c226c9581 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -73,7 +73,7 @@ struct distr_item {
  */
 
 static LIST_HEAD(publ_root);
-static u32 publ_cnt = 0;
+static u32 publ_cnt;
 
 /**
  * publ_to_item - add publication info to a publication message
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 46b2f31f96a1..ea3e94e9d939 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -104,7 +104,7 @@ struct name_table {
 	u32 local_publ_count;
 };
 
-static struct name_table table = { NULL } ;
+static struct name_table table;
 static atomic_t rsv_publ_ok = ATOMIC_INIT(0);
 DEFINE_RWLOCK(tipc_nametbl_lock);
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2ed162f91a08..3af53e327f49 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -44,7 +44,7 @@ static void node_established_contact(struct tipc_node *n_ptr);
 
 static DEFINE_SPINLOCK(node_create_lock);
 
-u32 tipc_own_tag = 0;
+u32 tipc_own_tag;
 
 /**
  * tipc_node_create - create neighboring node
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 78fcb75bb082..c23f3380d0a7 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -46,8 +46,8 @@
 
 #define MAX_REJECT_SIZE 1024
 
-static struct sk_buff *msg_queue_head = NULL;
-static struct sk_buff *msg_queue_tail = NULL;
+static struct sk_buff *msg_queue_head;
+static struct sk_buff *msg_queue_tail;
 
 DEFINE_SPINLOCK(tipc_port_list_lock);
 static DEFINE_SPINLOCK(queue_lock);
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 3974529a66e7..83116892528b 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -89,7 +89,7 @@ struct ref_table {
  * have a reference value of 0 (although this is unlikely).
  */
 
-static struct ref_table tipc_ref_table = { NULL };
+static struct ref_table tipc_ref_table;
 
 static DEFINE_RWLOCK(ref_table_lock);
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 0895dec2967c..18aad573f551 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -68,7 +68,7 @@ static const struct proto_ops msg_ops;
 
 static struct proto tipc_proto;
 
-static int sockets_enabled = 0;
+static int sockets_enabled;
 
 static atomic_t tipc_queue_size = ATOMIC_INIT(0);
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index acc30e1833c9..ca04479c3d42 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -70,7 +70,7 @@ struct top_srv {
 	spinlock_t lock;
 };
 
-static struct top_srv topsrv = { 0 };
+static struct top_srv topsrv;
 
 /**
  * htohl - convert value to endianness used by destination
-- 
cgit v1.2.3


From a016892cd6eb8d3dd9769021b088917ac7371abd Mon Sep 17 00:00:00 2001
From: Allan Stephens <Allan.Stephens@windriver.com>
Date: Fri, 31 Dec 2010 18:59:35 +0000
Subject: tipc: remove extraneous braces from single statements

Cleans up TIPC's source code to eliminate the presence of unnecessary
use of {} around single statements.

These changes are purely cosmetic and do not alter the operation of TIPC
in any way.

Signed-off-by: Allan Stephens <Allan.Stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c      |  6 ++----
 net/tipc/bearer.c     |  3 +--
 net/tipc/link.c       | 25 +++++++++----------------
 net/tipc/msg.c        | 15 ++++++---------
 net/tipc/name_table.c |  6 ++----
 net/tipc/port.c       |  6 ++----
 net/tipc/socket.c     | 14 +++++---------
 7 files changed, 27 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 99a1469e300a..70ab5ef48766 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -196,9 +196,8 @@ static void bclink_retransmit_pkt(u32 after, u32 to)
 	struct sk_buff *buf;
 
 	buf = bcl->first_out;
-	while (buf && less_eq(buf_seqno(buf), after)) {
+	while (buf && less_eq(buf_seqno(buf), after))
 		buf = buf->next;
-	}
 	tipc_link_retransmit(bcl, buf, mod(to - after));
 }
 
@@ -224,9 +223,8 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
 	/* Skip over packets that node has previously acknowledged */
 
 	crs = bcl->first_out;
-	while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked)) {
+	while (crs && less_eq(buf_seqno(crs), n_ptr->bclink.acked))
 		crs = crs->next;
-	}
 
 	/* Update packets that node is now acknowledging */
 
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 68e729045424..837b7a467885 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -194,9 +194,8 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a)
 		unchar *addr = (unchar *)&a->dev_addr;
 
 		tipc_printf(pb, "UNKNOWN(%u)", media_type);
-		for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++) {
+		for (i = 0; i < (sizeof(*a) - sizeof(a->type)); i++)
 			tipc_printf(pb, "-%02x", addr[i]);
-		}
 	}
 }
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index de9d49108d41..18702f58d111 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -615,9 +615,8 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		return;		/* Not yet. */
 
 	if (link_blocked(l_ptr)) {
-		if (event == TIMEOUT_EVT) {
+		if (event == TIMEOUT_EVT)
 			link_set_timer(l_ptr, cont_intv);
-		}
 		return;	  /* Changeover going on */
 	}
 
@@ -940,11 +939,10 @@ int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector)
 	if (n_ptr) {
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
-		if (l_ptr) {
+		if (l_ptr)
 			res = tipc_link_send_buf(l_ptr, buf);
-		} else {
+		else
 			buf_discard(buf);
-		}
 		tipc_node_unlock(n_ptr);
 	} else {
 		buf_discard(buf);
@@ -1626,9 +1624,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
 
 		/* Ensure message data is a single contiguous unit */
 
-		if (unlikely(buf_linearize(buf))) {
+		if (unlikely(buf_linearize(buf)))
 			goto cont;
-		}
 
 		/* Handle arrival of a non-unicast link message */
 
@@ -1843,9 +1840,8 @@ u32 tipc_link_defer_pkt(struct sk_buff **head,
 				*head = buf;
 			return 1;
 		}
-		if (seq_no == msg_seqno(msg)) {
+		if (seq_no == msg_seqno(msg))
 			break;
-		}
 		prev = crs;
 		crs = crs->next;
 	} while (crs);
@@ -1959,11 +1955,10 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		msg_set_max_pkt(msg, l_ptr->max_pkt_target);
 	}
 
-	if (tipc_node_has_redundant_links(l_ptr->owner)) {
+	if (tipc_node_has_redundant_links(l_ptr->owner))
 		msg_set_redundant_link(msg);
-	} else {
+	else
 		msg_clear_redundant_link(msg);
-	}
 	msg_set_linkprio(msg, l_ptr->priority);
 
 	/* Ensure sequence number will not fit : */
@@ -2071,9 +2066,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 		l_ptr->peer_bearer_id = msg_bearer_id(msg);
 
 		/* Synchronize broadcast sequence numbers */
-		if (!tipc_node_has_redundant_links(l_ptr->owner)) {
+		if (!tipc_node_has_redundant_links(l_ptr->owner))
 			l_ptr->owner->bclink.last_in = mod(msg_last_bcast(msg));
-		}
 		break;
 	case STATE_MSG:
 
@@ -2108,9 +2102,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf)
 		max_pkt_ack = 0;
 		if (msg_probe(msg)) {
 			l_ptr->stats.recv_probes++;
-			if (msg_size(msg) > sizeof(l_ptr->proto_msg)) {
+			if (msg_size(msg) > sizeof(l_ptr->proto_msg))
 				max_pkt_ack = msg_size(msg);
-			}
 		}
 
 		/* Protocol message before retransmits, reduce loss risk */
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e81d43a8ea3d..bb6180c4fcbb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -348,7 +348,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 			tipc_printf(buf, "UNKNOWN ERROR(%x):",
 				    msg_errcode(msg));
 		}
-	default:{}
+	default:
+		break;
 	}
 
 	tipc_printf(buf, "HZ(%u):", msg_hdr_sz(msg));
@@ -357,9 +358,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 
 	if (msg_non_seq(msg))
 		tipc_printf(buf, "NOSEQ:");
-	else {
+	else
 		tipc_printf(buf, "ACK(%u):", msg_ack(msg));
-	}
 	tipc_printf(buf, "BACK(%u):", msg_bcast_ack(msg));
 	tipc_printf(buf, "PRND(%x)", msg_prevnode(msg));
 
@@ -387,9 +387,8 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 	if (msg_user(msg) == NAME_DISTRIBUTOR) {
 		tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg));
 		tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg));
-		if (msg_routed(msg)) {
+		if (msg_routed(msg))
 			tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg));
-		}
 	}
 
 	if (msg_user(msg) ==  LINK_CONFIG) {
@@ -405,12 +404,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str)
 		tipc_printf(buf, "TO(%u):", msg_bcgap_to(msg));
 	}
 	tipc_printf(buf, "\n");
-	if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg))) {
+	if ((usr == CHANGEOVER_PROTOCOL) && (msg_msgcnt(msg)))
 		tipc_msg_dbg(buf, msg_get_wrapped(msg), "      /");
-	}
-	if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT)) {
+	if ((usr == MSG_FRAGMENTER) && (msg_type(msg) == FIRST_FRAGMENT))
 		tipc_msg_dbg(buf, msg_get_wrapped(msg), "      /");
-	}
 }
 
 #endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ea3e94e9d939..205ed4a4e186 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -751,9 +751,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 	table.local_publ_count++;
 	publ = tipc_nametbl_insert_publ(type, lower, upper, scope,
 				   tipc_own_addr, port_ref, key);
-	if (publ && (scope != TIPC_NODE_SCOPE)) {
+	if (publ && (scope != TIPC_NODE_SCOPE))
 		tipc_named_publish(publ);
-	}
 	write_unlock_bh(&tipc_nametbl_lock);
 	return publ;
 }
@@ -795,9 +794,8 @@ void tipc_nametbl_subscribe(struct subscription *s)
 
 	write_lock_bh(&tipc_nametbl_lock);
 	seq = nametbl_find_seq(type);
-	if (!seq) {
+	if (!seq)
 		seq = tipc_nameseq_create(type, &table.types[hash(type)]);
-	}
 	if (seq) {
 		spin_lock_bh(&seq->lock);
 		tipc_nameseq_subscribe(seq, s);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index c23f3380d0a7..067bab2a0b98 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -131,9 +131,8 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq,
 			}
 		}
 		res = tipc_bclink_send_msg(buf);
-		if ((res < 0) && (dports.count != 0)) {
+		if ((res < 0) && (dports.count != 0))
 			buf_discard(ibuf);
-		}
 	} else {
 		ibuf = buf;
 	}
@@ -190,9 +189,8 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct port_list *dp)
 				warn("Unable to deliver multicast message(s)\n");
 				goto exit;
 			}
-			if ((index == 0) && (cnt != 0)) {
+			if ((index == 0) && (cnt != 0))
 				item = item->next;
-			}
 			msg_set_destport(buf_msg(b), item->ports[index]);
 			tipc_port_recv_msg(b);
 		}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 18aad573f551..2b02a3a80313 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -590,9 +590,8 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 					     m->msg_iov);
 		}
 		if (likely(res != -ELINKCONG)) {
-			if (needs_conn && (res >= 0)) {
+			if (needs_conn && (res >= 0))
 				sock->state = SS_CONNECTING;
-			}
 			break;
 		}
 		if (m->msg_flags & MSG_DONTWAIT) {
@@ -651,9 +650,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 		}
 
 		res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov);
-		if (likely(res != -ELINKCONG)) {
+		if (likely(res != -ELINKCONG))
 			break;
-		}
 		if (m->msg_flags & MSG_DONTWAIT) {
 			res = -EWOULDBLOCK;
 			break;
@@ -1418,9 +1416,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 	m.msg_name = dest;
 	m.msg_namelen = destlen;
 	res = send_msg(NULL, sock, &m, 0);
-	if (res < 0) {
+	if (res < 0)
 		goto exit;
-	}
 
 	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
 
@@ -1442,11 +1439,10 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 					advance_rx_queue(sk);
 			}
 		} else {
-			if (sock->state == SS_CONNECTED) {
+			if (sock->state == SS_CONNECTED)
 				res = -EISCONN;
-			} else {
+			else
 				res = -ECONNREFUSED;
-			}
 		}
 	} else {
 		if (res == 0)
-- 
cgit v1.2.3


From 40cd201e37073b3e2281cf2c73fcf5674f22267f Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sat, 1 Jan 2011 14:56:18 -0800
Subject: tipc: update log.h re-include protection to reflect new name

The tipc/dbg.h file was recently renamed to tipc/log.h,
but the re-include define was not updated accordingly.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/log.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/log.h b/net/tipc/log.h
index f4343bb9e429..2248d96238e6 100644
--- a/net/tipc/log.h
+++ b/net/tipc/log.h
@@ -34,8 +34,8 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifndef _TIPC_DBG_H
-#define _TIPC_DBG_H
+#ifndef _TIPC_LOG_H
+#define _TIPC_LOG_H
 
 /**
  * struct print_buf - TIPC print buffer structure
-- 
cgit v1.2.3


From 7f891cf1fc0d5d5c5b359caec77e5383e1d55986 Mon Sep 17 00:00:00 2001
From: Shmulik Ravid <shmulikr@broadcom.com>
Date: Mon, 3 Jan 2011 08:04:59 +0000
Subject: dcbnl: more informed return values for new dcbnl routines

More accurate return values for the following (new) dcbnl routines:
dcbnl_getdcbx()
dcbnl_setdcbx()
dcbnl_getfeatcfg()
dcbnl_setfeatcfg()

Signed-off-by: Shmulik Ravid <shmulikr@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 81 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 37 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index ff3c12d2cd72..9399af565715 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1286,10 +1286,10 @@ nlmsg_failure:
 static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
 			 u32 pid, u32 seq, u16 flags)
 {
-	int ret = -EINVAL;
+	int ret;
 
 	if (!netdev->dcbnl_ops->getdcbx)
-		return ret;
+		return -EOPNOTSUPP;
 
 	ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB,
 			  DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags);
@@ -1300,11 +1300,14 @@ static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb,
 static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb,
 			 u32 pid, u32 seq, u16 flags)
 {
-	int ret = -EINVAL;
+	int ret;
 	u8 value;
 
-	if (!tb[DCB_ATTR_DCBX] || !netdev->dcbnl_ops->setdcbx)
-		return ret;
+	if (!netdev->dcbnl_ops->setdcbx)
+		return -EOPNOTSUPP;
+
+	if (!tb[DCB_ATTR_DCBX])
+		return -EINVAL;
 
 	value = nla_get_u8(tb[DCB_ATTR_DCBX]);
 
@@ -1323,23 +1326,23 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb,
 	struct dcbmsg *dcb;
 	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest;
 	u8 value;
-	int ret = -EINVAL;
-	int i;
+	int ret, i;
 	int getall = 0;
 
-	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->getfeatcfg)
-		return ret;
+	if (!netdev->dcbnl_ops->getfeatcfg)
+		return -EOPNOTSUPP;
+
+	if (!tb[DCB_ATTR_FEATCFG])
+		return -EINVAL;
 
 	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
 			       dcbnl_featcfg_nest);
-	if (ret) {
-		ret = -EINVAL;
+	if (ret)
 		goto err_out;
-	}
 
 	dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!dcbnl_skb) {
-		ret = -EINVAL;
+		ret = -ENOBUFS;
 		goto err_out;
 	}
 
@@ -1351,8 +1354,8 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb,
 
 	nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG);
 	if (!nest) {
-		ret = -EINVAL;
-		goto err;
+		ret = -EMSGSIZE;
+		goto nla_put_failure;
 	}
 
 	if (data[DCB_FEATCFG_ATTR_ALL])
@@ -1363,30 +1366,22 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb,
 			continue;
 
 		ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value);
-		if (!ret) {
+		if (!ret)
 			ret = nla_put_u8(dcbnl_skb, i, value);
 
-			if (ret) {
-				nla_nest_cancel(dcbnl_skb, nest);
-				ret = -EINVAL;
-				goto err;
-			}
-		} else
-			goto err;
+		if (ret) {
+			nla_nest_cancel(dcbnl_skb, nest);
+			goto nla_put_failure;
+		}
 	}
 	nla_nest_end(dcbnl_skb, nest);
 
 	nlmsg_end(dcbnl_skb, nlh);
 
-	ret = rtnl_unicast(dcbnl_skb, &init_net, pid);
-	if (ret) {
-		ret = -EINVAL;
-		goto err_out;
-	}
-
-	return 0;
+	return rtnl_unicast(dcbnl_skb, &init_net, pid);
+nla_put_failure:
+	nlmsg_cancel(dcbnl_skb, nlh);
 nlmsg_failure:
-err:
 	kfree_skb(dcbnl_skb);
 err_out:
 	return ret;
@@ -1396,20 +1391,20 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb,
 			    u32 pid, u32 seq, u16 flags)
 {
 	struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1];
-	int ret = -EINVAL;
+	int ret, i;
 	u8 value;
-	int i;
 
-	if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->setfeatcfg)
-		return ret;
+	if (!netdev->dcbnl_ops->setfeatcfg)
+		return -ENOTSUPP;
+
+	if (!tb[DCB_ATTR_FEATCFG])
+		return -EINVAL;
 
 	ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG],
 			       dcbnl_featcfg_nest);
 
-	if (ret) {
-		ret = -EINVAL;
+	if (ret)
 		goto err;
-	}
 
 	for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) {
 		if (data[i] == NULL)
@@ -1420,14 +1415,12 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb,
 		ret = netdev->dcbnl_ops->setfeatcfg(netdev, i, value);
 
 		if (ret)
-			goto operr;
+			goto err;
 	}
-
-operr:
-	ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SFEATCFG,
-			  DCB_ATTR_FEATCFG, pid, seq, flags);
-
 err:
+	dcbnl_reply(ret, RTM_SETDCB, DCB_CMD_SFEATCFG, DCB_ATTR_FEATCFG,
+		    pid, seq, flags);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 0dfb33a0d7e2d9316eb4441a065ddd173f87223e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 3 Jan 2011 08:11:38 +0000
Subject: sch_red: report backlog information

Provide child qdisc backlog (byte count) information so that "tc -s
qdisc" can report it to user.

packet count is already correctly provided.

qdisc red 11: parent 1:11 limit 60Kb min 15Kb max 45Kb ecn
 Sent 3116427684 bytes 1415782 pkt (dropped 8, overlimits 7866 requeues 0)
 rate 242385Kbit 13630pps backlog 13560b 8p requeues 0
  marked 7865 early 1 pdrop 7 other 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_red.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 8d42bb3ba540..a67ba3c5a0cc 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -239,6 +239,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 		.Scell_log	= q->parms.Scell_log,
 	};
 
+	sch->qstats.backlog = q->qdisc->qstats.backlog;
 	opts = nla_nest_start(skb, TCA_OPTIONS);
 	if (opts == NULL)
 		goto nla_put_failure;
-- 
cgit v1.2.3


From 1a9180a20f3a314fda3e96b77570cad3864b2896 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Mon, 3 Jan 2011 11:08:58 +0000
Subject: net/bridge: fix trivial sparse errors

net/bridge//br_stp_if.c:148:66: warning: conversion of
net/bridge//br_stp_if.c:148:66:     int to
net/bridge//br_stp_if.c:148:66:     int enum umh_wait

net/bridge//netfilter/ebtables.c:1150:30: warning: Using plain integer as NULL pointer

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c          | 2 +-
 net/bridge/netfilter/ebtables.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 1d8826914cbf..79372d4a4055 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -145,7 +145,7 @@ static void br_stp_stop(struct net_bridge *br)
 	char *envp[] = { NULL };
 
 	if (br->stp_enabled == BR_USER_STP) {
-		r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+		r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
 		br_info(br, "userspace STP stopped, return code %d\n", r);
 
 		/* To start timers on any ports left in blocking */
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cbc9f395ab1e..16df0532d4b9 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1147,7 +1147,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
 	void *p;
 
 	if (input_table == NULL || (repl = input_table->table) == NULL ||
-	    repl->entries == 0 || repl->entries_size == 0 ||
+	    repl->entries == NULL || repl->entries_size == 0 ||
 	    repl->counters != NULL || input_table->private != NULL) {
 		BUGPRINT("Bad table data for ebt_register_table!!!\n");
 		return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From beb0f0a9fba1fa98b378329a9a5b0a73f25097ae Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 20 Dec 2010 21:19:26 +0000
Subject: kernel panic when mount NFSv4

On Tue, 2010-12-14 at 16:58 +0800, Mi Jinlong wrote:
> Hi,
>
> When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic
> at NFS client's __rpc_create_common function.
>
> The panic place is:
>   rpc_mkpipe
>     __rpc_lookup_create()          <=== find pipefile *idmap*
>     __rpc_mkpipe()                 <=== pipefile is *idmap*
>       __rpc_create_common()
>        ******  BUG_ON(!d_unhashed(dentry)); ******    *panic*
>
> It means that the dentry's d_flags have be set DCACHE_UNHASHED,
> but it should not be set here.
>
> Is someone known this bug? or give me some idea?
>
> A reproduce program is append, but it can't reproduce the bug every time.
> the export is: "/nfsroot       *(rw,no_root_squash,fsid=0,insecure)"
>
> And the panic message is append.
>
> ============================================================================
> #!/bin/sh
>
> LOOPTOTAL=768
> LOOPCOUNT=0
> ret=0
>
> while [ $LOOPCOUNT -ne $LOOPTOTAL ]
> do
> 	((LOOPCOUNT += 1))
> 	service nfs restart
> 	/usr/sbin/rpc.idmapd
> 	mount -t nfs4 127.0.0.1:/ /mnt|| return 1;
> 	ls -l /var/lib/nfs/rpc_pipefs/nfs/*/
> 	umount /mnt
> 	echo $LOOPCOUNT
> done
>
> ===============================================================================
> Code: af 60 01 00 00 89 fa 89 f0 e8 64 cf 89 f0 e8 5c 7c 64 cf 31 c0 8b 5c 24 10 8b
> 74 24 14 8b 7c 24 18 8b 6c 24 1c 83 c4 20 c3 <0f> 0b eb fc 8b 46 28 c7 44 24 08 20
> de ee f0 c7 44 24 04 56 ea
> EIP:[<f0ee92ea>] __rpc_create_common+0x8a/0xc0 [sunrpc] SS:ESP 0068:eccb5d28
> ---[ end trace 8f5606cd08928ed2]---
> Kernel panic - not syncing: Fatal exception
> Pid:7131, comm: mount.nfs4 Tainted: G     D   -------------------2.6.32 #1
> Call Trace:
>  [<c080ad18>] ? panic+0x42/0xed
>  [<c080e42c>] ? oops_end+0xbc/0xd0
>  [<c040b090>] ? do_invalid_op+0x0/0x90
>  [<c040b10f>] ? do_invalid_op+0x7f/0x90
>  [<f0ee92ea>] ? __rpc_create_common+0x8a/0xc0[sunrpc]
>  [<f0edc433>] ? rpc_free_task+0x33/0x70[sunrpc]
>  [<f0ed6508>] ? prc_call_sync+0x48/0x60[sunrpc]
>  [<f0ed656e>] ? rpc_ping+0x4e/0x60[sunrpc]
>  [<f0ed6eaf>] ? rpc_create+0x38f/0x4f0[sunrpc]
>  [<c080d80b>] ? error_code+0x73/0x78
>  [<f0ee92ea>] ? __rpc_create_common+0x8a/0xc0[sunrpc]
>  [<c0532bda>] ? d_lookup+0x2a/0x40
>  [<f0ee94b1>] ? rpc_mkpipe+0x111/0x1b0[sunrpc]
>  [<f10a59f4>] ? nfs_create_rpc_client+0xb4/0xf0[nfs]
>  [<f10d6c6d>] ? nfs_fscache_get_client_cookie+0x1d/0x50[nfs]
>  [<f10d3fcb>] ? nfs_idmap_new+0x7b/0x140[nfs]
>  [<c05e76aa>] ? strlcpy+0x3a/0x60
>  [<f10a60ca>] ? nfs4_set_client+0xea/0x2b0[nfs]
>  [<f10a6d0c>] ? nfs4_create_server+0xac/0x1b0[nfs]
>  [<c04f1400>] ? krealloc+0x40/0x50
>  [<f10b0e8b>] ? nfs4_remote_get_sb+0x6b/0x250[nfs]
>  [<c04f14ec>] ? kstrdup+0x3c/0x60
>  [<c0520739>] ? vfs_kern_mount+0x69/0x170
>  [<f10b1a3c>] ? nfs_do_root_mount+0x6c/0xa0[nfs]
>  [<f10b1b47>] ? nfs4_try_mount+0x37/0xa0[nfs]
>  [<f10afe6d>] ? nfs4_validate_text_mount_data+-x7d/0xf0[nfs]
>  [<f10b1c42>] ? nfs4_get_sb+0x92/0x2f0
>  [<c0520739>] ? vfs_kern_mount+0x69/0x170
>  [<c05366d2>] ? get_fs_type+0x32/0xb0
>  [<c052089f>] ? do_kern_mount+0x3f/0xe0
>  [<c053954f>] ? do_mount+0x2ef/0x740
>  [<c0537740>] ? copy_mount_options+0xb0/0x120
>  [<c0539a0e>] ? sys_mount+0x6e/0xa0

Hi,

Does the following patch fix the problem?

Cheers
  Trond

--------------------------
SUNRPC: Fix a BUG in __rpc_create_common

From: Trond Myklebust <Trond.Myklebust@netapp.com>

Mi Jinlong reports:

When testing NFSv4 at RHEL6 with kernel 2.6.32, I got a kernel panic
at NFS client's __rpc_create_common function.

The panic place is:
  rpc_mkpipe
      __rpc_lookup_create()          <=== find pipefile *idmap*
      __rpc_mkpipe()                 <=== pipefile is *idmap*
        __rpc_create_common()
         ******  BUG_ON(!d_unhashed(dentry)); ****** *panic*

The test is wrong: we can find ourselves with a hashed negative dentry here
if the idmapper tried to look up the file before we got round to creating
it.

Just replace the BUG_ON() with a d_drop(dentry).

Reported-by: Mi Jinlong <mijinlong@cn.fujitsu.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 10a17a37ec4e..5356d95343f3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -466,7 +466,7 @@ static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
 {
 	struct inode *inode;
 
-	BUG_ON(!d_unhashed(dentry));
+	d_drop(dentry);
 	inode = rpc_get_inode(dir->i_sb, mode);
 	if (!inode)
 		goto out_err;
-- 
cgit v1.2.3


From 919bbad580445801c22ef6ccbe624551fee652bd Mon Sep 17 00:00:00 2001
From: Milton Miller <miltonm@bga.com>
Date: Thu, 30 Dec 2010 02:01:03 -0600
Subject: mac80211: fix mesh forwarding when ratelimited too

Commit b51aff057c9d0ef6c529dc25fd9f775faf7b6c63 said:

    Under memory pressure, the mac80211 mesh code
    may helpfully print a message that it failed
    to clone a mesh frame and then will proceed
    to crash trying to use it anyway. Fix that.

Avoid the reference whenever the frame copy is unsuccessful
regardless of the debug message being suppressed or printed.

Cc: stable@kernel.org [2.6.27+]
Signed-off-by: Milton Miller <miltonm@bga.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b01e467b76c6..e98668fab503 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1788,11 +1788,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
 			fwd_skb = skb_copy(skb, GFP_ATOMIC);
 
-			if (!fwd_skb && net_ratelimit()) {
+			if (!fwd_skb && net_ratelimit())
 				printk(KERN_DEBUG "%s: failed to clone mesh frame\n",
 						   sdata->name);
+			if (!fwd_skb)
 				goto out;
-			}
 
 			fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
 			memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
-- 
cgit v1.2.3


From d2460f4b2fa6dbdeec800414f9cf5b1fc8b71197 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 3 Jan 2011 19:42:24 +0100
Subject: mac80211: add missing synchronize_rcu

commit ad0e2b5a00dbec303e4682b403bb6703d11dcdb2
Author: Johannes Berg <johannes.berg@intel.com>
Date:   Tue Jun 1 10:19:19 2010 +0200

    mac80211: simplify key locking

removed the synchronization against RCU and thus
opened a race window where we can use a key for
TX while it is already freed. Put a synchronisation
into the right place to close that window.

Reported-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Cc: stable@kernel.org [2.6.36+]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ccd676b2f599..aa1b734a5e99 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -366,6 +366,12 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
 	if (!key)
 		return;
 
+	/*
+	 * Synchronize so the TX path can no longer be using
+	 * this key before we free/remove it.
+	 */
+	synchronize_rcu();
+
 	if (key->local)
 		ieee80211_key_disable_hw_accel(key);
 
-- 
cgit v1.2.3


From 4cfda47b69d0a37e5fc0292addba6d0f5f671a14 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Mon, 27 Dec 2010 23:21:26 +0100
Subject: mac80211: ignore PSM bit of reordered frames

This patch tackles one of the problems of my
reorder release timer patch from August.

<http://www.spinics.net/lists/linux-wireless/msg57214.html>
=>
What if the reorder release triggers and ap_sta_ps_end
(called by ieee80211_rx_h_sta_process) accidentally clears
the WLAN_STA_PS_STA flag, because 100ms ago - when the STA
was still active - frames were put into the reorder buffer.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h | 2 ++
 net/mac80211/rx.c          | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index eadaa243a3da..a9eb67380da6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -167,6 +167,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
  * @IEEE80211_RX_FRAGMENTED: fragmented frame
  * @IEEE80211_RX_AMSDU: a-MSDU packet
  * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
+ * @IEEE80211_RX_DEFERRED_RELEASE: frame was subjected to receive reordering
  *
  * These are per-frame flags that are attached to a frame in the
  * @rx_flags field of &struct ieee80211_rx_status.
@@ -177,6 +178,7 @@ enum ieee80211_packet_rx_flags {
 	IEEE80211_RX_FRAGMENTED			= BIT(2),
 	IEEE80211_RX_AMSDU			= BIT(3),
 	IEEE80211_RX_MALFORMED_ACTION_FRM	= BIT(4),
+	IEEE80211_RX_DEFERRED_RELEASE		= BIT(5),
 };
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7c5d1b2ec453..260b48bac42e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -537,6 +537,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
 					    struct sk_buff_head *frames)
 {
 	struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
+	struct ieee80211_rx_status *status;
 
 	lockdep_assert_held(&tid_agg_rx->reorder_lock);
 
@@ -546,6 +547,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
 	/* release the frame from the reorder ring buffer */
 	tid_agg_rx->stored_mpdu_num--;
 	tid_agg_rx->reorder_buf[index] = NULL;
+	status = IEEE80211_SKB_RXCB(skb);
+	status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE;
 	__skb_queue_tail(frames, skb);
 
 no_frame:
@@ -1189,6 +1192,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 * exchange sequence.
 	 */
 	if (!ieee80211_has_morefrags(hdr->frame_control) &&
+	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
 	    (rx->sdata->vif.type == NL80211_IFTYPE_AP ||
 	     rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) {
 		if (test_sta_flags(sta, WLAN_STA_PS_STA)) {
-- 
cgit v1.2.3


From f76b57b47e5fd423f9827c7b0ba7bbd06cca6b9b Mon Sep 17 00:00:00 2001
From: Joel A Fernandes <agnel.joel@gmail.com>
Date: Tue, 28 Dec 2010 19:28:11 -0600
Subject: mac80211: Fix mesh portal communication with other mesh nodes.

Fixed a bug where if a mesh interface has a different MAC address from its bridge
interface, then it would not be able to send data traffic to any other mesh node.
This also adds support for communication between mesh nodes and external bridged
nodes by using a 6 address format if the source is a node within the mesh and the
destination is an external node proxied by a mesh portal.

Signed-off-by: Joel A Fernandes <agnel.joel@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d2b4b67a7b53..11d013590166 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1747,6 +1747,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 	__le16 fc;
 	struct ieee80211_hdr hdr;
 	struct ieee80211s_hdr mesh_hdr __maybe_unused;
+	struct mesh_path *mppath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
 	int nh_pos, h_pos;
@@ -1807,16 +1808,23 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			ret = NETDEV_TX_OK;
 			goto fail;
 		}
+		if (!is_multicast_ether_addr(skb->data))
+			mppath = mpp_path_lookup(skb->data, sdata);
 
+		/*
+		 * Do not use address extension, if it is a packet from
+		 * the same interface and the destination is not being
+		 * proxied by any other mest point.
+		 */
 		if (compare_ether_addr(sdata->vif.addr,
-				       skb->data + ETH_ALEN) == 0) {
+				       skb->data + ETH_ALEN) == 0 &&
+		    (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) {
 			hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc,
 					skb->data, skb->data + ETH_ALEN);
 			meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr,
 					sdata, NULL, NULL);
 		} else {
 			/* packet from other interface */
-			struct mesh_path *mppath;
 			int is_mesh_mcast = 1;
 			const u8 *mesh_da;
 
@@ -1827,8 +1835,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 			else {
 				static const u8 bcast[ETH_ALEN] =
 					{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
-
-				mppath = mpp_path_lookup(skb->data, sdata);
 				if (mppath) {
 					/* RA TA mDA mSA AE:DA SA */
 					mesh_da = mppath->mpp;
-- 
cgit v1.2.3


From ff039c6fb372c87a3cc4fd25bb846790cb35edb8 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Wed, 29 Dec 2010 17:09:02 -0500
Subject: cfg80211: fix transposition of words in printk

Fixes the misplaced article in the following:

"cfg80211: Updating information on frequency 5785 MHz for
    20 a MHz width channel with regulatory rule:"

Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 99d41831d76e..37693b6ef23a 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -752,7 +752,7 @@ static void chan_reg_rule_print_dbg(struct ieee80211_channel *chan,
 		snprintf(max_antenna_gain, 32, "%d", power_rule->max_antenna_gain);
 
 	REG_DBG_PRINT("Updating information on frequency %d MHz "
-		      "for %d a MHz width channel with regulatory rule:\n",
+		      "for a %d MHz width channel with regulatory rule:\n",
 		      chan->center_freq,
 		      KHZ_TO_MHZ(desired_bw_khz));
 
-- 
cgit v1.2.3


From 24a8fdad35835e8d71f7c4b978a246fafed2e7b4 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Thu, 30 Dec 2010 17:25:29 +0100
Subject: mac80211: serialize rx path workers

This patch addresses the issue of serialization between
the main rx path and various reorder release timers.

<http://www.spinics.net/lists/linux-wireless/msg57214.html>

It converts the previously local "frames" queue into
a global rx queue [rx_skb_queue]. This way, everyone
(be it the main rx-path or some reorder release timeout)
can add frames to it.

Only one active rx handler worker [ieee80211_rx_handlers]
is needed. All other threads which have lost the race of
"runnning_rx_handler" can now simply "return", knowing that
the thread who had the "edge" will also take care of their
workload.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  9 ++++++
 net/mac80211/main.c        |  3 ++
 net/mac80211/rx.c          | 80 +++++++++++++++++++++-------------------------
 3 files changed, 49 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a9eb67380da6..1bf2fc8b9bde 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -761,6 +761,15 @@ struct ieee80211_local {
 	struct sk_buff_head skb_queue;
 	struct sk_buff_head skb_queue_unreliable;
 
+	/*
+	 * Internal FIFO queue which is shared between multiple rx path
+	 * stages. Its main task is to provide a serialization mechanism,
+	 * so all rx handlers can enjoy having exclusive access to their
+	 * private data structures.
+	 */
+	struct sk_buff_head rx_skb_queue;
+	bool running_rx_handler;	/* protected by rx_skb_queue.lock */
+
 	/* Station data */
 	/*
 	 * The mutex only protects the list and counter,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a21d049caf19..32e7ae0dac98 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -569,6 +569,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	spin_lock_init(&local->filter_lock);
 	spin_lock_init(&local->queue_stop_reason_lock);
 
+	skb_queue_head_init(&local->rx_skb_queue);
+
 	INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work);
 
 	ieee80211_work_init(local);
@@ -912,6 +914,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 		wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
 	skb_queue_purge(&local->skb_queue);
 	skb_queue_purge(&local->skb_queue_unreliable);
+	skb_queue_purge(&local->rx_skb_queue);
 
 	destroy_workqueue(local->workqueue);
 	wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 260b48bac42e..12cbb4df81aa 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -533,9 +533,9 @@ static inline u16 seq_sub(u16 sq1, u16 sq2)
 
 static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
 					    struct tid_ampdu_rx *tid_agg_rx,
-					    int index,
-					    struct sk_buff_head *frames)
+					    int index)
 {
+	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
 	struct ieee80211_rx_status *status;
 
@@ -549,7 +549,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
 	tid_agg_rx->reorder_buf[index] = NULL;
 	status = IEEE80211_SKB_RXCB(skb);
 	status->rx_flags |= IEEE80211_RX_DEFERRED_RELEASE;
-	__skb_queue_tail(frames, skb);
+	skb_queue_tail(&local->rx_skb_queue, skb);
 
 no_frame:
 	tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
@@ -557,8 +557,7 @@ no_frame:
 
 static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
 					     struct tid_ampdu_rx *tid_agg_rx,
-					     u16 head_seq_num,
-					     struct sk_buff_head *frames)
+					     u16 head_seq_num)
 {
 	int index;
 
@@ -567,7 +566,7 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
 	while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) {
 		index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
-		ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
+		ieee80211_release_reorder_frame(hw, tid_agg_rx, index);
 	}
 }
 
@@ -583,8 +582,7 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
 #define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
 
 static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
-					  struct tid_ampdu_rx *tid_agg_rx,
-					  struct sk_buff_head *frames)
+					  struct tid_ampdu_rx *tid_agg_rx)
 {
 	int index, j;
 
@@ -615,8 +613,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 				wiphy_debug(hw->wiphy,
 					    "release an RX reorder frame due to timeout on earlier frames\n");
 #endif
-			ieee80211_release_reorder_frame(hw, tid_agg_rx,
-							j, frames);
+			ieee80211_release_reorder_frame(hw, tid_agg_rx, j);
 
 			/*
 			 * Increment the head seq# also for the skipped slots.
@@ -626,7 +623,7 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 			skipped = 0;
 		}
 	} else while (tid_agg_rx->reorder_buf[index]) {
-		ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
+		ieee80211_release_reorder_frame(hw, tid_agg_rx, index);
 		index =	seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
 							tid_agg_rx->buf_size;
 	}
@@ -682,8 +679,7 @@ set_release_timer:
  */
 static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 					     struct tid_ampdu_rx *tid_agg_rx,
-					     struct sk_buff *skb,
-					     struct sk_buff_head *frames)
+					     struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	u16 sc = le16_to_cpu(hdr->seq_ctrl);
@@ -710,8 +706,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 	if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) {
 		head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size));
 		/* release stored frames up to new head to stack */
-		ieee80211_release_reorder_frames(hw, tid_agg_rx, head_seq_num,
-						 frames);
+		ieee80211_release_reorder_frames(hw, tid_agg_rx, head_seq_num);
 	}
 
 	/* Now the new frame is always in the range of the reordering buffer */
@@ -739,7 +734,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
 	tid_agg_rx->reorder_buf[index] = skb;
 	tid_agg_rx->reorder_time[index] = jiffies;
 	tid_agg_rx->stored_mpdu_num++;
-	ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
+	ieee80211_sta_reorder_release(hw, tid_agg_rx);
 
  out:
 	spin_unlock(&tid_agg_rx->reorder_lock);
@@ -750,8 +745,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
  * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns
  * true if the MPDU was buffered, false if it should be processed.
  */
-static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
-				       struct sk_buff_head *frames)
+static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 {
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_local *local = rx->local;
@@ -806,11 +800,11 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	 * sure that we cannot get to it any more before doing
 	 * anything with it.
 	 */
-	if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames))
+	if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb))
 		return;
 
  dont_reorder:
-	__skb_queue_tail(frames, skb);
+	skb_queue_tail(&local->rx_skb_queue, skb);
 }
 
 static ieee80211_rx_result debug_noinline
@@ -1931,7 +1925,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
 }
 
 static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
+ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_local *local = rx->local;
 	struct ieee80211_hw *hw = &local->hw;
@@ -1971,8 +1965,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 
 		spin_lock(&tid_agg_rx->reorder_lock);
 		/* release stored frames up to start of BAR */
-		ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num,
-						 frames);
+		ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num);
 		spin_unlock(&tid_agg_rx->reorder_lock);
 
 		kfree_skb(skb);
@@ -2489,8 +2482,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
 	}
 }
 
-static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
-				  struct sk_buff_head *frames)
+static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx)
 {
 	ieee80211_rx_result res = RX_DROP_MONITOR;
 	struct sk_buff *skb;
@@ -2502,7 +2494,15 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
 			goto rxh_next;  \
 	} while (0);
 
-	while ((skb = __skb_dequeue(frames))) {
+	spin_lock(&rx->local->rx_skb_queue.lock);
+	if (rx->local->running_rx_handler)
+		goto unlock;
+
+	rx->local->running_rx_handler = true;
+
+	while ((skb = __skb_dequeue(&rx->local->rx_skb_queue))) {
+		spin_unlock(&rx->local->rx_skb_queue.lock);
+
 		/*
 		 * all the other fields are valid across frames
 		 * that belong to an aMPDU since they are on the
@@ -2525,12 +2525,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
 			CALL_RXH(ieee80211_rx_h_mesh_fwding);
 #endif
 		CALL_RXH(ieee80211_rx_h_data)
-
-		/* special treatment -- needs the queue */
-		res = ieee80211_rx_h_ctrl(rx, frames);
-		if (res != RX_CONTINUE)
-			goto rxh_next;
-
+		CALL_RXH(ieee80211_rx_h_ctrl);
 		CALL_RXH(ieee80211_rx_h_mgmt_check)
 		CALL_RXH(ieee80211_rx_h_action)
 		CALL_RXH(ieee80211_rx_h_userspace_mgmt)
@@ -2539,18 +2534,20 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
 
  rxh_next:
 		ieee80211_rx_handlers_result(rx, res);
-
+		spin_lock(&rx->local->rx_skb_queue.lock);
 #undef CALL_RXH
 	}
+
+	rx->local->running_rx_handler = false;
+
+ unlock:
+	spin_unlock(&rx->local->rx_skb_queue.lock);
 }
 
 static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
 {
-	struct sk_buff_head reorder_release;
 	ieee80211_rx_result res = RX_DROP_MONITOR;
 
-	__skb_queue_head_init(&reorder_release);
-
 #define CALL_RXH(rxh)			\
 	do {				\
 		res = rxh(rx);		\
@@ -2561,9 +2558,9 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
 	CALL_RXH(ieee80211_rx_h_passive_scan)
 	CALL_RXH(ieee80211_rx_h_check)
 
-	ieee80211_rx_reorder_ampdu(rx, &reorder_release);
+	ieee80211_rx_reorder_ampdu(rx);
 
-	ieee80211_rx_handlers(rx, &reorder_release);
+	ieee80211_rx_handlers(rx);
 	return;
 
  rxh_next:
@@ -2578,7 +2575,6 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
  */
 void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 {
-	struct sk_buff_head frames;
 	struct ieee80211_rx_data rx = {
 		.sta = sta,
 		.sdata = sta->sdata,
@@ -2591,13 +2587,11 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
 	if (!tid_agg_rx)
 		return;
 
-	__skb_queue_head_init(&frames);
-
 	spin_lock(&tid_agg_rx->reorder_lock);
-	ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
+	ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx);
 	spin_unlock(&tid_agg_rx->reorder_lock);
 
-	ieee80211_rx_handlers(&rx, &frames);
+	ieee80211_rx_handlers(&rx);
 }
 
 /* main receive path */
-- 
cgit v1.2.3


From 707e634326448190bfe2d937c44ec05c8dea63c4 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Thu, 30 Dec 2010 17:29:53 +0100
Subject: Revert "mac80211: temporarily disable reorder release timer"

This reverts enables the reorder release timer once again.

The issues laid out in:
<http://www.spinics.net/lists/linux-wireless/msg57214.html>

Have been addressed by:
	mac80211: serialize rx path workers
	mac80211: ignore PSM bit of reordered frames

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 12cbb4df81aa..9595e564badd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -628,26 +628,6 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 							tid_agg_rx->buf_size;
 	}
 
-	/*
-	 * Disable the reorder release timer for now.
-	 *
-	 * The current implementation lacks a proper locking scheme
-	 * which would protect vital statistic and debug counters
-	 * from being updated by two different but concurrent BHs.
-	 *
-	 * More information about the topic is available from:
-	 * - thread: http://marc.info/?t=128635927000001
-	 *
-	 * What was wrong:
-	 * =>  http://marc.info/?l=linux-wireless&m=128636170811964
-	 * "Basically the thing is that until your patch, the data
-	 *  in the struct didn't actually need locking because it
-	 *  was accessed by the RX path only which is not concurrent."
-	 *
-	 * List of what needs to be fixed:
-	 * => http://marc.info/?l=linux-wireless&m=128656352920957
-	 *
-
 	if (tid_agg_rx->stored_mpdu_num) {
 		j = index = seq_sub(tid_agg_rx->head_seq_num,
 				    tid_agg_rx->ssn) % tid_agg_rx->buf_size;
@@ -666,10 +646,6 @@ static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
 	} else {
 		del_timer(&tid_agg_rx->reorder_timer);
 	}
-	*/
-
-set_release_timer:
-	return;
 }
 
 /*
-- 
cgit v1.2.3


From b5c34f662a3519d34f9634a14d8de638fdbe0ca3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 3 Jan 2011 19:51:09 +0100
Subject: mac80211: fix some key comments and code

The key documentation is slightly out of date, fix
that. Also, the list entry in the key struct is no
longer used that way, so list_del_init() isn't
necessary any more there.

Finally, ieee80211_key_link() is no longer invoked
under RCU read lock, but rather with an appropriate
station lock held.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/key.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 84cf9196820f..315ee301b75e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -30,19 +30,20 @@
  * keys and per-station keys. Since each station belongs to an interface,
  * each station key also belongs to that interface.
  *
- * Hardware acceleration is done on a best-effort basis, for each key
- * that is eligible the hardware is asked to enable that key but if
- * it cannot do that they key is simply kept for software encryption.
- * There is currently no way of knowing this except by looking into
- * debugfs.
+ * Hardware acceleration is done on a best-effort basis for algorithms
+ * that are implemented in software,  for each key the hardware is asked
+ * to enable that key for offloading but if it cannot do that the key is
+ * simply kept for software encryption (unless it is for an algorithm
+ * that isn't implemented in software).
+ * There is currently no way of knowing whether a key is handled in SW
+ * or HW except by looking into debugfs.
  *
- * All key operations are protected internally.
- *
- * Within mac80211, key references are, just as STA structure references,
- * protected by RCU. Note, however, that some things are unprotected,
- * namely the key->sta dereferences within the hardware acceleration
- * functions. This means that sta_info_destroy() must remove the key
- * which waits for an RCU grace period.
+ * All key management is internally protected by a mutex. Within all
+ * other parts of mac80211, key references are, just as STA structure
+ * references, protected by RCU. Note, however, that some things are
+ * unprotected, namely the key->sta dereferences within the hardware
+ * acceleration functions. This means that sta_info_destroy() must
+ * remove the key which waits for an RCU grace period.
  */
 
 static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
@@ -279,13 +280,8 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
 							 new->conf.keyidx);
 	}
 
-	if (old) {
-		/*
-		 * We'll use an empty list to indicate that the key
-		 * has already been removed.
-		 */
-		list_del_init(&old->list);
-	}
+	if (old)
+		list_del(&old->list);
 }
 
 struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
@@ -420,8 +416,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
 			struct sta_info *ap;
 
 			/*
-			 * We're getting a sta pointer in,
-			 * so must be under RCU read lock.
+			 * We're getting a sta pointer in, so must be under
+			 * appropriate locking for sta_info_get().
 			 */
 
 			/* same here, the AP could be using QoS */
-- 
cgit v1.2.3


From 052ff461c8427629aee887ccc27478fc7373237c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 5 Jan 2011 12:47:28 +0100
Subject: [S390] irq: have detailed statistics for interrupt types

Up to now /proc/interrupts only has statistics for external and i/o
interrupts but doesn't split up them any further.
This patch adds a line for every single interrupt source so that it
is possible to easier tell what the machine is/was doing.
Part of the output now looks like this;

           CPU0       CPU2       CPU4
EXT:       3898       4232       2305
I/O:        782        315        245
CLK:       1029       1964        727   [EXT] Clock Comparator
IPI:       2868       2267       1577   [EXT] Signal Processor
TMR:          0          0          0   [EXT] CPU Timer
TAL:          0          0          0   [EXT] Timing Alert
PFL:          0          0          0   [EXT] Pseudo Page Fault
[...]
NMI:          0          1          1   [NMI] Machine Checks

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/irq.h    | 23 +++++++++++------------
 arch/s390/kernel/irq.c         | 30 ++++++++++++++++++++++++------
 arch/s390/kernel/nmi.c         |  3 ++-
 arch/s390/kernel/smp.c         |  1 +
 arch/s390/kernel/time.c        |  3 +++
 arch/s390/kernel/vtime.c       |  1 +
 arch/s390/mm/fault.c           |  2 ++
 drivers/s390/block/dasd_diag.c |  2 ++
 drivers/s390/char/sclp.c       |  6 +++++-
 drivers/s390/kvm/kvm_virtio.c  |  3 +++
 net/iucv/iucv.c                |  2 ++
 11 files changed, 56 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 7da991a858f8..f65faf63ab3a 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,23 +1,22 @@
 #ifndef _ASM_IRQ_H
 #define _ASM_IRQ_H
 
-#ifdef __KERNEL__
 #include <linux/hardirq.h>
 
-/*
- * the definition of irqs has changed in 2.5.46:
- * NR_IRQS is no longer the number of i/o
- * interrupts (65536), but rather the number
- * of interrupt classes (2).
- * Only external and i/o interrupts make much sense here (CH).
- */
-
 enum interruption_class {
 	EXTERNAL_INTERRUPT,
 	IO_INTERRUPT,
-
+	EXTINT_CLK,
+	EXTINT_IPI,
+	EXTINT_TMR,
+	EXTINT_TLA,
+	EXTINT_PFL,
+	EXTINT_DSD,
+	EXTINT_VRT,
+	EXTINT_SCP,
+	EXTINT_IUC,
+	NMI_NMI,
 	NR_IRQS,
 };
 
-#endif /* __KERNEL__ */
-#endif
+#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 026a37a94fc9..9bd049b8f997 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -1,7 +1,5 @@
 /*
- *  arch/s390/kernel/irq.c
- *
- *    Copyright IBM Corp. 2004,2007
+ *    Copyright IBM Corp. 2004,2010
  *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
  *		 Thomas Spatzier (tspat@de.ibm.com)
  *
@@ -17,12 +15,31 @@
 #include <linux/proc_fs.h>
 #include <linux/profile.h>
 
+struct irq_class {
+	char *name;
+	char *desc;
+};
+
+static const struct irq_class intrclass_names[] = {
+	{.name = "EXT" },
+	{.name = "I/O" },
+	{.name = "CLK", .desc = "[EXT] Clock Comparator" },
+	{.name = "IPI", .desc = "[EXT] Signal Processor" },
+	{.name = "TMR", .desc = "[EXT] CPU Timer" },
+	{.name = "TAL", .desc = "[EXT] Timing Alert" },
+	{.name = "PFL", .desc = "[EXT] Pseudo Page Fault" },
+	{.name = "DSD", .desc = "[EXT] DASD Diag" },
+	{.name = "VRT", .desc = "[EXT] Virtio" },
+	{.name = "SCP", .desc = "[EXT] Service Call" },
+	{.name = "IUC", .desc = "[EXT] IUCV" },
+	{.name = "NMI", .desc = "[NMI] Machine Check" },
+};
+
 /*
  * show_interrupts is needed by /proc/interrupts.
  */
 int show_interrupts(struct seq_file *p, void *v)
 {
-	static const char *intrclass_names[] = { "EXT", "I/O", };
 	int i = *(loff_t *) v, j;
 
 	get_online_cpus();
@@ -34,15 +51,16 @@ int show_interrupts(struct seq_file *p, void *v)
 	}
 
 	if (i < NR_IRQS) {
-		seq_printf(p, "%s: ", intrclass_names[i]);
+		seq_printf(p, "%s: ", intrclass_names[i].name);
 #ifndef CONFIG_SMP
 		seq_printf(p, "%10u ", kstat_irqs(i));
 #else
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
+		if (intrclass_names[i].desc)
+			seq_printf(p, "  %s", intrclass_names[i].desc);
                 seq_putc(p, '\n');
-
         }
 	put_online_cpus();
         return 0;
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 1995c1712fc8..fab88431a06f 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -8,6 +8,7 @@
  *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
  */
 
+#include <linux/kernel_stat.h>
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/hardirq.h>
@@ -255,7 +256,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	nmi_enter();
 	s390_idle_check(regs, S390_lowcore.mcck_clock,
 			S390_lowcore.mcck_enter_timer);
-
+	kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
 	umode = user_mode(regs);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 94cf510b8fe1..a9702df22f3a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -161,6 +161,7 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 {
 	unsigned long bits;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
 	/*
 	 * handle bit signal external calls
 	 *
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 4c9d72d2e273..9e7b039458da 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -15,6 +15,7 @@
 #define KMSG_COMPONENT "time"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/kernel_stat.h>
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -160,6 +161,7 @@ static void clock_comparator_interrupt(unsigned int ext_int_code,
 				       unsigned int param32,
 				       unsigned long param64)
 {
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_CLK]++;
 	if (S390_lowcore.clock_comparator == -1ULL)
 		set_clock_comparator(S390_lowcore.clock_comparator);
 }
@@ -170,6 +172,7 @@ static void stp_timing_alert(struct stp_irq_parm *);
 static void timing_alert_interrupt(unsigned int ext_int_code,
 				   unsigned int param32, unsigned long param64)
 {
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_TLA]++;
 	if (param32 & 0x00c40000)
 		etr_timing_alert((struct etr_irq_parm *) &param32);
 	if (param32 & 0x00038000)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 8636dd00e393..1ccdf4d8aa85 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -324,6 +324,7 @@ static void do_cpu_timer_interrupt(unsigned int ext_int_code,
 	struct list_head cb_list;	/* the callback queue */
 	__u64 elapsed, next;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_TMR]++;
 	INIT_LIST_HEAD(&cb_list);
 	vq = &__get_cpu_var(virt_cpu_timer);
 
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe5701e9efbf..839b16df72b3 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -10,6 +10,7 @@
  *    Copyright (C) 1995  Linus Torvalds
  */
 
+#include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
@@ -543,6 +544,7 @@ static void pfault_interrupt(unsigned int ext_int_code,
 	struct task_struct *tsk;
 	__u16 subcode;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
 	/*
 	 * Get the external interruption subcode & pfault
 	 * initial/completion signal bit. VM stores this 
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 266b34b55403..a3a5db58df18 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -10,6 +10,7 @@
 
 #define KMSG_COMPONENT "dasd"
 
+#include <linux/kernel_stat.h>
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -238,6 +239,7 @@ static void dasd_ext_handler(unsigned int ext_int_code,
 	addr_t ip;
 	int rc;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_DSD]++;
 	switch (ext_int_code >> 24) {
 	case DASD_DIAG_CODE_31BIT:
 		ip = (addr_t) param32;
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 35cc4686b99b..e65572e504ba 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -7,6 +7,7 @@
  *	      Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
+#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/spinlock.h>
@@ -18,8 +19,9 @@
 #include <linux/suspend.h>
 #include <linux/completion.h>
 #include <linux/platform_device.h>
-#include <asm/types.h>
 #include <asm/s390_ext.h>
+#include <asm/types.h>
+#include <asm/irq.h>
 
 #include "sclp.h"
 
@@ -402,6 +404,7 @@ static void sclp_interrupt_handler(unsigned int ext_int_code,
 	u32 finished_sccb;
 	u32 evbuf_pending;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_SCP]++;
 	spin_lock(&sclp_lock);
 	finished_sccb = param32 & 0xfffffff8;
 	evbuf_pending = param32 & 0x3;
@@ -824,6 +827,7 @@ static void sclp_check_handler(unsigned int ext_int_code,
 {
 	u32 finished_sccb;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_SCP]++;
 	finished_sccb = param32 & 0xfffffff8;
 	/* Is this the interrupt we are waiting for? */
 	if (finished_sccb == 0)
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 375aeeaf9ea5..414427d64a8f 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -10,6 +10,7 @@
  *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
  */
 
+#include <linux/kernel_stat.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/err.h>
@@ -25,6 +26,7 @@
 #include <asm/kvm_virtio.h>
 #include <asm/setup.h>
 #include <asm/s390_ext.h>
+#include <asm/irq.h>
 
 #define VIRTIO_SUBCODE_64 0x0D00
 
@@ -379,6 +381,7 @@ static void kvm_extint_handler(unsigned int ext_int_code,
 	u16 subcode;
 	u32 param;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_VRT]++;
 	subcode = ext_int_code >> 16;
 	if ((subcode & 0xff00) != VIRTIO_SUBCODE_64)
 		return;
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f7db676de77d..1ee5dab3cfae 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -36,6 +36,7 @@
 #define KMSG_COMPONENT "iucv"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/spinlock.h>
@@ -1804,6 +1805,7 @@ static void iucv_external_interrupt(unsigned int ext_int_code,
 	struct iucv_irq_data *p;
 	struct iucv_irq_list *work;
 
+	kstat_cpu(smp_processor_id()).irqs[EXTINT_IUC]++;
 	p = iucv_irq_data[smp_processor_id()];
 	if (p->ippathid >= iucv_max_pathid) {
 		WARN_ON(p->ippathid >= iucv_max_pathid);
-- 
cgit v1.2.3


From 21f83589644bb2ed98079bf1e2154c8e70ca6a6c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 18 Dec 2010 17:20:47 +0100
Subject: mac80211: implement hardware offload for remain-on-channel

This allows drivers to support remain-on-channel
offload if they implement smarter timing or need
to use a device implementation like iwlwifi.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      | 19 +++++++++++
 net/mac80211/cfg.c          | 83 +++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/driver-ops.h   | 30 ++++++++++++++++
 net/mac80211/driver-trace.h | 80 +++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h  |  8 +++++
 net/mac80211/iface.c        |  9 +++--
 net/mac80211/main.c         |  5 ++-
 net/mac80211/offchannel.c   | 75 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 306 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 479c35e160e3..5b3fd5add7a4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -365,6 +365,7 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_INTFL_NL80211_FRAME_TX	= BIT(21),
 	IEEE80211_TX_CTL_LDPC			= BIT(22),
 	IEEE80211_TX_CTL_STBC			= BIT(23) | BIT(24),
+	IEEE80211_TX_CTL_TX_OFFCHAN		= BIT(25),
 };
 
 #define IEEE80211_TX_CTL_STBC_SHIFT		23
@@ -1824,6 +1825,12 @@ struct ieee80211_ops {
 	int (*napi_poll)(struct ieee80211_hw *hw, int budget);
 	int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
 	int (*get_antenna)(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant);
+
+	int (*remain_on_channel)(struct ieee80211_hw *hw,
+				 struct ieee80211_channel *chan,
+				 enum nl80211_channel_type channel_type,
+				 int duration);
+	int (*cancel_remain_on_channel)(struct ieee80211_hw *hw);
 };
 
 /**
@@ -2729,6 +2736,18 @@ void ieee80211_request_smps(struct ieee80211_vif *vif,
  */
 void ieee80211_key_removed(struct ieee80211_key_conf *key_conf);
 
+/**
+ * ieee80211_ready_on_channel - notification of remain-on-channel start
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ */
+void ieee80211_ready_on_channel(struct ieee80211_hw *hw);
+
+/**
+ * ieee80211_remain_on_channel_expired - remain_on_channel duration expired
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ */
+void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw);
+
 /* Rate control API */
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 5892b0302454..168a6ba8fc28 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1593,6 +1593,37 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 	return 0;
 }
 
+static int ieee80211_remain_on_channel_hw(struct ieee80211_local *local,
+					  struct net_device *dev,
+					  struct ieee80211_channel *chan,
+					  enum nl80211_channel_type chantype,
+					  unsigned int duration, u64 *cookie)
+{
+	int ret;
+	u32 random_cookie;
+
+	lockdep_assert_held(&local->mtx);
+
+	if (local->hw_roc_cookie)
+		return -EBUSY;
+	/* must be nonzero */
+	random_cookie = random32() | 1;
+
+	*cookie = random_cookie;
+	local->hw_roc_dev = dev;
+	local->hw_roc_cookie = random_cookie;
+	local->hw_roc_channel = chan;
+	local->hw_roc_channel_type = chantype;
+	local->hw_roc_duration = duration;
+	ret = drv_remain_on_channel(local, chan, chantype, duration);
+	if (ret) {
+		local->hw_roc_channel = NULL;
+		local->hw_roc_cookie = 0;
+	}
+
+	return ret;
+}
+
 static int ieee80211_remain_on_channel(struct wiphy *wiphy,
 				       struct net_device *dev,
 				       struct ieee80211_channel *chan,
@@ -1601,16 +1632,62 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,
 				       u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+
+	if (local->ops->remain_on_channel) {
+		int ret;
+
+		mutex_lock(&local->mtx);
+		ret = ieee80211_remain_on_channel_hw(local, dev,
+						     chan, channel_type,
+						     duration, cookie);
+		mutex_unlock(&local->mtx);
+
+		return ret;
+	}
 
 	return ieee80211_wk_remain_on_channel(sdata, chan, channel_type,
 					      duration, cookie);
 }
 
+static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local,
+						 u64 cookie)
+{
+	int ret;
+
+	lockdep_assert_held(&local->mtx);
+
+	if (local->hw_roc_cookie != cookie)
+		return -ENOENT;
+
+	ret = drv_cancel_remain_on_channel(local);
+	if (ret)
+		return ret;
+
+	local->hw_roc_cookie = 0;
+	local->hw_roc_channel = NULL;
+
+	ieee80211_recalc_idle(local);
+
+	return 0;
+}
+
 static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 					      struct net_device *dev,
 					      u64 cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+
+	if (local->ops->cancel_remain_on_channel) {
+		int ret;
+
+		mutex_lock(&local->mtx);
+		ret = ieee80211_cancel_remain_on_channel_hw(local, cookie);
+		mutex_unlock(&local->mtx);
+
+		return ret;
+	}
 
 	return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
 }
@@ -1662,6 +1739,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	     channel_type != local->_oper_channel_type))
 		is_offchan = true;
 
+	if (chan == local->hw_roc_channel) {
+		/* TODO: check channel type? */
+		is_offchan = false;
+		flags |= IEEE80211_TX_CTL_TX_OFFCHAN;
+	}
+
 	if (is_offchan && !offchan)
 		return -EBUSY;
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index af0c4398cceb..98d589960a49 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -465,4 +465,34 @@ static inline int drv_get_antenna(struct ieee80211_local *local,
 	return ret;
 }
 
+static inline int drv_remain_on_channel(struct ieee80211_local *local,
+					struct ieee80211_channel *chan,
+					enum nl80211_channel_type chantype,
+					unsigned int duration)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_remain_on_channel(local, chan, chantype, duration);
+	ret = local->ops->remain_on_channel(&local->hw, chan, chantype,
+					    duration);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
+{
+	int ret;
+
+	might_sleep();
+
+	trace_drv_cancel_remain_on_channel(local);
+	ret = local->ops->cancel_remain_on_channel(&local->hw);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index c2772f23ac9c..49c84218b2f4 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -933,6 +933,50 @@ TRACE_EVENT(drv_get_antenna,
 	)
 );
 
+TRACE_EVENT(drv_remain_on_channel,
+	TP_PROTO(struct ieee80211_local *local, struct ieee80211_channel *chan,
+		 enum nl80211_channel_type chantype, unsigned int duration),
+
+	TP_ARGS(local, chan, chantype, duration),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(int, center_freq)
+		__field(int, channel_type)
+		__field(unsigned int, duration)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->center_freq = chan->center_freq;
+		__entry->channel_type = chantype;
+		__entry->duration = duration;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " freq:%dMHz duration:%dms",
+		LOCAL_PR_ARG, __entry->center_freq, __entry->duration
+	)
+);
+
+TRACE_EVENT(drv_cancel_remain_on_channel,
+	TP_PROTO(struct ieee80211_local *local),
+
+	TP_ARGS(local),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT, LOCAL_PR_ARG
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
@@ -1170,6 +1214,42 @@ TRACE_EVENT(api_chswitch_done,
 	)
 );
 
+TRACE_EVENT(api_ready_on_channel,
+	TP_PROTO(struct ieee80211_local *local),
+
+	TP_ARGS(local),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT, LOCAL_PR_ARG
+	)
+);
+
+TRACE_EVENT(api_remain_on_channel_expired,
+	TP_PROTO(struct ieee80211_local *local),
+
+	TP_ARGS(local),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT, LOCAL_PR_ARG
+	)
+);
+
 /*
  * Tracing for internal functions
  * (which may also be called in response to driver calls)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 95cdd2a3f809..f866af8de5ac 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -951,6 +951,13 @@ struct ieee80211_local {
 	} debugfs;
 #endif
 
+	struct ieee80211_channel *hw_roc_channel;
+	struct net_device *hw_roc_dev;
+	struct work_struct hw_roc_start, hw_roc_done;
+	enum nl80211_channel_type hw_roc_channel_type;
+	unsigned int hw_roc_duration;
+	u32 hw_roc_cookie;
+
 	/* dummy netdev for use w/ NAPI */
 	struct net_device napi_dev;
 
@@ -1142,6 +1149,7 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local);
 void ieee80211_offchannel_stop_station(struct ieee80211_local *local);
 void ieee80211_offchannel_return(struct ieee80211_local *local,
 				 bool enable_beaconing);
+void ieee80211_hw_roc_setup(struct ieee80211_local *local);
 
 /* interface handling */
 int ieee80211_iface_init(void);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b6db237672ff..8acba456744e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1264,7 +1264,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 	int count = 0;
-	bool working = false, scanning = false;
+	bool working = false, scanning = false, hw_roc = false;
 	struct ieee80211_work *wk;
 	unsigned int led_trig_start = 0, led_trig_stop = 0;
 
@@ -1308,6 +1308,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 		local->scan_sdata->vif.bss_conf.idle = false;
 	}
 
+	if (local->hw_roc_channel)
+		hw_roc = true;
+
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (sdata->old_idle == sdata->vif.bss_conf.idle)
 			continue;
@@ -1316,7 +1319,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
 	}
 
-	if (working || scanning)
+	if (working || scanning || hw_roc)
 		led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK;
 	else
 		led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK;
@@ -1328,6 +1331,8 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 
 	ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop);
 
+	if (hw_roc)
+		return ieee80211_idle_off(local, "hw remain-on-channel");
 	if (working)
 		return ieee80211_idle_off(local, "working");
 	if (scanning)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4b088b3c25e8..485d36bc9a46 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -609,6 +609,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 
 	ieee80211_led_names(local);
 
+	ieee80211_hw_roc_setup(local);
+
 	return local_to_hw(local);
 }
 EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -753,7 +755,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
-	local->hw.wiphy->max_remain_on_channel_duration = 5000;
+	if (!local->ops->remain_on_channel)
+		local->hw.wiphy->max_remain_on_channel_duration = 5000;
 
 	result = wiphy_register(local->hw.wiphy);
 	if (result < 0)
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 4b564091e51d..49b9ec22d9b6 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -14,6 +14,7 @@
  */
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
+#include "driver-trace.h"
 
 /*
  * inform AP that we will go to sleep so that it will buffer the frames
@@ -190,3 +191,77 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
 	}
 	mutex_unlock(&local->iflist_mtx);
 }
+
+static void ieee80211_hw_roc_start(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local, hw_roc_start);
+
+	mutex_lock(&local->mtx);
+
+	if (!local->hw_roc_channel) {
+		mutex_unlock(&local->mtx);
+		return;
+	}
+
+	ieee80211_recalc_idle(local);
+
+	cfg80211_ready_on_channel(local->hw_roc_dev, local->hw_roc_cookie,
+				  local->hw_roc_channel,
+				  local->hw_roc_channel_type,
+				  local->hw_roc_duration,
+				  GFP_KERNEL);
+	mutex_unlock(&local->mtx);
+}
+
+void ieee80211_ready_on_channel(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	trace_api_ready_on_channel(local);
+
+	ieee80211_queue_work(hw, &local->hw_roc_start);
+}
+EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel);
+
+static void ieee80211_hw_roc_done(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local, hw_roc_done);
+
+	mutex_lock(&local->mtx);
+
+	if (!local->hw_roc_channel) {
+		mutex_unlock(&local->mtx);
+		return;
+	}
+
+	cfg80211_remain_on_channel_expired(local->hw_roc_dev,
+					   local->hw_roc_cookie,
+					   local->hw_roc_channel,
+					   local->hw_roc_channel_type,
+					   GFP_KERNEL);
+
+	local->hw_roc_channel = NULL;
+	local->hw_roc_cookie = 0;
+
+	ieee80211_recalc_idle(local);
+
+	mutex_unlock(&local->mtx);
+}
+
+void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	trace_api_remain_on_channel_expired(local);
+
+	ieee80211_queue_work(hw, &local->hw_roc_done);
+}
+EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired);
+
+void ieee80211_hw_roc_setup(struct ieee80211_local *local)
+{
+	INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start);
+	INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done);
+}
-- 
cgit v1.2.3


From 90fc4b3a5ba24f09af2a8c4a723651a328949460 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 18 Dec 2010 17:20:48 +0100
Subject: mac80211: implement off-channel TX using hw r-o-c offload

When the driver has remain-on-channel offload,
implement off-channel transmission using that
primitive.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c         | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/offchannel.c  | 30 +++++++++++++++--------
 3 files changed, 81 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 168a6ba8fc28..4bc8a9250cfd 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1641,6 +1641,7 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,
 		ret = ieee80211_remain_on_channel_hw(local, dev,
 						     chan, channel_type,
 						     duration, cookie);
+		local->hw_roc_for_tx = false;
 		mutex_unlock(&local->mtx);
 
 		return ret;
@@ -1783,6 +1784,49 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 
 	*cookie = (unsigned long) skb;
 
+	if (is_offchan && local->ops->remain_on_channel) {
+		unsigned int duration;
+		int ret;
+
+		mutex_lock(&local->mtx);
+		/*
+		 * If the duration is zero, then the driver
+		 * wouldn't actually do anything. Set it to
+		 * 100 for now.
+		 *
+		 * TODO: cancel the off-channel operation
+		 *       when we get the SKB's TX status and
+		 *       the wait time was zero before.
+		 */
+		duration = 100;
+		if (wait)
+			duration = wait;
+		ret = ieee80211_remain_on_channel_hw(local, dev, chan,
+						     channel_type,
+						     duration, cookie);
+		if (ret) {
+			kfree_skb(skb);
+			mutex_unlock(&local->mtx);
+			return ret;
+		}
+
+		local->hw_roc_for_tx = true;
+		local->hw_roc_duration = wait;
+
+		/*
+		 * queue up frame for transmission after
+		 * ieee80211_ready_on_channel call
+		 */
+
+		/* modify cookie to prevent API mismatches */
+		*cookie ^= 2;
+		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN;
+		local->hw_roc_skb = skb;
+		mutex_unlock(&local->mtx);
+
+		return 0;
+	}
+
 	/*
 	 * Can transmit right away if the channel was the
 	 * right one and there's no wait involved... If a
@@ -1823,6 +1867,21 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy,
 	int ret = -ENOENT;
 
 	mutex_lock(&local->mtx);
+
+	if (local->ops->cancel_remain_on_channel) {
+		cookie ^= 2;
+		ret = ieee80211_cancel_remain_on_channel_hw(local, cookie);
+
+		if (ret == 0) {
+			kfree_skb(local->hw_roc_skb);
+			local->hw_roc_skb = NULL;
+		}
+
+		mutex_unlock(&local->mtx);
+
+		return ret;
+	}
+
 	list_for_each_entry(wk, &local->work_list, list) {
 		if (wk->sdata != sdata)
 			continue;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f866af8de5ac..c47d7c0e48a4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -953,10 +953,12 @@ struct ieee80211_local {
 
 	struct ieee80211_channel *hw_roc_channel;
 	struct net_device *hw_roc_dev;
+	struct sk_buff *hw_roc_skb;
 	struct work_struct hw_roc_start, hw_roc_done;
 	enum nl80211_channel_type hw_roc_channel_type;
 	unsigned int hw_roc_duration;
 	u32 hw_roc_cookie;
+	bool hw_roc_for_tx;
 
 	/* dummy netdev for use w/ NAPI */
 	struct net_device napi_dev;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 49b9ec22d9b6..b4e52676f3fb 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -196,6 +196,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
 {
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, hw_roc_start);
+	struct ieee80211_sub_if_data *sdata;
 
 	mutex_lock(&local->mtx);
 
@@ -206,11 +207,19 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
 
 	ieee80211_recalc_idle(local);
 
-	cfg80211_ready_on_channel(local->hw_roc_dev, local->hw_roc_cookie,
-				  local->hw_roc_channel,
-				  local->hw_roc_channel_type,
-				  local->hw_roc_duration,
-				  GFP_KERNEL);
+	if (local->hw_roc_skb) {
+		sdata = IEEE80211_DEV_TO_SUB_IF(local->hw_roc_dev);
+		ieee80211_tx_skb(sdata, local->hw_roc_skb);
+		local->hw_roc_skb = NULL;
+	} else {
+		cfg80211_ready_on_channel(local->hw_roc_dev,
+					  local->hw_roc_cookie,
+					  local->hw_roc_channel,
+					  local->hw_roc_channel_type,
+					  local->hw_roc_duration,
+					  GFP_KERNEL);
+	}
+
 	mutex_unlock(&local->mtx);
 }
 
@@ -236,11 +245,12 @@ static void ieee80211_hw_roc_done(struct work_struct *work)
 		return;
 	}
 
-	cfg80211_remain_on_channel_expired(local->hw_roc_dev,
-					   local->hw_roc_cookie,
-					   local->hw_roc_channel,
-					   local->hw_roc_channel_type,
-					   GFP_KERNEL);
+	if (!local->hw_roc_for_tx)
+		cfg80211_remain_on_channel_expired(local->hw_roc_dev,
+						   local->hw_roc_cookie,
+						   local->hw_roc_channel,
+						   local->hw_roc_channel_type,
+						   GFP_KERNEL);
 
 	local->hw_roc_channel = NULL;
 	local->hw_roc_cookie = 0;
-- 
cgit v1.2.3


From 06778b1c383afbdb88ffd837e117bec06a76f450 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 22 Dec 2010 10:15:52 +0100
Subject: mac80211: remove stray extern

Somehow this snuck into my earlier patch, and
only now did I see a compiler warning:

net/mac80211/led.c:218:13: warning: function '__ieee80211_create_tpt_led_trigger' with external linkage has definition

Remove the stray extern.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/led.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 4905eb8af572..14590332c81c 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -215,8 +215,8 @@ static void tpt_trig_timer(unsigned long data)
 	read_unlock(&tpt_trig->trig.leddev_list_lock);
 }
 
-extern char *__ieee80211_create_tpt_led_trigger(
-				struct ieee80211_hw *hw, unsigned int flags,
+char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+				unsigned int flags,
 				const struct ieee80211_tpt_blink *blink_table,
 				unsigned int blink_table_len)
 {
-- 
cgit v1.2.3


From 44b8288308ac9da27eab7d7bdbf1375a568805c3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 5 Jan 2011 10:35:02 +0000
Subject: net_sched: pfifo_head_drop problem

commit 57dbb2d83d100ea (sched: add head drop fifo queue)
introduced pfifo_head_drop, and broke the invariant that
sch->bstats.bytes and sch->bstats.packets are COUNTER (increasing
counters only)

This can break estimators because est_timer() handles unsigned deltas
only. A decreasing counter can then give a huge unsigned delta.

My mid term suggestion would be to change things so that
sch->bstats.bytes and sch->bstats.packets are incremented in dequeue()
only, not at enqueue() time. We also could add drop_bytes/drop_packets
and provide estimations of drop rates.

It would be more sensible anyway for very low speeds, and big bursts.
Right now, if we drop packets, they still are accounted in byte/packets
abolute counters and rate estimators.

Before this mid term change, this patch makes pfifo_head_drop behavior
similar to other qdiscs in case of drops :
Dont decrement sch->bstats.bytes and sch->bstats.packets

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fifo.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 4dfecb0cba37..aa4d6337e43c 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -54,8 +54,6 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	/* queue full, remove one skb to fulfill the limit */
 	skb_head = qdisc_dequeue_head(sch);
-	sch->bstats.bytes -= qdisc_pkt_len(skb_head);
-	sch->bstats.packets--;
 	sch->qstats.drops++;
 	kfree_skb(skb_head);
 
-- 
cgit v1.2.3


From 3610cda53f247e176bcbb7a7cca64bc53b12acdb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 5 Jan 2011 15:38:53 -0800
Subject: af_unix: Avoid socket->sk NULL OOPS in stream connect security hooks.

unix_release() can asynchornously set socket->sk to NULL, and
it does so without holding the unix_state_lock() on "other"
during stream connects.

However, the reverse mapping, sk->sk_socket, is only transitioned
to NULL under the unix_state_lock().

Therefore make the security hooks follow the reverse mapping instead
of the forward mapping.

Reported-by: Jeremy Fitzhardinge <jeremy@goop.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/security.h   | 15 +++++++--------
 net/unix/af_unix.c         |  2 +-
 security/capability.c      |  2 +-
 security/security.c        |  3 +--
 security/selinux/hooks.c   | 10 +++++-----
 security/smack/smack_lsm.c | 14 +++++++-------
 6 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/security.h b/include/linux/security.h
index fd4d55fb8845..d47a4c24b3e4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -796,8 +796,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @unix_stream_connect:
  *	Check permissions before establishing a Unix domain stream connection
  *	between @sock and @other.
- *	@sock contains the socket structure.
- *	@other contains the peer socket structure.
+ *	@sock contains the sock structure.
+ *	@other contains the peer sock structure.
+ *	@newsk contains the new sock structure.
  *	Return 0 if permission is granted.
  * @unix_may_send:
  *	Check permissions before connecting or sending datagrams from @sock to
@@ -1568,8 +1569,7 @@ struct security_operations {
 	int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
 
 #ifdef CONFIG_SECURITY_NETWORK
-	int (*unix_stream_connect) (struct socket *sock,
-				    struct socket *other, struct sock *newsk);
+	int (*unix_stream_connect) (struct sock *sock, struct sock *other, struct sock *newsk);
 	int (*unix_may_send) (struct socket *sock, struct socket *other);
 
 	int (*socket_create) (int family, int type, int protocol, int kern);
@@ -2525,8 +2525,7 @@ static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32
 
 #ifdef CONFIG_SECURITY_NETWORK
 
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
-				 struct sock *newsk);
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk);
 int security_unix_may_send(struct socket *sock,  struct socket *other);
 int security_socket_create(int family, int type, int protocol, int kern);
 int security_socket_post_create(struct socket *sock, int family,
@@ -2567,8 +2566,8 @@ void security_tun_dev_post_create(struct sock *sk);
 int security_tun_dev_attach(struct sock *sk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
-static inline int security_unix_stream_connect(struct socket *sock,
-					       struct socket *other,
+static inline int security_unix_stream_connect(struct sock *sock,
+					       struct sock *other,
 					       struct sock *newsk)
 {
 	return 0;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 417d7a6c36cf..dd419d286204 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1157,7 +1157,7 @@ restart:
 		goto restart;
 	}
 
-	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
+	err = security_unix_stream_connect(sk, other, newsk);
 	if (err) {
 		unix_state_unlock(sk);
 		goto out_unlock;
diff --git a/security/capability.c b/security/capability.c
index c773635ca3a0..2a5df2b7da83 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -548,7 +548,7 @@ static int cap_sem_semop(struct sem_array *sma, struct sembuf *sops,
 }
 
 #ifdef CONFIG_SECURITY_NETWORK
-static int cap_unix_stream_connect(struct socket *sock, struct socket *other,
+static int cap_unix_stream_connect(struct sock *sock, struct sock *other,
 				   struct sock *newsk)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index 1b798d3df710..e5fb07a3052d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -977,8 +977,7 @@ EXPORT_SYMBOL(security_inode_getsecctx);
 
 #ifdef CONFIG_SECURITY_NETWORK
 
-int security_unix_stream_connect(struct socket *sock, struct socket *other,
-				 struct sock *newsk)
+int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk)
 {
 	return security_ops->unix_stream_connect(sock, other, newsk);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c82538a4b1a4..6f637d2678ac 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3921,18 +3921,18 @@ static int selinux_socket_shutdown(struct socket *sock, int how)
 	return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN);
 }
 
-static int selinux_socket_unix_stream_connect(struct socket *sock,
-					      struct socket *other,
+static int selinux_socket_unix_stream_connect(struct sock *sock,
+					      struct sock *other,
 					      struct sock *newsk)
 {
-	struct sk_security_struct *sksec_sock = sock->sk->sk_security;
-	struct sk_security_struct *sksec_other = other->sk->sk_security;
+	struct sk_security_struct *sksec_sock = sock->sk_security;
+	struct sk_security_struct *sksec_other = other->sk_security;
 	struct sk_security_struct *sksec_new = newsk->sk_security;
 	struct common_audit_data ad;
 	int err;
 
 	COMMON_AUDIT_DATA_INIT(&ad, NET);
-	ad.u.net.sk = other->sk;
+	ad.u.net.sk = other;
 
 	err = avc_has_perm(sksec_sock->sid, sksec_other->sid,
 			   sksec_other->sclass,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 489a85afa477..ccb71a044a1a 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2408,22 +2408,22 @@ static int smack_setprocattr(struct task_struct *p, char *name,
 
 /**
  * smack_unix_stream_connect - Smack access on UDS
- * @sock: one socket
- * @other: the other socket
+ * @sock: one sock
+ * @other: the other sock
  * @newsk: unused
  *
  * Return 0 if a subject with the smack of sock could access
  * an object with the smack of other, otherwise an error code
  */
-static int smack_unix_stream_connect(struct socket *sock,
-				     struct socket *other, struct sock *newsk)
+static int smack_unix_stream_connect(struct sock *sock,
+				     struct sock *other, struct sock *newsk)
 {
-	struct inode *sp = SOCK_INODE(sock);
-	struct inode *op = SOCK_INODE(other);
+	struct inode *sp = SOCK_INODE(sock->sk_socket);
+	struct inode *op = SOCK_INODE(other->sk_socket);
 	struct smk_audit_info ad;
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET);
-	smk_ad_setfield_u_net_sk(&ad, other->sk);
+	smk_ad_setfield_u_net_sk(&ad, other);
 	return smk_access(smk_of_inode(sp), smk_of_inode(op),
 				 MAY_READWRITE, &ad);
 }
-- 
cgit v1.2.3


From 2c6607c611cb7bf0a6750bcea34a258144e302c5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 6 Jan 2011 10:54:29 -0800
Subject: net: add POLLPRI to sock_def_readable()

Leonardo Chiquitto found poll() could block forever on tcp sockets and
Urgent data was received, if the event flag only contains POLLPRI.

He did a bisection and found commit 4938d7e0233 (poll: avoid extra
wakeups in select/poll) was the source of the problem.

Problem is TCP sockets use standard sock_def_readable() function for
their sk_data_ready() handler, and sock_def_readable() doesnt signal
POLLPRI.

Only TCP is affected by the problem. Adding POLLPRI to the list of flags
might trigger unnecessary schedules, but URGENT handling is such a
seldom used feature this seems a good compromise.

Thanks a lot to Leonardo for providing the bisection result and a test
program as well.

Reference : http://www.spinics.net/lists/netdev/msg151793.html

Reported-and-bisected-by: Leonardo Chiquitto <leonardo.lists@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index a6b9e8061f34..a658aeb6d554 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1908,7 +1908,7 @@ static void sock_def_readable(struct sock *sk, int len)
 	rcu_read_lock();
 	wq = rcu_dereference(sk->sk_wq);
 	if (wq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 70bfa2d2e1bfd90ef26758b5e2749f043a940037 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 4 Jan 2011 21:03:12 +0000
Subject: dcb: unlock on error in dcbnl_ieee_get()

There is a "goto nla_put_failure" hidden inside the NLA_PUT() macro, but
we're holding the dcb_lock so we need to unlock first.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 9399af565715..4323bd441f0f 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1264,9 +1264,14 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb,
 
 	spin_lock(&dcb_lock);
 	list_for_each_entry(itr, &dcb_app_list, list) {
-		if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0)
-			NLA_PUT(skb, DCB_ATTR_IEEE_APP,
-				sizeof(itr->app), &itr->app);
+		if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) {
+			err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app),
+					 &itr->app);
+			if (err) {
+				spin_unlock(&dcb_lock);
+				goto nla_put_failure;
+			}
+		}
 	}
 	spin_unlock(&dcb_lock);
 	nla_nest_end(skb, app);
-- 
cgit v1.2.3


From 2a8fe003741aa90b6b9453e90af4bbb7bc42918c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Tue, 4 Jan 2011 21:03:44 +0000
Subject: dcb: use after free in dcb_flushapp()

The original code has a use after free bug because it's not using the
_safe() version of the list_for_each_entry() macro.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dcb/dcbnl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 4323bd441f0f..d900ab99814a 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1643,9 +1643,10 @@ EXPORT_SYMBOL(dcb_setapp);
 static void dcb_flushapp(void)
 {
 	struct dcb_app_type *app;
+	struct dcb_app_type *tmp;
 
 	spin_lock(&dcb_lock);
-	list_for_each_entry(app, &dcb_app_list, list) {
+	list_for_each_entry_safe(app, tmp, &dcb_app_list, list) {
 		list_del(&app->list);
 		kfree(app);
 	}
-- 
cgit v1.2.3


From 6623e3b24a5ebb07e81648c478d286a1329ab891 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 5 Jan 2011 07:52:55 +0000
Subject: ipv4: IP defragmentation must be ECN aware

RFC3168 (The Addition of Explicit Congestion Notification to IP)
states :

5.3.  Fragmentation

   ECN-capable packets MAY have the DF (Don't Fragment) bit set.
   Reassembly of a fragmented packet MUST NOT lose indications of
   congestion.  In other words, if any fragment of an IP packet to be
   reassembled has the CE codepoint set, then one of two actions MUST be
   taken:

      * Set the CE codepoint on the reassembled packet.  However, this
        MUST NOT occur if any of the other fragments contributing to
        this reassembly carries the Not-ECT codepoint.

      * The packet is dropped, instead of being reassembled, for any
        other reason.

This patch implements this requirement for IPv4, choosing the first
action :

If one fragment had NO-ECT codepoint
        reassembled frame has NO-ECT
ElIf one fragment had CE codepoint
        reassembled frame has CE

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e6215bdd96c0..a1151b8adf3c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -45,6 +45,7 @@
 #include <linux/udp.h>
 #include <linux/inet.h>
 #include <linux/netfilter_ipv4.h>
+#include <net/inet_ecn.h>
 
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -70,11 +71,28 @@ struct ipq {
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
+	u8		ecn; /* RFC3168 support */
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
 
+#define IPFRAG_ECN_CLEAR  0x01 /* one frag had INET_ECN_NOT_ECT */
+#define IPFRAG_ECN_SET_CE 0x04 /* one frag had INET_ECN_CE */
+
+static inline u8 ip4_frag_ecn(u8 tos)
+{
+	tos = (tos & INET_ECN_MASK) + 1;
+	/*
+	 * After the last operation we have (in binary):
+	 * INET_ECN_NOT_ECT => 001
+	 * INET_ECN_ECT_1   => 010
+	 * INET_ECN_ECT_0   => 011
+	 * INET_ECN_CE      => 100
+	 */
+	return (tos & 2) ? 0 : tos;
+}
+
 static struct inet_frags ip4_frags;
 
 int ip_frag_nqueues(struct net *net)
@@ -137,6 +155,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 
 	qp->protocol = arg->iph->protocol;
 	qp->id = arg->iph->id;
+	qp->ecn = ip4_frag_ecn(arg->iph->tos);
 	qp->saddr = arg->iph->saddr;
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
@@ -316,6 +335,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.fragments = NULL;
 	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
+	qp->ecn = 0;
 
 	return 0;
 }
@@ -328,6 +348,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	int flags, offset;
 	int ihl, end;
 	int err = -ENOENT;
+	u8 ecn;
 
 	if (qp->q.last_in & INET_FRAG_COMPLETE)
 		goto err;
@@ -339,6 +360,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		goto err;
 	}
 
+	ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
 	offset = ntohs(ip_hdr(skb)->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
@@ -472,6 +494,7 @@ found:
 	}
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
+	qp->ecn |= ecn;
 	atomic_add(skb->truesize, &qp->q.net->mem);
 	if (offset == 0)
 		qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -583,6 +606,17 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
+	/* RFC3168 5.3 Fragmentation support
+	 * If one fragment had INET_ECN_NOT_ECT,
+	 *	reassembled frame also has INET_ECN_NOT_ECT
+	 * Elif one fragment had INET_ECN_CE
+	 *	reassembled frame also has INET_ECN_CE
+	 */
+	if (qp->ecn & IPFRAG_ECN_CLEAR)
+		iph->tos &= ~INET_ECN_MASK;
+	else if (qp->ecn & IPFRAG_ECN_SET_CE)
+		iph->tos |= INET_ECN_CE;
+
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
 	qp->q.fragments_tail = NULL;
-- 
cgit v1.2.3


From f682cefa5ad204d3bfaa54a58046c66d2d035ac1 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 5 Jan 2011 04:23:23 +0000
Subject: netfilter: fix the race when initializing nf_ct_expect_hash_rnd

Since nf_ct_expect_dst_hash() may be called without nf_conntrack_lock
locked, nf_ct_expect_hash_rnd should be initialized in the atomic way.

In this patch, we use nf_conntrack_hash_rnd instead of
nf_ct_expect_hash_rnd.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack.h |  2 ++
 net/netfilter/nf_conntrack_core.c    | 30 +++++++++++++++++-------------
 net/netfilter/nf_conntrack_expect.c  | 10 +++-------
 3 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index caf17db87dbc..d85cff10e169 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -298,6 +298,8 @@ static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 extern unsigned int nf_conntrack_htable_size;
 extern unsigned int nf_conntrack_max;
+extern unsigned int nf_conntrack_hash_rnd;
+void init_nf_conntrack_hash_rnd(void);
 
 #define NF_CT_STAT_INC(net, count)	\
 	__this_cpu_inc((net)->ct.stat->count)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 27a5ea6b6a0f..e61511929c66 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
-static unsigned int nf_conntrack_hash_rnd __read_mostly;
+unsigned int nf_conntrack_hash_rnd __read_mostly;
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
@@ -596,6 +596,21 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
+void init_nf_conntrack_hash_rnd(void)
+{
+	unsigned int rand;
+
+	/*
+	 * Why not initialize nf_conntrack_rnd in a "init()" function ?
+	 * Because there isn't enough entropy when system initializing,
+	 * and we initialize it as late as possible.
+	 */
+	do {
+		get_random_bytes(&rand, sizeof(rand));
+	} while (!rand);
+	cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(struct net *net, u16 zone,
 		     const struct nf_conntrack_tuple *orig,
@@ -605,18 +620,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
 	struct nf_conn *ct;
 
 	if (unlikely(!nf_conntrack_hash_rnd)) {
-		unsigned int rand;
-
-		/*
-		 * Why not initialize nf_conntrack_rnd in a "init()" function ?
-		 * Because there isn't enough entropy when system initializing,
-		 * and we initialize it as late as possible.
-		 */
-		do {
-			get_random_bytes(&rand, sizeof(rand));
-		} while (!rand);
-		cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
-
+		init_nf_conntrack_hash_rnd();
 		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
 		hash = hash_conntrack_raw(orig, zone);
 	}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 46e8966912b1..a20fb0bd1efe 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -32,9 +32,7 @@
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
-static unsigned int nf_ct_expect_hash_rnd __read_mostly;
 unsigned int nf_ct_expect_max __read_mostly;
-static int nf_ct_expect_hash_rnd_initted __read_mostly;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
@@ -77,15 +75,13 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 {
 	unsigned int hash;
 
-	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
-		get_random_bytes(&nf_ct_expect_hash_rnd,
-				 sizeof(nf_ct_expect_hash_rnd));
-		nf_ct_expect_hash_rnd_initted = 1;
+	if (unlikely(!nf_conntrack_hash_rnd)) {
+		init_nf_conntrack_hash_rnd();
 	}
 
 	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
 		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
+		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
 	return ((u64)hash * nf_ct_expect_hsize) >> 32;
 }
 
-- 
cgit v1.2.3


From cba85b532e4aabdb97f44c18987d45141fd93faa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 6 Jan 2011 11:25:00 -0800
Subject: netfilter: fix export secctx error handling

In 1ae4de0cdf855305765592647025bde55e85e451, the secctx was exported
via the /proc/net/netfilter/nf_conntrack and ctnetlink interfaces
instead of the secmark.

That patch introduced the use of security_secid_to_secctx() which may
return a non-zero value on error.

In one of my setups, I have NF_CONNTRACK_SECMARK enabled but no
security modules. Thus, security_secid_to_secctx() returns a negative
value that results in the breakage of the /proc and `conntrack -L'
outputs. To fix this, we skip the inclusion of secctx if the
aforementioned function fails.

This patch also fixes the dynamic netlink message size calculation
if security_secid_to_secctx() returns an error, since its logic is
also wrong.

This problem exists in Linux kernel >= 2.6.37.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  2 +-
 net/netfilter/nf_conntrack_netlink.c               | 25 ++++++++++++----------
 net/netfilter/nf_conntrack_standalone.c            |  2 +-
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 37f8adb68c79..63f60fc5d26a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -97,7 +97,7 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 
 	ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
 	if (ret)
-		return ret;
+		return 0;
 
 	ret = seq_printf(s, "secctx=%s ", secctx);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b729ace1dcc1..0cdba50c0d69 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -254,7 +254,7 @@ ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct)
 
 	ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
 	if (ret)
-		return ret;
+		return 0;
 
 	ret = -1;
 	nest_secctx = nla_nest_start(skb, CTA_SECCTX | NLA_F_NESTED);
@@ -453,16 +453,22 @@ ctnetlink_counters_size(const struct nf_conn *ct)
 	       ;
 }
 
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static int ctnetlink_nlmsg_secctx_size(const struct nf_conn *ct)
+static inline int
+ctnetlink_secctx_size(const struct nf_conn *ct)
 {
-	int len;
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	int len, ret;
 
-	security_secid_to_secctx(ct->secmark, NULL, &len);
+	ret = security_secid_to_secctx(ct->secmark, NULL, &len);
+	if (ret)
+		return 0;
 
-	return sizeof(char) * len;
-}
+	return nla_total_size(0) /* CTA_SECCTX */
+	       + nla_total_size(sizeof(char) * len); /* CTA_SECCTX_NAME */
+#else
+	return 0;
 #endif
+}
 
 static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
@@ -479,10 +485,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + nla_total_size(0) /* CTA_PROTOINFO */
 	       + nla_total_size(0) /* CTA_HELP */
 	       + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-	       + nla_total_size(0) /* CTA_SECCTX */
-	       + nla_total_size(ctnetlink_nlmsg_secctx_size(ct)) /* CTA_SECCTX_NAME */
-#endif
+	       + ctnetlink_secctx_size(ct)
 #ifdef CONFIG_NF_NAT_NEEDED
 	       + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
 	       + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0fb65705b44b..b4d7f0f24b27 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -118,7 +118,7 @@ static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 
 	ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
 	if (ret)
-		return ret;
+		return 0;
 
 	ret = seq_printf(s, "secctx=%s ", secctx);
 
-- 
cgit v1.2.3


From f88de8de5a8c8a8a73960d4432ceef2d38b7f86f Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 25 Dec 2010 03:41:30 +0000
Subject: net: bridge: check the length of skb after
 nf_bridge_maybe_copy_header()

Since nf_bridge_maybe_copy_header() may change the length of skb,
we should check the length of skb after it to handle the ppoe skbs.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 2bd11ec6d166..ee64287f1290 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -41,17 +41,13 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
-	/* drop mtu oversized packets except gso */
-	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
+	/* ip_fragment doesn't copy the MAC header */
+	if (nf_bridge_maybe_copy_header(skb) ||
+	    (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))) {
 		kfree_skb(skb);
-	else {
-		/* ip_fragment doesn't copy the MAC header */
-		if (nf_bridge_maybe_copy_header(skb))
-			kfree_skb(skb);
-		else {
-			skb_push(skb, ETH_HLEN);
-			dev_queue_xmit(skb);
-		}
+	} else {
+		skb_push(skb, ETH_HLEN);
+		dev_queue_xmit(skb);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 4b5b3ba16be1b195d2e1161746637acd4b9fed4f Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:24 +0000
Subject: SUNRPC move svc_drop to caller of svc_process_common

The NFSv4.1 shared back channel does not need to call svc_drop because the
callback service never outlives the single connection it services, and it
reuses it's buffers and keeps the trasport.

Signed-off-by: Andy Adamson <andros@netapp.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/svc.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6359c42c4941..606d182895a9 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1147,7 +1147,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
  dropit:
 	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
 	dprintk("svc: svc_process dropit\n");
-	svc_drop(rqstp);
 	return 0;
 
 err_short_len:
@@ -1218,7 +1217,6 @@ svc_process(struct svc_rqst *rqstp)
 	struct kvec		*resv = &rqstp->rq_res.head[0];
 	struct svc_serv		*serv = rqstp->rq_server;
 	u32			dir;
-	int			error;
 
 	/*
 	 * Setup response xdr_buf.
@@ -1246,11 +1244,13 @@ svc_process(struct svc_rqst *rqstp)
 		return 0;
 	}
 
-	error = svc_process_common(rqstp, argv, resv);
-	if (error <= 0)
-		return error;
-
-	return svc_send(rqstp);
+	/* Returns 1 for send, 0 for drop */
+	if (svc_process_common(rqstp, argv, resv))
+		return svc_send(rqstp);
+	else {
+		svc_drop(rqstp);
+		return 0;
+	}
 }
 
 #if defined(CONFIG_NFS_V4_1)
@@ -1264,7 +1264,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
-	int 		error;
 
 	/* Build the svc_rqst used by the common processing routine */
 	rqstp->rq_xprt = serv->bc_xprt;
@@ -1292,12 +1291,15 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	svc_getu32(argv);	/* XID */
 	svc_getnl(argv);	/* CALLDIR */
 
-	error = svc_process_common(rqstp, argv, resv);
-	if (error <= 0)
-		return error;
-
-	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
-	return bc_send(req);
+	/* Returns 1 for send, 0 for drop */
+	if (svc_process_common(rqstp, argv, resv)) {
+		memcpy(&req->rq_snd_buf, &rqstp->rq_res,
+						sizeof(req->rq_snd_buf));
+		return bc_send(req);
+	} else {
+		/* Nothing to do to drop request */
+		return 0;
+	}
 }
 EXPORT_SYMBOL(bc_svc_process);
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3


From 71e161a6a9fa021a280e564254fcda894e6fbd14 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:25 +0000
Subject: SUNRPC fix bc_send print

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/bc_svc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 7dcfe0cc3500..1dd1a6890007 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -59,8 +59,8 @@ int bc_send(struct rpc_rqst *req)
 		ret = task->tk_status;
 		rpc_put_task(task);
 	}
-	return ret;
 	dprintk("RPC:       bc_send ret= %d\n", ret);
+	return ret;
 }
 
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3


From 1f11a034cdc4b45ee56d51b87a9e37cb776fb15b Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:26 +0000
Subject: SUNRPC new transport for the NFSv4.1 shared back channel

Move the current sock create and destroy routines into the new transport ops.
Back channel socket will be destroyed by the svc_closs_all call in svc_destroy.

Added check: only TCP supported on shared back channel.

Signed-off-by: Andy Adamson <andros@netapp.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svcsock.h |  1 +
 net/sunrpc/svc.c               |  4 ---
 net/sunrpc/svcsock.c           | 82 +++++++++++++++++++++++++++++++-----------
 3 files changed, 63 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 1b353a76c304..3a45a80760b9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,6 +45,7 @@ int		svc_sock_names(struct svc_serv *serv, char *buf,
 int		svc_addsock(struct svc_serv *serv, const int fd,
 					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
+void		svc_init_bc_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
 void		svc_sock_destroy(struct svc_xprt *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 606d182895a9..261e2d1dff10 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -488,10 +488,6 @@ svc_destroy(struct svc_serv *serv)
 	if (svc_serv_is_pooled(serv))
 		svc_pool_map_put();
 
-#if defined(CONFIG_NFS_V4_1)
-	svc_sock_destroy(serv->bc_xprt);
-#endif /* CONFIG_NFS_V4_1 */
-
 	svc_unregister(serv);
 	kfree(serv->sv_pools);
 	kfree(serv);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 07919e16be3e..6630f2922f40 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -66,6 +66,13 @@ static void		svc_sock_free(struct svc_xprt *);
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
 					  struct net *, struct sockaddr *,
 					  int, int);
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+#endif /* CONFIG_NFS_V4_1 */
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
 static struct lock_class_key svc_slock_key[2];
@@ -1184,6 +1191,39 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
 	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
+#if defined(CONFIG_NFS_V4_1)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
+					     struct net *, struct sockaddr *,
+					     int, int);
+static void svc_bc_sock_free(struct svc_xprt *xprt);
+
+static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
+				       struct net *net,
+				       struct sockaddr *sa, int salen,
+				       int flags)
+{
+	return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
+}
+
+static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
+{
+}
+
+static struct svc_xprt_ops svc_tcp_bc_ops = {
+	.xpo_create = svc_bc_tcp_create,
+	.xpo_detach = svc_bc_tcp_sock_detach,
+	.xpo_free = svc_bc_sock_free,
+	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+};
+
+static struct svc_xprt_class svc_tcp_bc_class = {
+	.xcl_name = "tcp-bc",
+	.xcl_owner = THIS_MODULE,
+	.xcl_ops = &svc_tcp_bc_ops,
+	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
+};
+#endif /* CONFIG_NFS_V4_1 */
+
 static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_create = svc_tcp_create,
 	.xpo_recvfrom = svc_tcp_recvfrom,
@@ -1509,41 +1549,43 @@ static void svc_sock_free(struct svc_xprt *xprt)
 	kfree(svsk);
 }
 
+#if defined(CONFIG_NFS_V4_1)
 /*
- * Create a svc_xprt.
- *
- * For internal use only (e.g. nfsv4.1 backchannel).
- * Callers should typically use the xpo_create() method.
+ * Create a back channel svc_xprt which shares the fore channel socket.
  */
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot)
+static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
+					     int protocol,
+					     struct net *net,
+					     struct sockaddr *sin, int len,
+					     int flags)
 {
 	struct svc_sock *svsk;
-	struct svc_xprt *xprt = NULL;
+	struct svc_xprt *xprt;
+
+	if (protocol != IPPROTO_TCP) {
+		printk(KERN_WARNING "svc: only TCP sockets"
+			" supported on shared back channel\n");
+		return ERR_PTR(-EINVAL);
+	}
 
-	dprintk("svc: %s\n", __func__);
 	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 	if (!svsk)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	xprt = &svsk->sk_xprt;
-	if (prot == IPPROTO_TCP)
-		svc_xprt_init(&svc_tcp_class, xprt, serv);
-	else if (prot == IPPROTO_UDP)
-		svc_xprt_init(&svc_udp_class, xprt, serv);
-	else
-		BUG();
-out:
-	dprintk("svc: %s return %p\n", __func__, xprt);
+	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+
+	serv->bc_xprt = xprt;
+
 	return xprt;
 }
-EXPORT_SYMBOL_GPL(svc_sock_create);
 
 /*
- * Destroy a svc_sock.
+ * Free a back channel svc_sock.
  */
-void svc_sock_destroy(struct svc_xprt *xprt)
+static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
 	if (xprt)
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
 }
-EXPORT_SYMBOL_GPL(svc_sock_destroy);
+#endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3


From 16b2d1e1d12de000404d7c845d0db1226511f84d Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:27 +0000
Subject: SUNRPC register and unregister the back channel transport

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svcsock.h |  1 -
 net/sunrpc/svcsock.c           | 20 ++++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 3a45a80760b9..1b353a76c304 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -45,7 +45,6 @@ int		svc_sock_names(struct svc_serv *serv, char *buf,
 int		svc_addsock(struct svc_serv *serv, const int fd,
 					char *name_return, const size_t len);
 void		svc_init_xprt_sock(void);
-void		svc_init_bc_xprt_sock(void);
 void		svc_cleanup_xprt_sock(void);
 struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
 void		svc_sock_destroy(struct svc_xprt *);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6630f2922f40..e6b66d81115e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1222,6 +1222,24 @@ static struct svc_xprt_class svc_tcp_bc_class = {
 	.xcl_ops = &svc_tcp_bc_ops,
 	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
 };
+
+static void svc_init_bc_xprt_sock(void)
+{
+	svc_reg_xprt_class(&svc_tcp_bc_class);
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+	svc_unreg_xprt_class(&svc_tcp_bc_class);
+}
+#else /* CONFIG_NFS_V4_1 */
+static void svc_init_bc_xprt_sock(void)
+{
+}
+
+static void svc_cleanup_bc_xprt_sock(void)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static struct svc_xprt_ops svc_tcp_ops = {
@@ -1247,12 +1265,14 @@ void svc_init_xprt_sock(void)
 {
 	svc_reg_xprt_class(&svc_tcp_class);
 	svc_reg_xprt_class(&svc_udp_class);
+	svc_init_bc_xprt_sock();
 }
 
 void svc_cleanup_xprt_sock(void)
 {
 	svc_unreg_xprt_class(&svc_tcp_class);
 	svc_unreg_xprt_class(&svc_udp_class);
+	svc_cleanup_bc_xprt_sock();
 }
 
 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
-- 
cgit v1.2.3


From 2c2618c6f29c41a0a966f14f05c8bf45fcabb750 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:31 +0000
Subject: NFS associate sessionid with callback connection

The sessions based callback service is started prior to the CREATE_SESSION call
so that it can handle CB_NULL requests which can be sent before the
CREATE_SESSION call returns and the session ID is known.

Set the callback sessionid after a sucessful CREATE_SESSION.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c               | 31 +++++++++++++++++++++++++++++++
 fs/nfs/callback.h               |  1 +
 fs/nfs/nfs4state.c              |  6 ++++++
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/svcsock.c            |  4 +++-
 5 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 0e9fae831dfa..c0b05497972b 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -136,6 +136,33 @@ out_err:
 }
 
 #if defined(CONFIG_NFS_V4_1)
+/*
+ *  * CB_SEQUENCE operations will fail until the callback sessionid is set.
+ *   */
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
+	struct nfs4_sessionid *bc_sid;
+
+	if (!serv->bc_xprt)
+		return -EINVAL;
+
+	/* on success freed in xprt_free */
+	bc_sid = kmalloc(sizeof(struct nfs4_sessionid), GFP_KERNEL);
+	if (!bc_sid)
+		return -ENOMEM;
+	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
+		NFS4_MAX_SESSIONID_LEN);
+	spin_lock_bh(&serv->sv_cb_lock);
+	serv->bc_xprt->xpt_bc_sid = bc_sid;
+	spin_unlock_bh(&serv->sv_cb_lock);
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for bc_xprt %p\n", __func__,
+		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
+		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
+		serv->bc_xprt);
+	return 0;
+}
+
 /*
  * The callback service for NFSv4.1 callbacks
  */
@@ -241,6 +268,10 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
 		struct nfs_callback_data *cb_info)
 {
 }
+int nfs4_set_callback_sessionid(struct nfs_client *clp)
+{
+	return 0;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 85a7cfd1b8dd..58d61a8ce8b9 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -137,6 +137,7 @@ extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
 extern void nfs_callback_down(int minorversion);
 extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
 					    const nfs4_stateid *stateid);
+extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
 #endif /* CONFIG_NFS_V4 */
 /*
  * nfs41: Callbacks are expected to not cause substantial latency,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f575a3126737..485e95e8fd62 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -192,6 +192,12 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	status = nfs4_proc_create_session(clp);
 	if (status != 0)
 		goto out;
+	status = nfs4_set_callback_sessionid(clp);
+	if (status != 0) {
+		printk(KERN_WARNING "Sessionid not set. No callback service\n");
+		nfs_callback_down(1);
+		status = 0;
+	}
 	nfs41_setup_state_renewal(clp);
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index aea0d438e3c7..357da5e0daa3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -78,6 +78,7 @@ struct svc_xprt {
 	size_t			xpt_remotelen;	/* length of address */
 	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct list_head	xpt_users;	/* callbacks on free */
+	void			*xpt_bc_sid;	/* back channel session ID */
 
 	struct net		*xpt_net;
 };
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e6b66d81115e..db3013e4aa04 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1605,7 +1605,9 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
  */
 static void svc_bc_sock_free(struct svc_xprt *xprt)
 {
-	if (xprt)
+	if (xprt) {
+		kfree(xprt->xpt_bc_sid);
 		kfree(container_of(xprt, struct svc_sock, sk_xprt));
+	}
 }
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3


From 4a19de0f4b693139bb10b7cc3cfe1f618576ba67 Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Thu, 6 Jan 2011 02:04:35 +0000
Subject: NFS rename client back channel transport field

Differentiate from server backchannel

Signed-off-by: Andy Adamson <andros@netapp.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c              | 12 ++++++------
 include/linux/sunrpc/bc_xprt.h |  4 ++--
 include/linux/sunrpc/svc.h     |  2 +-
 net/sunrpc/svc.c               |  2 +-
 net/sunrpc/svcsock.c           |  2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 753a9e315518..199016528fcb 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -142,7 +142,7 @@ int nfs4_set_callback_sessionid(struct nfs_client *clp)
 	struct svc_serv *serv = clp->cl_rpcclient->cl_xprt->bc_serv;
 	struct nfs4_sessionid *bc_sid;
 
-	if (!serv->bc_xprt)
+	if (!serv->sv_bc_xprt)
 		return -EINVAL;
 
 	/* on success freed in xprt_free */
@@ -152,12 +152,12 @@ int nfs4_set_callback_sessionid(struct nfs_client *clp)
 	memcpy(bc_sid->data, &clp->cl_session->sess_id.data,
 		NFS4_MAX_SESSIONID_LEN);
 	spin_lock_bh(&serv->sv_cb_lock);
-	serv->bc_xprt->xpt_bc_sid = bc_sid;
+	serv->sv_bc_xprt->xpt_bc_sid = bc_sid;
 	spin_unlock_bh(&serv->sv_cb_lock);
-	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for bc_xprt %p\n", __func__,
+	dprintk("%s set xpt_bc_sid=%u:%u:%u:%u for sv_bc_xprt %p\n", __func__,
 		((u32 *)bc_sid->data)[0], ((u32 *)bc_sid->data)[1],
 		((u32 *)bc_sid->data)[2], ((u32 *)bc_sid->data)[3],
-		serv->bc_xprt);
+		serv->sv_bc_xprt);
 	return 0;
 }
 
@@ -228,8 +228,8 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 	init_waitqueue_head(&serv->sv_cb_waitq);
 	rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
 	if (IS_ERR(rqstp)) {
-		svc_xprt_put(serv->bc_xprt);
-		serv->bc_xprt = NULL;
+		svc_xprt_put(serv->sv_bc_xprt);
+		serv->sv_bc_xprt = NULL;
 	}
 out:
 	dprintk("--> %s return %ld\n", __func__,
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 2c60e094fdec..c50b458b8a3f 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -43,7 +43,7 @@ int bc_send(struct rpc_rqst *req);
  */
 static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 {
-	if (rqstp->rq_server->bc_xprt)
+	if (rqstp->rq_server->sv_bc_xprt)
 		return 1;
 	return 0;
 }
@@ -51,7 +51,7 @@ static inline struct nfs4_sessionid *bc_xprt_sid(struct svc_rqst *rqstp)
 {
 	if (svc_is_backchannel(rqstp))
 		return (struct nfs4_sessionid *)
-					rqstp->rq_server->bc_xprt->xpt_bc_sid;
+			rqstp->rq_server->sv_bc_xprt->xpt_bc_sid;
 	return NULL;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5a3085b9b394..c81d4d8be3a9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -99,7 +99,7 @@ struct svc_serv {
 	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	wait_queue_head_t	sv_cb_waitq;	/* sleep here if there are no
 						 * entries in the svc_cb_list */
-	struct svc_xprt		*bc_xprt;
+	struct svc_xprt		*sv_bc_xprt;	/* callback on fore channel */
 #endif /* CONFIG_NFS_V4_1 */
 };
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 261e2d1dff10..0e659c665a8d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1262,7 +1262,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	struct kvec	*resv = &rqstp->rq_res.head[0];
 
 	/* Build the svc_rqst used by the common processing routine */
-	rqstp->rq_xprt = serv->bc_xprt;
+	rqstp->rq_xprt = serv->sv_bc_xprt;
 	rqstp->rq_xid = req->rq_xid;
 	rqstp->rq_prot = req->rq_xprt->prot;
 	rqstp->rq_server = serv;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index db3013e4aa04..d265aa700bb3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1595,7 +1595,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
 	xprt = &svsk->sk_xprt;
 	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
 
-	serv->bc_xprt = xprt;
+	serv->sv_bc_xprt = xprt;
 
 	return xprt;
 }
-- 
cgit v1.2.3


From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:23 +1100
Subject: fs: change d_delete semantics

Change d_delete from a dentry deletion notification to a dentry caching
advise, more like ->drop_inode. Require it to be constant and idempotent,
and not take d_lock. This is how all existing filesystems use the callback
anyway.

This makes fine grained dentry locking of dput and dentry lru scanning
much simpler.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/porting |  8 ++++++++
 Documentation/filesystems/vfs.txt | 27 +++++++++++++--------------
 arch/ia64/kernel/perfmon.c        |  2 +-
 drivers/staging/smbfs/dir.c       |  4 ++--
 fs/9p/vfs_dentry.c                |  4 ++--
 fs/afs/dir.c                      |  4 ++--
 fs/btrfs/inode.c                  |  2 +-
 fs/coda/dir.c                     |  4 ++--
 fs/configfs/dir.c                 |  2 +-
 fs/dcache.c                       |  2 --
 fs/gfs2/dentry.c                  |  2 +-
 fs/hostfs/hostfs_kern.c           |  2 +-
 fs/libfs.c                        |  2 +-
 fs/ncpfs/dir.c                    |  4 ++--
 fs/nfs/dir.c                      |  2 +-
 fs/proc/base.c                    |  2 +-
 fs/proc/generic.c                 |  2 +-
 fs/proc/proc_sysctl.c             |  2 +-
 fs/sysfs/dir.c                    |  2 +-
 include/linux/dcache.h            |  6 +++---
 kernel/cgroup.c                   |  2 +-
 net/sunrpc/rpc_pipe.c             |  2 +-
 22 files changed, 47 insertions(+), 42 deletions(-)

(limited to 'net')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index b12c89538680..9e71c9ad3108 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -318,3 +318,11 @@ if it's zero is not *and* *never* *had* *been* enough.  Final unlink() and iput(
 may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
 free the on-disk inode, you may end up doing that while ->write_inode() is writing
 to it.
+
+---
+[mandatory]
+
+	.d_delete() now only advises the dcache as to whether or not to cache
+unreferenced dentries, and is now only called when the dentry refcount goes to
+0. Even on 0 refcount transition, it must be able to tolerate being called 0,
+1, or more times (eg. constant, idempotent).
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 20899e095e7e..95c0a93f056c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -847,9 +847,9 @@ defined:
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash) (struct dentry *, struct qstr *);
-	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
-	int (*d_delete)(struct dentry *);
+	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
@@ -864,9 +864,11 @@ struct dentry_operations {
 
   d_compare: called when a dentry should be compared with another
 
-  d_delete: called when the last reference to a dentry is
-	deleted. This means no-one is using the dentry, however it is
-	still valid and in the dcache
+  d_delete: called when the last reference to a dentry is dropped and the
+	dcache is deciding whether or not to cache it. Return 1 to delete
+	immediately, or 0 to cache the dentry. Default is NULL which means to
+	always cache a reachable dentry. d_delete must be constant and
+	idempotent.
 
   d_release: called when a dentry is really deallocated
 
@@ -910,14 +912,11 @@ manipulate dentries:
 	the usage count)
 
   dput: close a handle for a dentry (decrements the usage count). If
-	the usage count drops to 0, the "d_delete" method is called
-	and the dentry is placed on the unused list if the dentry is
-	still in its parents hash list. Putting the dentry on the
-	unused list just means that if the system needs some RAM, it
-	goes through the unused list of dentries and deallocates them.
-	If the dentry has already been unhashed and the usage count
-	drops to 0, in this case the dentry is deallocated after the
-	"d_delete" method is called
+	the usage count drops to 0, and the dentry is still in its
+	parent's hash, the "d_delete" method is called to check whether
+	it should be cached. If it should not be cached, or if the dentry
+	is not hashed, it is deleted. Otherwise cached dentries are put
+	into an LRU list to be reclaimed on memory shortage.
 
   d_drop: this unhashes a dentry from its parents hash list. A
 	subsequent call to dput() will deallocate the dentry if its
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 39e534f5a3b0..d39d8a53b579 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2185,7 +2185,7 @@ static const struct file_operations pfm_file_ops = {
 };
 
 static int
-pfmfs_delete_dentry(struct dentry *dentry)
+pfmfs_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index f088ea2f6ac9..2270d4822c2f 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -276,7 +276,7 @@ smb_dir_open(struct inode *dir, struct file *file)
 static int smb_lookup_validate(struct dentry *, struct nameidata *);
 static int smb_hash_dentry(struct dentry *, struct qstr *);
 static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
-static int smb_delete_dentry(struct dentry *);
+static int smb_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations smbfs_dentry_operations =
 {
@@ -367,7 +367,7 @@ out:
  * We use this to unhash dentries with bad inodes.
  */
 static int
-smb_delete_dentry(struct dentry * dentry)
+smb_delete_dentry(const struct dentry *dentry)
 {
 	if (dentry->d_inode) {
 		if (is_bad_inode(dentry->d_inode)) {
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f3933..466d2a4fc5cb 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
  *
  */
 
-static int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(const struct dentry *dentry)
 {
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
 									dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
  *
  */
 
-static int v9fs_cached_dentry_delete(struct dentry *dentry)
+static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a86..2c18cde27000 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
-static int afs_d_delete(struct dentry *dentry);
+static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
@@ -730,7 +730,7 @@ out_bad:
  * - called from dput() when d_count is going to 0.
  * - return 1 to request dentry be unhashed, 0 otherwise
  */
-static int afs_d_delete(struct dentry *dentry)
+static int afs_d_delete(const struct dentry *dentry)
 {
 	_enter("%s", dentry->d_name.name);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 72f31ecb5c90..7ce9f8932789 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	return inode;
 }
 
-static int btrfs_dentry_delete(struct dentry *dentry)
+static int btrfs_dentry_delete(const struct dentry *dentry)
 {
 	struct btrfs_root *root;
 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b35539601..4cce3b07d9d7 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -47,7 +47,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
 
 /* dentry ops */
 static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
-static int coda_dentry_delete(struct dentry *);
+static int coda_dentry_delete(const struct dentry *);
 
 /* support routines */
 static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -577,7 +577,7 @@ out:
  * This is the callback from dput() when d_count is going to 0.
  * We use this to unhash dentries with bad inodes.
  */
-static int coda_dentry_delete(struct dentry * dentry)
+static int coda_dentry_delete(const struct dentry * dentry)
 {
 	int flags;
 
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 578706969415..20024a9ef5a7 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
  * We _must_ delete our dentries on last dput, as the chain-to-parent
  * behavior is required to clear the parents of default_groups.
  */
-static int configfs_d_delete(struct dentry *dentry)
+static int configfs_d_delete(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index b2cb2662ca00..6ee6bc40cb63 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -453,8 +453,6 @@ static void prune_one_dentry(struct dentry * dentry)
 		if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
 			return;
 
-		if (dentry->d_op && dentry->d_op->d_delete)
-			dentry->d_op->d_delete(dentry);
 		dentry_lru_del(dentry);
 		__d_drop(dentry);
 		dentry = d_kill(dentry);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b3858..e80fea2f65ff 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -106,7 +106,7 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
 	return 0;
 }
 
-static int gfs2_dentry_delete(struct dentry *dentry)
+static int gfs2_dentry_delete(const struct dentry *dentry)
 {
 	struct gfs2_inode *ginode;
 
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e6..cfe8bc7de511 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
 
 #define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
 
-static int hostfs_d_delete(struct dentry *dentry)
+static int hostfs_d_delete(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528ad..b9d25d83e228 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -37,7 +37,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  */
-static int simple_delete_dentry(struct dentry *dentry)
+static int simple_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f22b12e7d337..d6e6453881ce 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -76,7 +76,7 @@ const struct inode_operations ncp_dir_inode_operations =
 static int ncp_lookup_validate(struct dentry *, struct nameidata *);
 static int ncp_hash_dentry(struct dentry *, struct qstr *);
 static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
-static int ncp_delete_dentry(struct dentry *);
+static int ncp_delete_dentry(const struct dentry *);
 
 static const struct dentry_operations ncp_dentry_operations =
 {
@@ -162,7 +162,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
  * Closing files can be safely postponed until iput() - it's done there anyway.
  */
 static int
-ncp_delete_dentry(struct dentry * dentry)
+ncp_delete_dentry(const struct dentry * dentry)
 {
 	struct inode *inode = dentry->d_inode;
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..9184c7c80f78 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1117,7 +1117,7 @@ out_error:
 /*
  * This is called from dput() when d_count is going to 0.
  */
-static int nfs_dentry_delete(struct dentry *dentry)
+static int nfs_dentry_delete(const struct dentry *dentry)
 {
 	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 182845147fe4..d932fdb6a245 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1744,7 +1744,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return 0;
 }
 
-static int pid_delete_dentry(struct dentry * dentry)
+static int pid_delete_dentry(const struct dentry * dentry)
 {
 	/* Is the task we represent dead?
 	 * If so, then don't put the dentry on the lru list,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..1d607be36d95 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
  * smarter: we could keep a "volatile" flag in the 
  * inode to indicate which ones to keep.
  */
-static int proc_delete_dentry(struct dentry * dentry)
+static int proc_delete_dentry(const struct dentry * dentry)
 {
 	return 1;
 }
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906b..a256d770ea18 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -392,7 +392,7 @@ static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
 
-static int proc_sys_delete(struct dentry *dentry)
+static int proc_sys_delete(const struct dentry *dentry)
 {
 	return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b0..27e1102e303e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
 		goto repeat;
 }
 
-static int sysfs_dentry_delete(struct dentry *dentry)
+static int sysfs_dentry_delete(const struct dentry *dentry)
 {
 	struct sysfs_dirent *sd = dentry->d_fsdata;
 	return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index fff975576b5b..cbfc9567e4e9 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -133,9 +133,9 @@ enum dentry_d_lock_class
 
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
-	int (*d_hash) (struct dentry *, struct qstr *);
-	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
-	int (*d_delete)(struct dentry *);
+	int (*d_hash)(struct dentry *, struct qstr *);
+	int (*d_compare)(struct dentry *, struct qstr *, struct qstr *);
+	int (*d_delete)(const struct dentry *);
 	void (*d_release)(struct dentry *);
 	void (*d_iput)(struct dentry *, struct inode *);
 	char *(*d_dname)(struct dentry *, char *, int);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 163c890f436d..746055b214d7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2198,7 +2198,7 @@ static inline struct cftype *__file_cft(struct file *file)
 	return __d_cft(file->f_dentry);
 }
 
-static int cgroup_delete_dentry(struct dentry *dentry)
+static int cgroup_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 10a17a37ec4e..a0dc1a86fcea 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -430,7 +430,7 @@ void rpc_put_mount(void)
 }
 EXPORT_SYMBOL_GPL(rpc_put_mount);
 
-static int rpc_delete_dentry(struct dentry *dentry)
+static int rpc_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
-- 
cgit v1.2.3


From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:49 +1100
Subject: fs: icache RCU free inodes

RCU free the struct inode. This will allow:

- Subsequent store-free path walking patch. The inode must be consulted for
  permissions when walking, so an RCU inode reference is a must.
- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
  to take i_lock no longer need to take sb_inode_list_lock to walk the list in
  the first place. This will simplify and optimize locking.
- Could remove some nested trylock loops in dcache code
- Could potentially simplify things a bit in VM land. Do not need to take the
  page lock to follow page->mapping.

The downsides of this is the performance cost of using RCU. In a simple
creat/unlink microbenchmark, performance drops by about 10% due to inability to
reuse cache-hot slab objects. As iterations increase and RCU freeing starts
kicking over, this increases to about 20%.

In cases where inode lifetimes are longer (ie. many inodes may be allocated
during the average life span of a single inode), a lot of this cache reuse is
not applicable, so the regression caused by this patch is smaller.

The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU,
however this adds some complexity to list walking and store-free path walking,
so I prefer to implement this at a later date, if it is shown to be a win in
real situations. I haven't found a regression in any non-micro benchmark so I
doubt it will be a problem.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 Documentation/filesystems/porting         | 14 ++++++++++++++
 arch/powerpc/platforms/cell/spufs/inode.c | 10 ++++++++--
 drivers/staging/pohmelfs/inode.c          |  9 ++++++++-
 drivers/staging/smbfs/inode.c             |  9 ++++++++-
 fs/9p/vfs_inode.c                         |  9 ++++++++-
 fs/adfs/super.c                           |  9 ++++++++-
 fs/affs/super.c                           |  9 ++++++++-
 fs/afs/super.c                            | 10 +++++++++-
 fs/befs/linuxvfs.c                        | 10 ++++++++--
 fs/bfs/inode.c                            |  9 ++++++++-
 fs/block_dev.c                            |  9 ++++++++-
 fs/btrfs/inode.c                          |  9 ++++++++-
 fs/ceph/inode.c                           | 11 ++++++++++-
 fs/cifs/cifsfs.c                          |  9 ++++++++-
 fs/coda/inode.c                           |  9 ++++++++-
 fs/ecryptfs/super.c                       | 12 +++++++++++-
 fs/efs/super.c                            |  9 ++++++++-
 fs/exofs/super.c                          |  9 ++++++++-
 fs/ext2/super.c                           |  9 ++++++++-
 fs/ext3/super.c                           |  9 ++++++++-
 fs/ext4/super.c                           |  9 ++++++++-
 fs/fat/inode.c                            |  9 ++++++++-
 fs/freevxfs/vxfs_inode.c                  |  9 ++++++++-
 fs/fuse/inode.c                           |  9 ++++++++-
 fs/gfs2/super.c                           |  9 ++++++++-
 fs/hfs/super.c                            |  9 ++++++++-
 fs/hfsplus/super.c                        | 10 +++++++++-
 fs/hostfs/hostfs_kern.c                   |  9 ++++++++-
 fs/hpfs/super.c                           |  9 ++++++++-
 fs/hppfs/hppfs.c                          |  9 ++++++++-
 fs/hugetlbfs/inode.c                      |  9 ++++++++-
 fs/inode.c                                | 10 +++++++++-
 fs/isofs/inode.c                          |  9 ++++++++-
 fs/jffs2/super.c                          |  9 ++++++++-
 fs/jfs/super.c                            | 10 +++++++++-
 fs/logfs/inode.c                          |  9 ++++++++-
 fs/minix/inode.c                          |  9 ++++++++-
 fs/ncpfs/inode.c                          |  9 ++++++++-
 fs/nfs/inode.c                            |  9 ++++++++-
 fs/nilfs2/super.c                         | 10 +++++++++-
 fs/ntfs/inode.c                           |  9 ++++++++-
 fs/ocfs2/dlmfs/dlmfs.c                    |  9 ++++++++-
 fs/ocfs2/super.c                          |  9 ++++++++-
 fs/openpromfs/inode.c                     |  9 ++++++++-
 fs/proc/inode.c                           |  9 ++++++++-
 fs/qnx4/inode.c                           |  9 ++++++++-
 fs/reiserfs/super.c                       |  9 ++++++++-
 fs/romfs/super.c                          |  9 ++++++++-
 fs/squashfs/super.c                       |  9 ++++++++-
 fs/sysv/inode.c                           |  9 ++++++++-
 fs/ubifs/super.c                          | 10 +++++++++-
 fs/udf/super.c                            |  9 ++++++++-
 fs/ufs/super.c                            |  9 ++++++++-
 fs/xfs/xfs_iget.c                         | 13 ++++++++++++-
 include/linux/fs.h                        |  5 ++++-
 include/linux/net.h                       |  1 -
 ipc/mqueue.c                              |  9 ++++++++-
 mm/shmem.c                                |  9 ++++++++-
 net/socket.c                              | 16 ++++++++--------
 net/sunrpc/rpc_pipe.c                     | 10 +++++++++-
 60 files changed, 490 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 1eb76959d096..ccf0ce7866b9 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -346,3 +346,17 @@ look at examples of other filesystems) for guidance.
 for details of what locks to replace dcache_lock with in order to protect
 particular things. Most of the time, a filesystem only needs ->d_lock, which
 protects *all* the dcache state of a given dentry.
+
+--
+[mandatory]
+
+	Filesystems must RCU-free their inodes, if they can have been accessed
+via rcu-walk path walk (basically, if the file can have had a path name in the
+vfs namespace).
+
+	i_dentry and i_rcu share storage in a union, and the vfs expects
+i_dentry to be reinitialized before it is freed, so an:
+
+  INIT_LIST_HEAD(&inode->i_dentry);
+
+must be done in the RCU callback.
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 03185de7cd4a..856e9c398068 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -71,12 +71,18 @@ spufs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void
-spufs_destroy_inode(struct inode *inode)
+static void spufs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
 }
 
+static void spufs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, spufs_i_callback);
+}
+
 static void
 spufs_init_once(void *p)
 {
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 61685ccceda8..cc8d2840f9b6 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = {
 	.set_page_dirty 	= __set_page_dirty_nobuffers,
 };
 
+static void pohmelfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode));
+}
+
 /*
  * ->detroy_inode() callback. Deletes inode from the caches
  *  and frees private data.
@@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode)
 
 	dprintk("%s: pi: %p, inode: %p, ino: %llu.\n",
 		__func__, pi, &pi->vfs_inode, pi->ino);
-	kmem_cache_free(pohmelfs_inode_cache, pi);
 	atomic_long_dec(&psb->total_inodes);
+	call_rcu(&inode->i_rcu, pohmelfs_i_callback);
 }
 
 /*
diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c
index 540a984bb516..244319dc9702 100644
--- a/drivers/staging/smbfs/inode.c
+++ b/drivers/staging/smbfs/inode.c
@@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void smb_destroy_inode(struct inode *inode)
+static void smb_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
+static void smb_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, smb_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 1073bca8488c..f6f9081e6d2c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
  *
  */
 
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
 }
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, v9fs_i_callback);
+}
 #endif
 
 /**
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42d..47dffc513a26 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
+static void adfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cbd..4c18fcfb0a19 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
 	return &i->vfs_inode;
 }
 
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
+static void affs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece4..f901a9d7c111 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	return &vnode->vfs_inode;
 }
 
+static void afs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(afs_inode_cachep, vnode);
+}
+
 /*
  * destroy an AFS inode struct
  */
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
 
 	ASSERTCMP(vnode->server, ==, NULL);
 
-	kmem_cache_free(afs_inode_cachep, vnode);
+	call_rcu(&inode->i_rcu, afs_i_callback);
 	atomic_dec(&afs_count_active_inodes);
 }
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c6..de93581b79a2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
         return &bi->vfs_inode;
 }
 
-static void
-befs_destroy_inode(struct inode *inode)
+static void befs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
+static void befs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, befs_i_callback);
+}
+
 static void init_once(void *foo)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49bb..a8e37f81d097 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
 	return &bi->vfs_inode;
 }
 
-static void bfs_destroy_inode(struct inode *inode)
+static void bfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
+static void bfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, bfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct bfs_inode_info *bi = foo;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd689..771f23527010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct bdev_inode *bdi = BDEV_I(inode);
 
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
+static void bdev_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7ce9f8932789..f9d2994a42a2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+static void btrfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
 void btrfs_destroy_inode(struct inode *inode)
 {
 	struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
 	inode_tree_del(inode);
 	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
-	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+	call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
 
 int btrfs_drop_inode(struct inode *inode)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2a48cafc174b..47f8c8baf3b5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	return &ci->vfs_inode;
 }
 
+static void ceph_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ceph_inode_cachep, ci);
+}
+
 void ceph_destroy_inode(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
 	if (ci->i_xattrs.prealloc_blob)
 		ceph_buffer_put(ci->i_xattrs.prealloc_blob);
 
-	kmem_cache_free(ceph_inode_cachep, ci);
+	call_rcu(&inode->i_rcu, ceph_i_callback);
 }
 
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3936aa7f2c22..223717dcc401 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb)
 	return &cifs_inode->vfs_inode;
 }
 
+static void cifs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+}
+
 static void
 cifs_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+	call_rcu(&inode->i_rcu, cifs_i_callback);
 }
 
 static void
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5ea57c8c7f97..50dc7d189f56 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void coda_destroy_inode(struct inode *inode)
+static void coda_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
+static void coda_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, coda_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2720178b7718..3042fe123a34 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -62,6 +62,16 @@ out:
 	return inode;
 }
 
+static void ecryptfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ecryptfs_inode_info *inode_info;
+	inode_info = ecryptfs_inode_to_private(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
 /**
  * ecryptfs_destroy_inode
  * @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 		}
 	}
 	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
-	kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+	call_rcu(&inode->i_rcu, ecryptfs_i_callback);
 }
 
 /**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652cc..0f31acb0131c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void efs_destroy_inode(struct inode *inode)
+static void efs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
+static void efs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, efs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e0456..8c6c4669b381 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
+static void exofs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
 /*
  * Remove an inode from the cache
  */
 static void exofs_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+	call_rcu(&inode->i_rcu, exofs_i_callback);
 }
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d78..e0c6380ff992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
+static void ext2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f0..77ce1616f725 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
+static void ext3_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
 static void ext3_destroy_inode(struct inode *inode)
 {
 	if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode)
 				false);
 		dump_stack();
 	}
-	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+	call_rcu(&inode->i_rcu, ext3_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb15c9c0be74..cd37f9d5e447 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -841,6 +841,13 @@ static int ext4_drop_inode(struct inode *inode)
 	return drop;
 }
 
+static void ext4_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
 static void ext4_destroy_inode(struct inode *inode)
 {
 	ext4_ioend_wait(inode);
@@ -853,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode)
 				true);
 		dump_stack();
 	}
-	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+	call_rcu(&inode->i_rcu, ext4_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c30..8cccfebee180 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
+static void fat_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079d..2ba6719ac612 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 	return ip;
 }
 
+static void vxfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(vxfs_inode_cachep, inode->i_private);
+}
+
 /**
  * vxfs_evict_inode - remove inode from main memory
  * @ip:		inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
 {
 	truncate_inode_pages(&ip->i_data, 0);
 	end_writeback(ip);
-	kmem_cache_free(vxfs_inode_cachep, ip->i_private);
+	call_rcu(&ip->i_rcu, vxfs_i_callback);
 }
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a92..44e0a6c57e87 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+static void fuse_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(fuse_inode_cachep, inode);
+}
+
 static void fuse_destroy_inode(struct inode *inode)
 {
 	struct fuse_inode *fi = get_fuse_inode(inode);
@@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode)
 	BUG_ON(!list_empty(&fi->queued_writes));
 	if (fi->forget_req)
 		fuse_request_free(fi->forget_req);
-	kmem_cache_free(fuse_inode_cachep, inode);
+	call_rcu(&inode->i_rcu, fuse_i_callback);
 }
 
 void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430b..16c2ecac7eb7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	return &ip->i_inode;
 }
 
-static void gfs2_destroy_inode(struct inode *inode)
+static void gfs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(gfs2_inode_cachep, inode);
 }
 
+static void gfs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, gfs2_i_callback);
+}
+
 const struct super_operations gfs2_super_ops = {
 	.alloc_inode		= gfs2_alloc_inode,
 	.destroy_inode		= gfs2_destroy_inode,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb8..ef4ee5716abe 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 	return i ? &i->vfs_inode : NULL;
 }
 
-static void hfs_destroy_inode(struct inode *inode)
+static void hfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
 }
 
+static void hfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hfs_i_callback);
+}
+
 static const struct super_operations hfs_super_operations = {
 	.alloc_inode	= hfs_alloc_inode,
 	.destroy_inode	= hfs_destroy_inode,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 52cc746d3ba3..182e83a9079e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -488,11 +488,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 	return i ? &i->vfs_inode : NULL;
 }
 
-static void hfsplus_destroy_inode(struct inode *inode)
+static void hfsplus_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
 }
 
+static void hfsplus_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hfsplus_i_callback);
+}
+
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
 
 static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 39dc505ed273..861113fcfc88 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -247,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
 	}
 }
 
-static void hostfs_destroy_inode(struct inode *inode)
+static void hostfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kfree(HOSTFS_I(inode));
 }
 
+static void hostfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hostfs_i_callback);
+}
+
 static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	const char *root_path = vfs->mnt_sb->s_fs_info;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3a..49935ba78db8 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void hpfs_destroy_inode(struct inode *inode)
+static void hpfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
+static void hpfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hpfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713fc..87ed48e0343d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
 	mntput(ino->i_sb->s_fs_info);
 }
 
-static void hppfs_destroy_inode(struct inode *inode)
+static void hppfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kfree(HPPFS_I(inode));
 }
 
+static void hppfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, hppfs_i_callback);
+}
+
 static const struct super_operations hppfs_sbops = {
 	.alloc_inode	= hppfs_alloc_inode,
 	.destroy_inode	= hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a5fe68189eed..9885082b470f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 	return &p->vfs_inode;
 }
 
+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
 static void hugetlbfs_destroy_inode(struct inode *inode)
 {
 	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
 	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
-	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+	call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
 }
 
 static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index 5a0a898f55d1..6751dfe8cc06 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -272,6 +272,13 @@ void __destroy_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(__destroy_inode);
 
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(inode_cachep, inode);
+}
+
 static void destroy_inode(struct inode *inode)
 {
 	BUG_ON(!list_empty(&inode->i_lru));
@@ -279,7 +286,7 @@ static void destroy_inode(struct inode *inode)
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
-		kmem_cache_free(inode_cachep, (inode));
+		call_rcu(&inode->i_rcu, i_callback);
 }
 
 /*
@@ -432,6 +439,7 @@ void end_writeback(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	inode_sync_wait(inode);
+	/* don't need i_lock here, no concurrent mods to i_state */
 	inode->i_state = I_FREEING | I_CLEAR;
 }
 EXPORT_SYMBOL(end_writeback);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d204ee4235fd..d8f3a652243d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -81,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void isofs_destroy_inode(struct inode *inode)
+static void isofs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
+static void isofs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, isofs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct iso_inode_info *ei = foo;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a4..853b8e300084 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
 	return &f->vfs_inode;
 }
 
-static void jffs2_destroy_inode(struct inode *inode)
+static void jffs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
+static void jffs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, jffs2_i_callback);
+}
+
 static void jffs2_i_init_once(void *foo)
 {
 	struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3bf..b715b0f7bdfd 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
 	return &jfs_inode->vfs_inode;
 }
 
+static void jfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct jfs_inode_info *ji = JFS_IP(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(jfs_inode_cachep, ji);
+}
+
 static void jfs_destroy_inode(struct inode *inode)
 {
 	struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
 		ji->active_ag = -1;
 	}
 	spin_unlock_irq(&ji->ag_lock);
-	kmem_cache_free(jfs_inode_cachep, ji);
+	call_rcu(&inode->i_rcu, jfs_i_callback);
 }
 
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098f..03b8c240aeda 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
 	return __logfs_iget(sb, ino);
 }
 
+static void logfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
+}
+
 static void __logfs_destroy_inode(struct inode *inode)
 {
 	struct logfs_inode *li = logfs_inode(inode);
 
 	BUG_ON(li->li_block);
 	list_del(&li->li_freeing_list);
-	kmem_cache_free(logfs_inode_cache, li);
+	call_rcu(&inode->i_rcu, logfs_i_callback);
 }
 
 static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a34..ae0b83f476a6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void minix_destroy_inode(struct inode *inode)
+static void minix_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
+static void minix_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, minix_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8fb93b604e73..60047dbeb38d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -58,11 +58,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ncp_destroy_inode(struct inode *inode)
+static void ncp_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
+static void ncp_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ncp_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c73416..017daa3bed38 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
 	return &nfsi->vfs_inode;
 }
 
-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
 }
 
+void nfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
 static inline void nfs4_init_once(struct nfs_inode *nfsi)
 {
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index d36fc7ee615f..e2dcc9c733f7 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
 	return &ii->vfs_inode;
 }
 
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
 
+	INIT_LIST_HEAD(&inode->i_dentry);
+
 	if (mdi) {
 		kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
 		kfree(mdi);
@@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
 }
 
+void nilfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
 static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
 {
 	struct the_nilfs *nilfs = sbi->s_nilfs;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc7..a627ed82c0a3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 	return NULL;
 }
 
+static void ntfs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+}
+
 void ntfs_destroy_big_inode(struct inode *inode)
 {
 	ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
 	BUG_ON(ni->page);
 	if (!atomic_dec_and_test(&ni->count))
 		BUG();
-	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+	call_rcu(&inode->i_rcu, ntfs_i_callback);
 }
 
 static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19ed..8c5c0eddc365 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
 	return &ip->ip_vfs_inode;
 }
 
-static void dlmfs_destroy_inode(struct inode *inode)
+static void dlmfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
 }
 
+static void dlmfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, dlmfs_i_callback);
+}
+
 static void dlmfs_evict_inode(struct inode *inode)
 {
 	int status;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cfeab7ce3697..17ff46fa8a10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
-static void ocfs2_destroy_inode(struct inode *inode)
+static void ocfs2_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
 }
 
+static void ocfs2_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ocfs2_i_callback);
+}
+
 static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
 						unsigned int cbits)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911e61f348fc..a2a5bff774e3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 	return &oi->vfs_inode;
 }
 
-static void openprom_destroy_inode(struct inode *inode)
+static void openprom_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(op_inode_cachep, OP_I(inode));
 }
 
+static void openprom_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, openprom_i_callback);
+}
+
 static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
 {
 	struct inode *inode;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3ddb6068177c..6bcb926b101b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	return inode;
 }
 
-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
+static void proc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa3..e63b4171d583 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void qnx4_destroy_inode(struct inode *inode)
+static void qnx4_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
+static void qnx4_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, qnx4_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b243117b8752..2575682a9ead 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void reiserfs_destroy_inode(struct inode *inode)
+static void reiserfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
+static void reiserfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, reiserfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55cd..2305e3121cb1 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
 /*
  * return a spent inode to the slab cache
  */
-static void romfs_destroy_inode(struct inode *inode)
+static void romfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
+static void romfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, romfs_i_callback);
+}
+
 /*
  * get filesystem statistics
  */
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c1..20700b9f2b4c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
 }
 
 
-static void squashfs_destroy_inode(struct inode *inode)
+static void squashfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
 }
 
+static void squashfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, squashfs_i_callback);
+}
+
 
 static struct file_system_type squashfs_fs_type = {
 	.owner = THIS_MODULE,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e6..0630eb969a28 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 	return &si->vfs_inode;
 }
 
-static void sysv_destroy_inode(struct inode *inode)
+static void sysv_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
+static void sysv_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, sysv_i_callback);
+}
+
 static void init_once(void *p)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e3..6e11c2975dcf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
 	return &ui->vfs_inode;
 };
 
+static void ubifs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct ubifs_inode *ui = ubifs_inode(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(ubifs_inode_slab, ui);
+}
+
 static void ubifs_destroy_inode(struct inode *inode)
 {
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
 	kfree(ui->data);
-	kmem_cache_free(ubifs_inode_slab, inode);
+	call_rcu(&inode->i_rcu, ubifs_i_callback);
 }
 
 /*
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4a5c7c61836a..b539d53320fb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void udf_destroy_inode(struct inode *inode)
+static void udf_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
+static void udf_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, udf_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56da..2c61ac5d4e48 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void ufs_destroy_inode(struct inode *inode)
+static void ufs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
+static void ufs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, ufs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8e..d7de5a3f7867 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
 	return ip;
 }
 
+STATIC void
+xfs_inode_free_callback(
+	struct rcu_head		*head)
+{
+	struct inode		*inode = container_of(head, struct inode, i_rcu);
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
 void
 xfs_inode_free(
 	struct xfs_inode	*ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
 	ASSERT(!spin_is_locked(&ip->i_flags_lock));
 	ASSERT(completion_done(&ip->i_flush));
 
-	kmem_zone_free(xfs_inode_zone, ip);
+	call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 296cf2fde945..1ff4d0a33b25 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -737,7 +737,10 @@ struct inode {
 	struct list_head	i_wb_list;	/* backing dev IO list */
 	struct list_head	i_lru;		/* inode LRU list */
 	struct list_head	i_sb_list;
-	struct list_head	i_dentry;
+	union {
+		struct list_head	i_dentry;
+		struct rcu_head		i_rcu;
+	};
 	unsigned long		i_ino;
 	atomic_t		i_count;
 	unsigned int		i_nlink;
diff --git a/include/linux/net.h b/include/linux/net.h
index 16faa130088c..06bde4908473 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -120,7 +120,6 @@ enum sock_shutdown_cmd {
 struct socket_wq {
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
-	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
 /**
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 035f4399edbc..14fb6d67e6a3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -237,11 +237,18 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void mqueue_destroy_inode(struct inode *inode)
+static void mqueue_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
 }
 
+static void mqueue_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, mqueue_i_callback);
+}
+
 static void mqueue_evict_inode(struct inode *inode)
 {
 	struct mqueue_inode_info *info;
diff --git a/mm/shmem.c b/mm/shmem.c
index 47fdeeb9d636..5ee67c990602 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
 	return &p->vfs_inode;
 }
 
+static void shmem_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+}
+
 static void shmem_destroy_inode(struct inode *inode)
 {
 	if ((inode->i_mode & S_IFMT) == S_IFREG) {
 		/* only struct inode is valid if it's an inline symlink */
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	}
-	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
+	call_rcu(&inode->i_rcu, shmem_i_callback);
 }
 
 static void init_once(void *foo)
diff --git a/net/socket.c b/net/socket.c
index 088fb3fd45e0..97fff3a4e72f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -262,20 +262,20 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 }
 
 
-static void wq_free_rcu(struct rcu_head *head)
+static void sock_free_rcu(struct rcu_head *head)
 {
-	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct socket_alloc *ei = container_of(inode, struct socket_alloc,
+								vfs_inode);
 
-	kfree(wq);
+	kfree(ei->socket.wq);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void sock_destroy_inode(struct inode *inode)
 {
-	struct socket_alloc *ei;
-
-	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
-	kmem_cache_free(sock_inode_cachep, ei);
+	call_rcu(&inode->i_rcu, sock_free_rcu);
 }
 
 static void init_once(void *foo)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a0dc1a86fcea..2899fe27f880 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb)
 }
 
 static void
-rpc_destroy_inode(struct inode *inode)
+rpc_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(rpc_inode_cachep, RPC_I(inode));
 }
 
+static void
+rpc_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, rpc_i_callback);
+}
+
 static int
 rpc_pipe_open(struct inode *inode, struct file *filp)
 {
-- 
cgit v1.2.3


From ff0c7d15f9787b7e8c601533c015295cc68329f8 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:50 +1100
Subject: fs: avoid inode RCU freeing for pseudo fs

Pseudo filesystems that don't put inode on RCU list or reachable by
rcu-walk dentries do not need to RCU free their inodes.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/inode.c          |  6 ++++++
 fs/pipe.c           |  6 +++++-
 include/linux/fs.h  |  1 +
 include/linux/net.h |  1 +
 net/socket.c        | 17 +++++++++--------
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/fs/inode.c b/fs/inode.c
index 6751dfe8cc06..da85e56378f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -257,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
 	return inode;
 }
 
+void free_inode_nonrcu(struct inode *inode)
+{
+	kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
diff --git a/fs/pipe.c b/fs/pipe.c
index 04629f36e397..ae3592dcc88c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1253,6 +1253,10 @@ out:
 	return ret;
 }
 
+static const struct super_operations pipefs_ops = {
+	.destroy_inode = free_inode_nonrcu,
+};
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
@@ -1262,7 +1266,7 @@ out:
 static struct dentry *pipefs_mount(struct file_system_type *fs_type,
 			 int flags, const char *dev_name, void *data)
 {
-	return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
+	return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
 }
 
 static struct file_system_type pipe_fs_type = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ff4d0a33b25..ea202fff44f8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2233,6 +2233,7 @@ extern void iget_failed(struct inode *);
 extern void end_writeback(struct inode *);
 extern void __destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
+extern void free_inode_nonrcu(struct inode *inode);
 extern int should_remove_suid(struct dentry *);
 extern int file_remove_suid(struct file *);
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 06bde4908473..16faa130088c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -120,6 +120,7 @@ enum sock_shutdown_cmd {
 struct socket_wq {
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
+	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
 /**
diff --git a/net/socket.c b/net/socket.c
index 97fff3a4e72f..817dc92e9ef8 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -262,20 +262,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 }
 
 
-static void sock_free_rcu(struct rcu_head *head)
+
+static void wq_free_rcu(struct rcu_head *head)
 {
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-	struct socket_alloc *ei = container_of(inode, struct socket_alloc,
-								vfs_inode);
+	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
 
-	kfree(ei->socket.wq);
-	INIT_LIST_HEAD(&inode->i_dentry);
-	kmem_cache_free(sock_inode_cachep, ei);
+	kfree(wq);
 }
 
 static void sock_destroy_inode(struct inode *inode)
 {
-	call_rcu(&inode->i_rcu, sock_free_rcu);
+	struct socket_alloc *ei;
+
+	ei = container_of(inode, struct socket_alloc, vfs_inode);
+	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void init_once(void *foo)
-- 
cgit v1.2.3


From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:49:55 +1100
Subject: fs: dcache reduce branches in lookup path

Reduce some branches and memory accesses in dcache lookup by adding dentry
flags to indicate common d_ops are set, rather than having to check them.
This saves a pointer memory access (dentry->d_op) in common path lookup
situations, and saves another pointer load and branch in cases where we
have d_op but not the particular operation.

Patched with:

git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/ia64/kernel/perfmon.c    |  2 +-
 drivers/staging/autofs/root.c |  2 +-
 drivers/staging/smbfs/dir.c   |  8 ++++----
 fs/9p/vfs_inode.c             | 26 +++++++++++++-------------
 fs/adfs/dir.c                 |  2 +-
 fs/adfs/super.c               |  2 +-
 fs/affs/namei.c               |  2 +-
 fs/affs/super.c               |  2 +-
 fs/afs/dir.c                  |  2 +-
 fs/anon_inodes.c              |  2 +-
 fs/autofs4/inode.c            |  2 +-
 fs/autofs4/root.c             | 10 +++++-----
 fs/btrfs/export.c             |  4 ++--
 fs/btrfs/inode.c              |  2 +-
 fs/ceph/dir.c                 |  6 +++---
 fs/cifs/dir.c                 | 16 ++++++++--------
 fs/cifs/inode.c               |  8 ++++----
 fs/cifs/link.c                |  4 ++--
 fs/cifs/readdir.c             |  4 ++--
 fs/coda/dir.c                 |  2 +-
 fs/configfs/dir.c             |  8 ++++----
 fs/dcache.c                   | 30 ++++++++++++++++++++++++++----
 fs/ecryptfs/inode.c           |  2 +-
 fs/ecryptfs/main.c            |  4 ++--
 fs/fat/inode.c                |  4 ++--
 fs/fat/namei_msdos.c          |  6 +++---
 fs/fat/namei_vfat.c           |  8 ++++----
 fs/fuse/dir.c                 |  2 +-
 fs/fuse/inode.c               |  4 ++--
 fs/gfs2/export.c              |  4 ++--
 fs/gfs2/ops_fstype.c          |  2 +-
 fs/gfs2/ops_inode.c           |  2 +-
 fs/hfs/dir.c                  |  2 +-
 fs/hfs/super.c                |  2 +-
 fs/hfsplus/dir.c              |  2 +-
 fs/hfsplus/super.c            |  2 +-
 fs/hostfs/hostfs_kern.c       |  2 +-
 fs/hpfs/dentry.c              |  2 +-
 fs/isofs/inode.c              |  2 +-
 fs/isofs/namei.c              |  2 +-
 fs/jfs/namei.c                |  4 ++--
 fs/jfs/super.c                |  2 +-
 fs/libfs.c                    |  2 +-
 fs/minix/namei.c              |  2 +-
 fs/namei.c                    | 31 ++++++++++++++++++++-----------
 fs/ncpfs/dir.c                |  4 ++--
 fs/ncpfs/inode.c              |  2 +-
 fs/nfs/dir.c                  |  6 +++---
 fs/nfs/getroot.c              |  4 ++--
 fs/ocfs2/export.c             |  4 ++--
 fs/ocfs2/namei.c              | 10 +++++-----
 fs/pipe.c                     |  2 +-
 fs/proc/base.c                | 12 ++++++------
 fs/proc/generic.c             |  2 +-
 fs/proc/proc_sysctl.c         |  4 ++--
 fs/reiserfs/xattr.c           |  2 +-
 fs/sysfs/dir.c                |  2 +-
 fs/sysv/namei.c               |  2 +-
 fs/sysv/super.c               |  2 +-
 include/linux/dcache.h        |  6 ++++++
 kernel/cgroup.c               |  2 +-
 net/socket.c                  |  2 +-
 net/sunrpc/rpc_pipe.c         |  2 +-
 63 files changed, 174 insertions(+), 137 deletions(-)

(limited to 'net')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d39d8a53b579..5a24f40bb48e 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2233,7 +2233,7 @@ pfm_alloc_file(pfm_context_t *ctx)
 	}
 	path.mnt = mntget(pfmfs_mnt);
 
-	path.dentry->d_op = &pfmfs_dentry_operations;
+	d_set_d_op(path.dentry, &pfmfs_dentry_operations);
 	d_add(path.dentry, inode);
 
 	file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c
index 0fdec4befd84..b09adb57971f 100644
--- a/drivers/staging/autofs/root.c
+++ b/drivers/staging/autofs/root.c
@@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 	 *
 	 * We need to do this before we release the directory semaphore.
 	 */
-	dentry->d_op = &autofs_dentry_operations;
+	d_set_d_op(dentry, &autofs_dentry_operations);
 	dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 	d_add(dentry, NULL);
 
diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c
index 5f79799d5d4a..78f09412740c 100644
--- a/drivers/staging/smbfs/dir.c
+++ b/drivers/staging/smbfs/dir.c
@@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry)
 	struct smb_sb_info *server = server_from_dentry(dentry);
 
 	if (server->mnt->flags & SMB_MOUNT_CASE)
-		dentry->d_op = &smbfs_dentry_operations_case;
+		d_set_d_op(dentry, &smbfs_dentry_operations_case);
 	else
-		dentry->d_op = &smbfs_dentry_operations;
+		d_set_d_op(dentry, &smbfs_dentry_operations);
 	dentry->d_time = jiffies;
 }
 
@@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	add_entry:
 			server = server_from_dentry(dentry);
 			if (server->mnt->flags & SMB_MOUNT_CASE)
-				dentry->d_op = &smbfs_dentry_operations_case;
+				d_set_d_op(dentry, &smbfs_dentry_operations_case);
 			else
-				dentry->d_op = &smbfs_dentry_operations;
+				d_set_d_op(dentry, &smbfs_dentry_operations);
 
 			d_add(dentry, inode);
 			smb_renew_times(dentry);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f6f9081e6d2c..df8bbb358d54 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -635,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
@@ -749,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -767,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -956,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -973,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -1041,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 inst_out:
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_add(dentry, inode);
 	return NULL;
@@ -1709,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 					err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -1722,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 
@@ -1856,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		ihold(old_dentry->d_inode);
 	}
 
-	dentry->d_op = old_dentry->d_op;
+	d_set_d_op(dentry, old_dentry->d_op);
 	d_instantiate(dentry, old_dentry->d_inode);
 
 	return err;
@@ -1980,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 				err);
 			goto error;
 		}
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 		d_instantiate(dentry, inode);
 		err = v9fs_fid_add(dentry, fid);
 		if (err < 0)
@@ -1996,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index a11e5e102716..bf7693c384f9 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -276,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	struct object_info obj;
 	int error;
 
-	dentry->d_op = &adfs_dentry_operations;	
+	d_set_d_op(dentry, &adfs_dentry_operations);
 	lock_kernel();
 	error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
 	if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 47dffc513a26..a4041b52fbca 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -484,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 		adfs_error(sb, "get root inode failed\n");
 		goto error;
 	} else
-		sb->s_root->d_op = &adfs_dentry_operations;
+		d_set_d_op(sb->s_root, &adfs_dentry_operations);
 	unlock_kernel();
 	return 0;
 
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 5aca08c21100..944a4042fb65 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -240,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
 	}
-	dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
+	d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
 	d_add(dentry, inode);
 	return NULL;
 }
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4c18fcfb0a19..d39081bbe7ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -482,7 +482,7 @@ got_root:
 		printk(KERN_ERR "AFFS: Get root inode failed\n");
 		goto out_error;
 	}
-	sb->s_root->d_op = &affs_dentry_operations;
+	d_set_d_op(sb->s_root, &affs_dentry_operations);
 
 	pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
 	return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2c18cde27000..b8bb7e7148d0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 success:
-	dentry->d_op = &afs_fs_dentry_operations;
+	d_set_d_op(dentry, &afs_fs_dentry_operations);
 
 	d_add(dentry, inode);
 	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 57ce55b2564c..aca8806fa206 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
 	 */
 	ihold(anon_inode_inode);
 
-	path.dentry->d_op = &anon_inodefs_dentry_operations;
+	d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa706..a7bdb9dcac84 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		goto fail_iput;
 	pipe = NULL;
 
-	root->d_op = &autofs4_sb_dentry_operations;
+	d_set_d_op(root, &autofs4_sb_dentry_operations);
 	root->d_fsdata = ino;
 
 	/* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 10ca68a96dc7..bfe3f2eb684d 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -571,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 		 * we check for the hashed dentry and return the newly
 		 * hashed dentry.
 		 */
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 
 		/*
 		 * And we need to ensure that the same dentry is used for
@@ -710,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir,
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 	else
-		dentry->d_op = &autofs4_dentry_operations;
+		d_set_d_op(dentry, &autofs4_dentry_operations);
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
@@ -845,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 	else
-		dentry->d_op = &autofs4_dentry_operations;
+		d_set_d_op(dentry, &autofs4_dentry_operations);
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 659f532d26a0..0ccf9a8afcdf 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
 
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
-		dentry->d_op = &btrfs_dentry_operations;
+		d_set_d_op(dentry, &btrfs_dentry_operations);
 	return dentry;
 fail:
 	srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
 	key.offset = 0;
 	dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
 	if (!IS_ERR(dentry))
-		dentry->d_op = &btrfs_dentry_operations;
+		d_set_d_op(dentry, &btrfs_dentry_operations);
 	return dentry;
 fail:
 	btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f9d2994a42a2..63e4546b478a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	int index;
 	int ret;
 
-	dentry->d_op = &btrfs_dentry_operations;
+	d_set_d_op(dentry, &btrfs_dentry_operations);
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 58abc3da6111..cc01cf826769 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry)
 
 	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
 	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
-		dentry->d_op = &ceph_dentry_ops;
+		d_set_d_op(dentry, &ceph_dentry_ops);
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
-		dentry->d_op = &ceph_snapdir_dentry_ops;
+		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
 	else
-		dentry->d_op = &ceph_snap_dentry_ops;
+		d_set_d_op(dentry, &ceph_snap_dentry_ops);
 
 	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
 	if (!di)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 88bfe686ac00..e3b10ca6d453 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
 			      struct inode *newinode)
 {
 	if (tcon->nocase)
-		direntry->d_op = &cifs_ci_dentry_ops;
+		d_set_d_op(direntry, &cifs_ci_dentry_ops);
 	else
-		direntry->d_op = &cifs_dentry_ops;
+		d_set_d_op(direntry, &cifs_dentry_ops);
 	d_instantiate(direntry, newinode);
 }
 
@@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 		rc = cifs_get_inode_info_unix(&newinode, full_path,
 						inode->i_sb, xid);
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 
 		if (rc == 0)
 			d_instantiate(direntry, newinode);
@@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
 	if ((rc == 0) && (newInode != NULL)) {
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_add(direntry, newInode);
 		if (posix_open) {
 			filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		rc = 0;
 		direntry->d_time = jiffies;
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_add(direntry, NULL);
 	/*	if it was once a directory (but how can we tell?) we could do
 		shrink_dcache_parent(direntry); */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 99b9a2cc14b7..2a239d878e85 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 	to set uid/gid */
 			inc_nlink(inode);
 			if (pTcon->nocase)
-				direntry->d_op = &cifs_ci_dentry_ops;
+				d_set_d_op(direntry, &cifs_ci_dentry_ops);
 			else
-				direntry->d_op = &cifs_dentry_ops;
+				d_set_d_op(direntry, &cifs_dentry_ops);
 
 			cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
 			cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1363,9 +1363,9 @@ mkdir_get_info:
 						 inode->i_sb, xid, NULL);
 
 		if (pTcon->nocase)
-			direntry->d_op = &cifs_ci_dentry_ops;
+			d_set_d_op(direntry, &cifs_ci_dentry_ops);
 		else
-			direntry->d_op = &cifs_dentry_ops;
+			d_set_d_op(direntry, &cifs_dentry_ops);
 		d_instantiate(direntry, newinode);
 		 /* setting nlink not necessary except in cases where we
 		  * failed to get it from the server or was set bogus */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7b..fe2f6a93c49e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
 			      rc);
 		} else {
 			if (pTcon->nocase)
-				direntry->d_op = &cifs_ci_dentry_ops;
+				d_set_d_op(direntry, &cifs_ci_dentry_ops);
 			else
-				direntry->d_op = &cifs_dentry_ops;
+				d_set_d_op(direntry, &cifs_dentry_ops);
 			d_instantiate(direntry, newinode);
 		}
 	}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ee463aeca0b0..ec5b68e3b928 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
 	}
 
 	if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
-		dentry->d_op = &cifs_ci_dentry_ops;
+		d_set_d_op(dentry, &cifs_ci_dentry_ops);
 	else
-		dentry->d_op = &cifs_dentry_ops;
+		d_set_d_op(dentry, &cifs_dentry_ops);
 
 	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9e37e8bc9b89..aa40c811f8d2 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
 		return ERR_PTR(error);
 
 exit:
-	entry->d_op = &coda_dentry_operations;
+	d_set_d_op(entry, &coda_dentry_operations);
 
 	if (inode && (type & CODA_NOCACHE))
 		coda_flag_inode(inode, C_VATTR | C_PURGE);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index e9acea440ffc..36637a8c1ed3 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
 		return error;
 	}
 
-	dentry->d_op = &configfs_dentry_ops;
+	d_set_d_op(dentry, &configfs_dentry_ops);
 	d_rehash(dentry);
 
 	return 0;
@@ -489,7 +489,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
 		 */
 		if (dentry->d_name.len > NAME_MAX)
 			return ERR_PTR(-ENAMETOOLONG);
-		dentry->d_op = &configfs_dentry_ops;
+		d_set_d_op(dentry, &configfs_dentry_ops);
 		d_add(dentry, NULL);
 		return NULL;
 	}
@@ -683,7 +683,7 @@ static int create_default_group(struct config_group *parent_group,
 	ret = -ENOMEM;
 	child = d_alloc(parent, &name);
 	if (child) {
-		child->d_op = &configfs_dentry_ops;
+		d_set_d_op(child, &configfs_dentry_ops);
 		d_add(child, NULL);
 
 		ret = configfs_attach_group(&parent_group->cg_item,
@@ -1681,7 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 	err = -ENOMEM;
 	dentry = d_alloc(configfs_sb->s_root, &name);
 	if (dentry) {
-		dentry->d_op = &configfs_dentry_ops;
+		d_set_d_op(dentry, &configfs_dentry_ops);
 		d_add(dentry, NULL);
 
 		err = configfs_attach_group(sd->s_element, &group->cg_item,
diff --git a/fs/dcache.c b/fs/dcache.c
index 1d5cf511e1c7..f9693da3efbd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -398,7 +398,7 @@ repeat:
 		return;
 	}
 
-	if (dentry->d_op && dentry->d_op->d_delete) {
+	if (dentry->d_flags & DCACHE_OP_DELETE) {
 		if (dentry->d_op->d_delete(dentry))
 			goto kill_it;
 	}
@@ -1301,6 +1301,28 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 }
 EXPORT_SYMBOL(d_alloc_name);
 
+void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+	BUG_ON(dentry->d_op);
+	BUG_ON(dentry->d_flags & (DCACHE_OP_HASH	|
+				DCACHE_OP_COMPARE	|
+				DCACHE_OP_REVALIDATE	|
+				DCACHE_OP_DELETE ));
+	dentry->d_op = op;
+	if (!op)
+		return;
+	if (op->d_hash)
+		dentry->d_flags |= DCACHE_OP_HASH;
+	if (op->d_compare)
+		dentry->d_flags |= DCACHE_OP_COMPARE;
+	if (op->d_revalidate)
+		dentry->d_flags |= DCACHE_OP_REVALIDATE;
+	if (op->d_delete)
+		dentry->d_flags |= DCACHE_OP_DELETE;
+
+}
+EXPORT_SYMBOL(d_set_d_op);
+
 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 {
 	spin_lock(&dentry->d_lock);
@@ -1731,7 +1753,7 @@ seqretry:
 		 */
 		if (read_seqcount_retry(&dentry->d_seq, *seq))
 			goto seqretry;
-		if (parent->d_op && parent->d_op->d_compare) {
+		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			if (parent->d_op->d_compare(parent, *inode,
 						dentry, i,
 						tlen, tname, name))
@@ -1846,7 +1868,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 		 */
 		tlen = dentry->d_name.len;
 		tname = dentry->d_name.name;
-		if (parent->d_op && parent->d_op->d_compare) {
+		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			if (parent->d_op->d_compare(parent, parent->d_inode,
 						dentry, dentry->d_inode,
 						tlen, tname, name))
@@ -1887,7 +1909,7 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
 	 * routine may choose to leave the hash value unchanged.
 	 */
 	name->hash = full_name_hash(name->name, name->len);
-	if (dir->d_op && dir->d_op->d_hash) {
+	if (dir->d_flags & DCACHE_OP_HASH) {
 		if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
 			goto out;
 	}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5e5c7ec1fc98..f91b35db4c6e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	struct qstr lower_name;
 	int rc = 0;
 
-	ecryptfs_dentry->d_op = &ecryptfs_dops;
+	d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
 	if ((ecryptfs_dentry->d_name.len == 1
 	     && !strcmp(ecryptfs_dentry->d_name.name, "."))
 	    || (ecryptfs_dentry->d_name.len == 2
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a9dbd62518e6..351038675376 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 	if (special_file(lower_inode->i_mode))
 		init_special_inode(inode, lower_inode->i_mode,
 				   lower_inode->i_rdev);
-	dentry->d_op = &ecryptfs_dops;
+	d_set_d_op(dentry, &ecryptfs_dops);
 	fsstack_copy_attr_all(inode, lower_inode);
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
@@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 		deactivate_locked_super(s);
 		goto out;
 	}
-	s->s_root->d_op = &ecryptfs_dops;
+	d_set_d_op(s->s_root, &ecryptfs_dops);
 	s->s_root->d_sb = s;
 	s->s_root->d_parent = s->s_root;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8cccfebee180..206351af7c58 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -750,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
 	 */
 	result = d_obtain_alias(inode);
 	if (!IS_ERR(result))
-		result->d_op = sb->s_root->d_op;
+		d_set_d_op(result, sb->s_root->d_op);
 	return result;
 }
 
@@ -800,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(inode);
 	if (!IS_ERR(parent))
-		parent->d_op = sb->s_root->d_op;
+		d_set_d_op(parent, sb->s_root->d_op);
 out:
 	unlock_super(sb);
 
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3b3e072d8982..35ffe43afa4b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -227,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
 	}
 out:
 	unlock_super(sb);
-	dentry->d_op = &msdos_dentry_operations;
+	d_set_d_op(dentry, &msdos_dentry_operations);
 	dentry = d_splice_alias(inode, dentry);
 	if (dentry)
-		dentry->d_op = &msdos_dentry_operations;
+		d_set_d_op(dentry, &msdos_dentry_operations);
 	return dentry;
 
 error:
@@ -673,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	sb->s_flags |= MS_NOATIME;
-	sb->s_root->d_op = &msdos_dentry_operations;
+	d_set_d_op(sb->s_root, &msdos_dentry_operations);
 	unlock_super(sb);
 	return 0;
 }
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 4fc06278db48..3be5ed7d859f 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -766,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
 
 out:
 	unlock_super(sb);
-	dentry->d_op = sb->s_root->d_op;
+	d_set_d_op(dentry, sb->s_root->d_op);
 	dentry->d_time = dentry->d_parent->d_inode->i_version;
 	dentry = d_splice_alias(inode, dentry);
 	if (dentry) {
-		dentry->d_op = sb->s_root->d_op;
+		d_set_d_op(dentry, sb->s_root->d_op);
 		dentry->d_time = dentry->d_parent->d_inode->i_version;
 	}
 	return dentry;
@@ -1072,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (MSDOS_SB(sb)->options.name_check != 's')
-		sb->s_root->d_op = &vfat_ci_dentry_ops;
+		d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
 	else
-		sb->s_root->d_op = &vfat_dentry_ops;
+		d_set_d_op(sb->s_root, &vfat_dentry_ops);
 
 	unlock_super(sb);
 	return 0;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482d..c9a8a426a395 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -347,7 +347,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	}
 
 	entry = newent ? newent : entry;
-	entry->d_op = &fuse_dentry_operations;
+	d_set_d_op(entry, &fuse_dentry_operations);
 	if (outarg_valid)
 		fuse_change_entry_timeout(entry, &outarg);
 	else
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 44e0a6c57e87..a8b31da19b93 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -626,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
 
 	entry = d_obtain_alias(inode);
 	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
-		entry->d_op = &fuse_dentry_operations;
+		d_set_d_op(entry, &fuse_dentry_operations);
 		fuse_invalidate_entry_cache(entry);
 	}
 
@@ -728,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(inode);
 	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
-		parent->d_op = &fuse_dentry_operations;
+		d_set_d_op(parent, &fuse_dentry_operations);
 		fuse_invalidate_entry_cache(parent);
 	}
 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5ab3839dfcb9..97012ecff560 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 
 	dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
 	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
+		d_set_d_op(dentry, &gfs2_dops);
 	return dentry;
 }
 
@@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 out_inode:
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
-		dentry->d_op = &gfs2_dops;
+		d_set_d_op(dentry, &gfs2_dops);
 	return dentry;
 }
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..2aeabd4218cc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
 		iput(inode);
 		return -ENOMEM;
 	}
-	dentry->d_op = &gfs2_dops;
+	d_set_d_op(dentry, &gfs2_dops);
 	*dptr = dentry;
 	return 0;
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7502c2..f28f89796f4d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct inode *inode = NULL;
 
-	dentry->d_op = &gfs2_dops;
+	d_set_d_op(dentry, &gfs2_dops);
 
 	inode = gfs2_lookupi(dir, &dentry->d_name, 0);
 	if (inode && IS_ERR(inode))
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41b..ea4aefe7c652 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
 	struct inode *inode = NULL;
 	int res;
 
-	dentry->d_op = &hfs_dentry_operations;
+	d_set_d_op(dentry, &hfs_dentry_operations);
 
 	hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
 	hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ef4ee5716abe..0bef62aa4f42 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -434,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sb->s_root)
 		goto bail_iput;
 
-	sb->s_root->d_op = &hfs_dentry_operations;
+	d_set_d_op(sb->s_root, &hfs_dentry_operations);
 
 	/* everything's okay */
 	return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 9d59c0571f59..ccab87145f7a 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
 
 	sb = dir->i_sb;
 
-	dentry->d_op = &hfsplus_dentry_operations;
+	d_set_d_op(dentry, &hfsplus_dentry_operations);
 	dentry->d_fsdata = NULL;
 	hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 182e83a9079e..ddf712e4700e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -419,7 +419,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
 		err = -ENOMEM;
 		goto cleanup;
 	}
-	sb->s_root->d_op = &hfsplus_dentry_operations;
+	d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
 
 	str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
 	str.name = HFSP_HIDDENDIR_NAME;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 861113fcfc88..0bc81cf256b8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -612,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 		goto out_put;
 
 	d_add(dentry, inode);
-	dentry->d_op = &hostfs_dentry_ops;
+	d_set_d_op(dentry, &hostfs_dentry_ops);
 	return NULL;
 
  out_put:
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 35526df1fd32..32c13a94e1e9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -65,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
 
 void hpfs_set_dentry_operations(struct dentry *dentry)
 {
-	dentry->d_op = &hpfs_dentry_operations;
+	d_set_d_op(dentry, &hpfs_dentry_operations);
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d8f3a652243d..844a7903c72f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -949,7 +949,7 @@ root_found:
 		table += 2;
 	if (opt.check == 'r')
 		table++;
-	s->s_root->d_op = &isofs_dentry_ops[table];
+	d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
 
 	kfree(opt.iocharset);
 
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 715f7d318046..679a849c3b27 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -172,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
 	struct inode *inode;
 	struct page *page;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 
 	page = alloc_page(GFP_USER);
 	if (!page)
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 57f90dad8919..a151cbdec626 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1466,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 	jfs_info("jfs_lookup: name = %s", name);
 
 	if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
-		dentry->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(dentry, &jfs_ci_dentry_operations);
 
 	if ((name[0] == '.') && (len == 1))
 		inum = dip->i_ino;
@@ -1495,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
 	dentry = d_splice_alias(ip, dentry);
 
 	if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
-		dentry->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(dentry, &jfs_ci_dentry_operations);
 
 	return dentry;
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b715b0f7bdfd..3150d766e0d4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -525,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_no_root;
 
 	if (sbi->mntflag & JFS_OS2)
-		sb->s_root->d_op = &jfs_ci_dentry_operations;
+		d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
 
 	/* logical blocks are represented by 40 bits in pxd_t, etc. */
 	sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index 28b36663c44e..889311e3d06b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -59,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
-	dentry->d_op = &simple_dentry_operations;
+	d_set_d_op(dentry, &simple_dentry_operations);
 	d_add(dentry, NULL);
 	return NULL;
 }
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3accef..1b9e07728a9f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
 	struct inode * inode = NULL;
 	ino_t ino;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 
 	if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
 		return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index c731b50a6184..90bd2873e117 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -587,6 +587,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return dentry;
 }
 
+static inline int need_reval_dot(struct dentry *dentry)
+{
+	if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+		return 0;
+
+	if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
+		return 0;
+
+	return 1;
+}
+
 /*
  * force_reval_path - force revalidation of a dentry
  *
@@ -610,10 +621,9 @@ force_reval_path(struct path *path, struct nameidata *nd)
 
 	/*
 	 * only check on filesystems where it's possible for the dentry to
-	 * become stale. It's assumed that if this flag is set then the
-	 * d_revalidate op will also be defined.
+	 * become stale.
 	 */
-	if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
+	if (!need_reval_dot(dentry))
 		return 0;
 
 	status = dentry->d_op->d_revalidate(dentry, nd);
@@ -1003,7 +1013,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
-	if (parent->d_op && parent->d_op->d_hash) {
+	if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
 		int err = parent->d_op->d_hash(parent, nd->inode, name);
 		if (err < 0)
 			return err;
@@ -1029,7 +1039,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 			return -ECHILD;
 
 		nd->seq = seq;
-		if (dentry->d_op && dentry->d_op->d_revalidate) {
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
 			/* We commonly drop rcu-walk here */
 			if (nameidata_dentry_drop_rcu(nd, dentry))
 				return -ECHILD;
@@ -1043,7 +1053,7 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 		if (!dentry)
 			goto need_lookup;
 found:
-		if (dentry->d_op && dentry->d_op->d_revalidate)
+		if (dentry->d_flags & DCACHE_OP_REVALIDATE)
 			goto need_revalidate;
 done:
 		path->mnt = mnt;
@@ -1281,8 +1291,7 @@ return_reval:
 		 * We bypassed the ordinary revalidation routines.
 		 * We may need to check the cached dentry for staleness.
 		 */
-		if (nd->path.dentry && nd->path.dentry->d_sb &&
-		    (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
+		if (need_reval_dot(nd->path.dentry)) {
 			if (nameidata_drop_rcu_maybe(nd))
 				return -ECHILD;
 			err = -ESTALE;
@@ -1602,7 +1611,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
-	if (base->d_op && base->d_op->d_hash) {
+	if (base->d_flags & DCACHE_OP_HASH) {
 		err = base->d_op->d_hash(base, inode, name);
 		dentry = ERR_PTR(err);
 		if (err < 0)
@@ -1616,7 +1625,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
 	 */
 	dentry = d_lookup(base, name);
 
-	if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+	if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
 		dentry = do_revalidate(dentry, nd);
 
 	if (!dentry)
@@ -2070,7 +2079,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		follow_dotdot(nd);
 		dir = nd->path.dentry;
 	case LAST_DOT:
-		if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
+		if (need_reval_dot(dir)) {
 			if (!dir->d_op->d_revalidate(dir, nd)) {
 				error = -ESTALE;
 				goto exit;
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 0ba3cdc95a44..4b9cbb28d7fa 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -633,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
 		entry->ino = iunique(dir->i_sb, 2);
 		inode = ncp_iget(dir->i_sb, entry);
 		if (inode) {
-			newdent->d_op = &ncp_dentry_operations;
+			d_set_d_op(newdent, &ncp_dentry_operations);
 			d_instantiate(newdent, inode);
 			if (!hashed)
 				d_rehash(newdent);
@@ -889,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
 	if (inode) {
 		ncp_new_dentry(dentry);
 add_entry:
-		dentry->d_op = &ncp_dentry_operations;
+		d_set_d_op(dentry, &ncp_dentry_operations);
 		d_add(dentry, inode);
 		error = 0;
 	}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 60047dbeb38d..0c75a5f3cafd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -717,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 	sb->s_root = d_alloc_root(root_inode);
         if (!sb->s_root)
 		goto out_no_root;
-	sb->s_root->d_op = &ncp_root_dentry_operations;
+	d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
 	return 0;
 
 out_no_root:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eb77471b8823..37e0a8bb077e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 	if (dentry == NULL)
 		return;
 
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
 	if (IS_ERR(inode))
 		goto out;
@@ -1188,7 +1188,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
 		goto out;
 
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 
 	/*
 	 * If we're doing an exclusive create, optimize away the lookup
@@ -1333,7 +1333,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 		res = ERR_PTR(-ENAMETOOLONG);
 		goto out;
 	}
-	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+	d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
 
 	/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
 	 * the dentry. */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b3e36c3430de..c3a5a1126833 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -121,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 	security_d_instantiate(ret, inode);
 
 	if (ret->d_op == NULL)
-		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+		d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
 out:
 	nfs_free_fattr(fsinfo.fattr);
 	return ret;
@@ -228,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 	security_d_instantiate(ret, inode);
 
 	if (ret->d_op == NULL)
-		ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+		d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
 
 out:
 	nfs_free_fattr(fattr);
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af3..6adafa576065 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
 
 	result = d_obtain_alias(inode);
 	if (!IS_ERR(result))
-		result->d_op = &ocfs2_dentry_ops;
+		d_set_d_op(result, &ocfs2_dentry_ops);
 	else
 		mlog_errno(PTR_ERR(result));
 
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 
 	parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
 	if (!IS_ERR(parent))
-		parent->d_op = &ocfs2_dentry_ops;
+		d_set_d_op(parent, &ocfs2_dentry_ops);
 
 bail_unlock:
 	ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36f..d14cad6e2e41 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	spin_unlock(&oi->ip_lock);
 
 bail_add:
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	ret = d_splice_alias(inode, dentry);
 
 	if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
 		mlog_errno(status);
 		goto leave;
 	}
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 	}
 
 	ihold(inode);
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	d_instantiate(dentry, inode);
 
 out_commit:
@@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir,
 		mlog_errno(status);
 		goto bail;
 	}
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
 		goto out_commit;
 	}
 
-	dentry->d_op = &ocfs2_dentry_ops;
+	d_set_d_op(dentry, &ocfs2_dentry_ops);
 	d_instantiate(dentry, inode);
 	status = 0;
 out_commit:
diff --git a/fs/pipe.c b/fs/pipe.c
index ae3592dcc88c..01a786567810 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1004,7 +1004,7 @@ struct file *create_write_pipe(int flags)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
 
-	path.dentry->d_op = &pipefs_dentry_operations;
+	d_set_d_op(path.dentry, &pipefs_dentry_operations);
 	d_instantiate(path.dentry, inode);
 
 	err = -ENFILE;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d932fdb6a245..85f0a80912aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
 	ei->op.proc_get_link = proc_fd_link;
-	dentry->d_op = &tid_fd_dentry_operations;
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (tid_fd_revalidate(dentry, NULL))
@@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 	ei->fd = fd;
 	inode->i_mode = S_IFREG | S_IRUSR;
 	inode->i_fop = &proc_fdinfo_file_operations;
-	dentry->d_op = &tid_fd_dentry_operations;
+	d_set_d_op(dentry, &tid_fd_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
 	if (p->fop)
 		inode->i_fop = p->fop;
 	ei->op = p->op;
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
 	if (pid_revalidate(dentry, NULL))
@@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
 	if (p->fop)
 		inode->i_fop = p->fop;
 	ei->op = p->op;
-	dentry->d_op = &proc_base_dentry_operations;
+	d_set_d_op(dentry, &proc_base_dentry_operations);
 	d_add(dentry, inode);
 	error = NULL;
 out:
@@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
 	inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
 		ARRAY_SIZE(tgid_base_stuff));
 
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
 	inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
 		ARRAY_SIZE(tid_base_stuff));
 
-	dentry->d_op = &pid_dentry_operations;
+	d_set_d_op(dentry, &pid_dentry_operations);
 
 	d_add(dentry, inode);
 	/* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 1d607be36d95..f766be29d2c7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 out_unlock:
 
 	if (inode) {
-		dentry->d_op = &proc_dentry_operations;
+		d_set_d_op(dentry, &proc_dentry_operations);
 		d_add(dentry, inode);
 		return NULL;
 	}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 998e3a715bcc..35efd85a4d32 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	err = NULL;
-	dentry->d_op = &proc_sys_dentry_operations;
+	d_set_d_op(dentry, &proc_sys_dentry_operations);
 	d_add(dentry, inode);
 
 out:
@@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
 				dput(child);
 				return -ENOMEM;
 			} else {
-				child->d_op = &proc_sys_dentry_operations;
+				d_set_d_op(child, &proc_sys_dentry_operations);
 				d_add(child, inode);
 			}
 		} else {
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7a..e0f0d7ea10a1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
 				strlen(PRIVROOT_NAME));
 	if (!IS_ERR(dentry)) {
 		REISERFS_SB(s)->priv_root = dentry;
-		dentry->d_op = &xattr_lookup_poison_ops;
+		d_set_d_op(dentry, &xattr_lookup_poison_ops);
 		if (dentry->d_inode)
 			dentry->d_inode->i_flags |= S_PRIVATE;
 	} else
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 27e1102e303e..3e076caa8daf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -701,7 +701,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	/* instantiate and hash dentry */
 	ret = d_find_alias(inode);
 	if (!ret) {
-		dentry->d_op = &sysfs_dentry_ops;
+		d_set_d_op(dentry, &sysfs_dentry_ops);
 		dentry->d_fsdata = sysfs_get(sd);
 		d_add(dentry, inode);
 	} else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 7507aeb4c90e..b5e68da2db32 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -48,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
 	struct inode * inode = NULL;
 	ino_t ino;
 
-	dentry->d_op = dir->i_sb->s_root->d_op;
+	d_set_d_op(dentry, dir->i_sb->s_root->d_op);
 	if (dentry->d_name.len > SYSV_NAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 	ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c10..76712aefc4ab 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 	if (sbi->s_forced_ro)
 		sb->s_flags |= MS_RDONLY;
 	if (sbi->s_truncate)
-		sb->s_root->d_op = &sysv_dentry_operations;
+		d_set_d_op(sb->s_root, &sysv_dentry_operations);
 	return 1;
 }
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index e4414693065e..f4b40a751f09 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -183,6 +183,11 @@ struct dentry_operations {
 #define DCACHE_GENOCIDE		0x0200
 #define DCACHE_MOUNTED		0x0400	/* is a mountpoint */
 
+#define DCACHE_OP_HASH		0x1000
+#define DCACHE_OP_COMPARE	0x2000
+#define DCACHE_OP_REVALIDATE	0x4000
+#define DCACHE_OP_DELETE	0x8000
+
 
 extern spinlock_t dcache_inode_lock;
 extern seqlock_t rename_lock;
@@ -201,6 +206,7 @@ extern struct dentry * d_materialise_unique(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
+extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
 
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9f41470c3949..51cddc11cd85 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2222,7 +2222,7 @@ static struct dentry *cgroup_lookup(struct inode *dir,
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
-	dentry->d_op = &cgroup_dentry_operations;
+	d_set_d_op(dentry, &cgroup_dentry_operations);
 	d_add(dentry, NULL);
 	return NULL;
 }
diff --git a/net/socket.c b/net/socket.c
index 817dc92e9ef8..991e266bc7ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -368,7 +368,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	}
 	path.mnt = mntget(sock_mnt);
 
-	path.dentry->d_op = &sockfs_dentry_operations;
+	d_set_d_op(path.dentry, &sockfs_dentry_operations);
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 2899fe27f880..09f01f41e55a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -591,7 +591,7 @@ static struct dentry *__rpc_lookup_create(struct dentry *parent,
 		}
 	}
 	if (!dentry->d_inode)
-		dentry->d_op = &rpc_dentry_operations;
+		d_set_d_op(dentry, &rpc_dentry_operations);
 out_err:
 	return dentry;
 }
-- 
cgit v1.2.3


From 4b936885ab04dc6e0bb0ef35e0e23c1a7364d9e5 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:07 +1100
Subject: fs: improve scalability of pseudo filesystems

Regardless of how much we possibly try to scale dcache, there is likely
always going to be some fundamental contention when adding or removing children
under the same parent. Pseudo filesystems do not seem need to have connected
dentries because by definition they are disconnected.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 fs/anon_inodes.c       |  2 +-
 fs/dcache.c            | 12 ++++++++++++
 fs/pipe.c              |  2 +-
 include/linux/dcache.h |  1 +
 net/socket.c           |  2 +-
 5 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index aca8806fa206..9d92b33da8a0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0;
-	path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
 	if (!path.dentry)
 		goto err_module;
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 09ec945f3c98..9e6e6db76869 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1330,6 +1330,18 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
+{
+	struct dentry *dentry = d_alloc(NULL, name);
+	if (dentry) {
+		dentry->d_sb = sb;
+		dentry->d_parent = dentry;
+		dentry->d_flags |= DCACHE_DISCONNECTED;
+	}
+	return dentry;
+}
+EXPORT_SYMBOL(d_alloc_pseudo);
+
 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 {
 	struct qstr q;
diff --git a/fs/pipe.c b/fs/pipe.c
index 01a786567810..cfe3a7f2ee21 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,7 +999,7 @@ struct file *create_write_pipe(int flags)
 		goto err;
 
 	err = -ENOMEM;
-	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
 	if (!path.dentry)
 		goto err_inode;
 	path.mnt = mntget(pipe_mnt);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d719e4de8046..c0a2ca97c72f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -211,6 +211,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
 
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern struct dentry * d_obtain_alias(struct inode *);
diff --git a/net/socket.c b/net/socket.c
index 991e266bc7ae..0ee74c325320 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -361,7 +361,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
 	if (unlikely(fd < 0))
 		return fd;
 
-	path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
+	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
 	if (unlikely(!path.dentry)) {
 		put_unused_fd(fd);
 		return -ENOMEM;
-- 
cgit v1.2.3


From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@kernel.dk>
Date: Fri, 7 Jan 2011 17:50:11 +1100
Subject: fs: scale mntget/mntput

The problem that this patch aims to fix is vfsmount refcounting scalability.
We need to take a reference on the vfsmount for every successful path lookup,
which often go to the same mount point.

The fundamental difficulty is that a "simple" reference count can never be made
scalable, because any time a reference is dropped, we must check whether that
was the last reference. To do that requires communication with all other CPUs
that may have taken a reference count.

We can make refcounts more scalable in a couple of ways, involving keeping
distributed counters, and checking for the global-zero condition less
frequently.

- check the global sum once every interval (this will delay zero detection
  for some interval, so it's probably a showstopper for vfsmounts).

- keep a local count and only taking the global sum when local reaches 0 (this
  is difficult for vfsmounts, because we can't hold preempt off for the life of
  a reference, so a counter would need to be per-thread or tied strongly to a
  particular CPU which requires more locking).

- keep a local difference of increments and decrements, which allows us to sum
  the total difference and hence find the refcount when summing all CPUs. Then,
  keep a single integer "long" refcount for slow and long lasting references,
  and only take the global sum of local counters when the long refcount is 0.

This last scheme is what I implemented here. Attached mounts and process root
and working directory references are "long" references, and everything else is
a short reference.

This allows scalable vfsmount references during path walking over mounted
subtrees and unattached (lazy umounted) mounts with processes still running
in them.

This results in one fewer atomic op in the fastpath: mntget is now just a
per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock
and non-atomic decrement in the common case. However code is otherwise bigger
and heavier, so single threaded performance is basically a wash.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
---
 arch/ia64/kernel/perfmon.c |   2 +-
 drivers/mtd/mtdchar.c      |   2 +-
 fs/anon_inodes.c           |   2 +-
 fs/fs_struct.c             |  26 +++--
 fs/internal.h              |   1 +
 fs/namei.c                 |  24 +++++
 fs/namespace.c             | 242 +++++++++++++++++++++++++++++++++++++--------
 fs/pipe.c                  |   2 +-
 fs/pnode.c                 |   4 +-
 fs/super.c                 |   2 +-
 include/linux/mount.h      |  53 ++++------
 include/linux/path.h       |   2 +
 net/socket.c               |  19 +++-
 13 files changed, 283 insertions(+), 98 deletions(-)

(limited to 'net')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a24f40bb48e..f099b82703d8 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -1542,7 +1542,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pfm: will go nicely and kill the special-casing in procfs.
  */
-static struct vfsmount *pfmfs_mnt;
+static struct vfsmount *pfmfs_mnt __read_mostly;
 
 static int __init
 init_pfm_fs(void)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4759d827e8c7..f511dd15fd31 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1201,7 +1201,7 @@ err_unregister_chdev:
 static void __exit cleanup_mtdchar(void)
 {
 	unregister_mtd_user(&mtdchar_notifier);
-	mntput(mtd_inode_mnt);
+	mntput_long(mtd_inode_mnt);
 	unregister_filesystem(&mtd_inodefs_type);
 	__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9d92b33da8a0..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
 	return 0;
 
 err_mntput:
-	mntput(anon_inode_mnt);
+	mntput_long(anon_inode_mnt);
 err_unregister_filesystem:
 	unregister_filesystem(&anon_inode_fs_type);
 err_exit:
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 60b8531f41c5..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -17,11 +17,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 	write_seqcount_begin(&fs->seq);
 	old_root = fs->root;
 	fs->root = *path;
-	path_get(path);
+	path_get_long(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 	if (old_root.dentry)
-		path_put(&old_root);
+		path_put_long(&old_root);
 }
 
 /*
@@ -36,12 +36,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 	write_seqcount_begin(&fs->seq);
 	old_pwd = fs->pwd;
 	fs->pwd = *path;
-	path_get(path);
+	path_get_long(path);
 	write_seqcount_end(&fs->seq);
 	spin_unlock(&fs->lock);
 
 	if (old_pwd.dentry)
-		path_put(&old_pwd);
+		path_put_long(&old_pwd);
 }
 
 void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -59,13 +59,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 			write_seqcount_begin(&fs->seq);
 			if (fs->root.dentry == old_root->dentry
 			    && fs->root.mnt == old_root->mnt) {
-				path_get(new_root);
+				path_get_long(new_root);
 				fs->root = *new_root;
 				count++;
 			}
 			if (fs->pwd.dentry == old_root->dentry
 			    && fs->pwd.mnt == old_root->mnt) {
-				path_get(new_root);
+				path_get_long(new_root);
 				fs->pwd = *new_root;
 				count++;
 			}
@@ -76,13 +76,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 	while (count--)
-		path_put(old_root);
+		path_put_long(old_root);
 }
 
 void free_fs_struct(struct fs_struct *fs)
 {
-	path_put(&fs->root);
-	path_put(&fs->pwd);
+	path_put_long(&fs->root);
+	path_put_long(&fs->pwd);
 	kmem_cache_free(fs_cachep, fs);
 }
 
@@ -115,7 +115,13 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 		spin_lock_init(&fs->lock);
 		seqcount_init(&fs->seq);
 		fs->umask = old->umask;
-		get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
+
+		spin_lock(&old->lock);
+		fs->root = old->root;
+		path_get_long(&fs->root);
+		fs->pwd = old->pwd;
+		path_get_long(&fs->pwd);
+		spin_unlock(&old->lock);
 	}
 	return fs;
 }
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..9687c2ee2735 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
 
 extern void free_vfsmnt(struct vfsmount *);
 extern struct vfsmount *alloc_vfsmnt(const char *);
+extern unsigned int mnt_get_count(struct vfsmount *mnt);
 extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
 extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
 				struct vfsmount *);
diff --git a/fs/namei.c b/fs/namei.c
index 4e957bf744ae..19433cdba011 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -367,6 +367,18 @@ void path_get(struct path *path)
 }
 EXPORT_SYMBOL(path_get);
 
+/**
+ * path_get_long - get a long reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get_long(struct path *path)
+{
+	mntget_long(path->mnt);
+	dget(path->dentry);
+}
+
 /**
  * path_put - put a reference to a path
  * @path: path to put the reference to
@@ -380,6 +392,18 @@ void path_put(struct path *path)
 }
 EXPORT_SYMBOL(path_put);
 
+/**
+ * path_put_long - put a long reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put_long(struct path *path)
+{
+	dput(path->dentry);
+	mntput_long(path->mnt);
+}
+
 /**
  * nameidata_drop_rcu - drop this nameidata out of rcu-walk
  * @nd: nameidata pathwalk data to drop
diff --git a/fs/namespace.c b/fs/namespace.c
index 03b82350f020..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
 	mnt->mnt_group_id = 0;
 }
 
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_add_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
+#else
+	preempt_disable();
+	mnt->mnt_count += n;
+	preempt_enable();
+#endif
+}
+
+static inline void mnt_set_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	this_cpu_write(mnt->mnt_pcp->mnt_count, n);
+#else
+	mnt->mnt_count = n;
+#endif
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_inc_count(struct vfsmount *mnt)
+{
+	mnt_add_count(mnt, 1);
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void mnt_dec_count(struct vfsmount *mnt)
+{
+	mnt_add_count(mnt, -1);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+unsigned int mnt_get_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	unsigned int count = atomic_read(&mnt->mnt_longrefs);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
+	}
+
+	return count;
+#else
+	return mnt->mnt_count;
+#endif
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
 	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 				goto out_free_id;
 		}
 
-		atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+		if (!mnt->mnt_pcp)
+			goto out_free_devname;
+
+		atomic_set(&mnt->mnt_longrefs, 1);
+#else
+		mnt->mnt_count = 1;
+		mnt->mnt_writers = 0;
+#endif
+
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -165,13 +233,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 #ifdef CONFIG_FSNOTIFY
 		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
-#endif
-#ifdef CONFIG_SMP
-		mnt->mnt_writers = alloc_percpu(int);
-		if (!mnt->mnt_writers)
-			goto out_free_devname;
-#else
-		mnt->mnt_writers = 0;
 #endif
 	}
 	return mnt;
@@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 static inline void mnt_inc_writers(struct vfsmount *mnt)
 {
 #ifdef CONFIG_SMP
-	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
 #else
 	mnt->mnt_writers++;
 #endif
@@ -228,7 +289,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
 static inline void mnt_dec_writers(struct vfsmount *mnt)
 {
 #ifdef CONFIG_SMP
-	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
 #else
 	mnt->mnt_writers--;
 #endif
@@ -241,7 +302,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
+		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
 	}
 
 	return count;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
 	kfree(mnt->mnt_devname);
 	mnt_free_id(mnt);
 #ifdef CONFIG_SMP
-	free_percpu(mnt->mnt_writers);
+	free_percpu(mnt->mnt_pcp);
 #endif
 	kmem_cache_free(mnt_cache, mnt);
 }
@@ -652,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 	return NULL;
 }
 
-static inline void __mntput(struct vfsmount *mnt)
+static inline void mntfree(struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
+
 	/*
 	 * This probably indicates that somebody messed
 	 * up a mnt_want/drop_write() pair.  If this
@@ -662,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt)
 	 * to make r/w->r/o transitions.
 	 */
 	/*
-	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
-	 * provides barriers, so mnt_get_writers() below is safe.  AV
+	 * The locking used to deal with mnt_count decrement provides barriers,
+	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
 	fsnotify_vfsmount_delete(mnt);
@@ -672,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt)
 	deactivate_super(sb);
 }
 
-void mntput_no_expire(struct vfsmount *mnt)
-{
-repeat:
-	if (atomic_add_unless(&mnt->mnt_count, -1, 1))
-		return;
+#ifdef CONFIG_SMP
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+	if (!longrefs) {
+put_again:
+		br_read_lock(vfsmount_lock);
+		if (likely(atomic_read(&mnt->mnt_longrefs))) {
+			mnt_dec_count(mnt);
+			br_read_unlock(vfsmount_lock);
+			return;
+		}
+		br_read_unlock(vfsmount_lock);
+	} else {
+		BUG_ON(!atomic_read(&mnt->mnt_longrefs));
+		if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
+			return;
+	}
+
 	br_write_lock(vfsmount_lock);
-	if (!atomic_dec_and_test(&mnt->mnt_count)) {
+	if (!longrefs)
+		mnt_dec_count(mnt);
+	else
+		atomic_dec(&mnt->mnt_longrefs);
+	if (mnt_get_count(mnt)) {
 		br_write_unlock(vfsmount_lock);
 		return;
 	}
-	if (likely(!mnt->mnt_pinned)) {
+	if (unlikely(mnt->mnt_pinned)) {
+		mnt_add_count(mnt, mnt->mnt_pinned + 1);
+		mnt->mnt_pinned = 0;
 		br_write_unlock(vfsmount_lock);
-		__mntput(mnt);
+		acct_auto_close_mnt(mnt);
+		goto put_again;
+	}
+	br_write_unlock(vfsmount_lock);
+	mntfree(mnt);
+}
+#else
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+put_again:
+	mnt_dec_count(mnt);
+	if (likely(mnt_get_count(mnt)))
 		return;
+	br_write_lock(vfsmount_lock);
+	if (unlikely(mnt->mnt_pinned)) {
+		mnt_add_count(mnt, mnt->mnt_pinned + 1);
+		mnt->mnt_pinned = 0;
+		br_write_unlock(vfsmount_lock);
+		acct_auto_close_mnt(mnt);
+		goto put_again;
 	}
-	atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
-	mnt->mnt_pinned = 0;
 	br_write_unlock(vfsmount_lock);
-	acct_auto_close_mnt(mnt);
-	goto repeat;
+	mntfree(mnt);
+}
+#endif
+
+static void mntput_no_expire(struct vfsmount *mnt)
+{
+	__mntput(mnt, 0);
+}
+
+void mntput(struct vfsmount *mnt)
+{
+	if (mnt) {
+		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		__mntput(mnt, 0);
+	}
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+	if (mnt)
+		mnt_inc_count(mnt);
+	return mnt;
+}
+EXPORT_SYMBOL(mntget);
+
+void mntput_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	if (mnt) {
+		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		__mntput(mnt, 1);
+	}
+#else
+	mntput(mnt);
+#endif
 }
-EXPORT_SYMBOL(mntput_no_expire);
+EXPORT_SYMBOL(mntput_long);
+
+struct vfsmount *mntget_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	if (mnt)
+		atomic_inc(&mnt->mnt_longrefs);
+	return mnt;
+#else
+	return mntget(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntget_long);
 
 void mnt_pin(struct vfsmount *mnt)
 {
@@ -701,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
 	mnt->mnt_pinned++;
 	br_write_unlock(vfsmount_lock);
 }
-
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *mnt)
 {
 	br_write_lock(vfsmount_lock);
 	if (mnt->mnt_pinned) {
-		atomic_inc(&mnt->mnt_count);
+		mnt_inc_count(mnt);
 		mnt->mnt_pinned--;
 	}
 	br_write_unlock(vfsmount_lock);
 }
-
 EXPORT_SYMBOL(mnt_unpin);
 
 static inline void mangle(struct seq_file *m, const char *s)
@@ -1008,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
 	int minimum_refs = 0;
 	struct vfsmount *p;
 
-	br_read_lock(vfsmount_lock);
+	/* write lock needed for mnt_get_count */
+	br_write_lock(vfsmount_lock);
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		actual_refs += atomic_read(&p->mnt_count);
+		actual_refs += mnt_get_count(p);
 		minimum_refs += 2;
 	}
-	br_read_unlock(vfsmount_lock);
+	br_write_unlock(vfsmount_lock);
 
 	if (actual_refs > minimum_refs)
 		return 0;
@@ -1040,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
 {
 	int ret = 1;
 	down_read(&namespace_sem);
-	br_read_lock(vfsmount_lock);
+	br_write_lock(vfsmount_lock);
 	if (propagate_mount_busy(mnt, 2))
 		ret = 0;
-	br_read_unlock(vfsmount_lock);
+	br_write_unlock(vfsmount_lock);
 	up_read(&namespace_sem);
 	return ret;
 }
@@ -1070,7 +1216,7 @@ void release_mounts(struct list_head *head)
 			dput(dentry);
 			mntput(m);
 		}
-		mntput(mnt);
+		mntput_long(mnt);
 	}
 }
 
@@ -1125,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		    flags & (MNT_FORCE | MNT_DETACH))
 			return -EINVAL;
 
-		if (atomic_read(&mnt->mnt_count) != 2)
+		/*
+		 * probably don't strictly need the lock here if we examined
+		 * all race cases, but it's a slowpath.
+		 */
+		br_write_lock(vfsmount_lock);
+		if (mnt_get_count(mnt) != 2) {
+			br_write_lock(vfsmount_lock);
 			return -EBUSY;
+		}
+		br_write_unlock(vfsmount_lock);
 
 		if (!xchg(&mnt->mnt_expiry_mark, 1))
 			return -EAGAIN;
@@ -1815,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
 
 unlock:
 	up_write(&namespace_sem);
-	mntput(newmnt);
+	mntput_long(newmnt);
 	return err;
 }
 
@@ -2148,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 		if (fs) {
 			if (p == fs->root.mnt) {
 				rootmnt = p;
-				fs->root.mnt = mntget(q);
+				fs->root.mnt = mntget_long(q);
 			}
 			if (p == fs->pwd.mnt) {
 				pwdmnt = p;
-				fs->pwd.mnt = mntget(q);
+				fs->pwd.mnt = mntget_long(q);
 			}
 		}
 		p = next_mnt(p, mnt_ns->root);
@@ -2161,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 	up_write(&namespace_sem);
 
 	if (rootmnt)
-		mntput(rootmnt);
+		mntput_long(rootmnt);
 	if (pwdmnt)
-		mntput(pwdmnt);
+		mntput_long(pwdmnt);
 
 	return new_ns;
 }
@@ -2350,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	br_write_unlock(vfsmount_lock);
 	chroot_fs_refs(&root, &new);
+
 	error = 0;
 	path_put(&root_parent);
 	path_put(&parent_path);
@@ -2376,6 +2531,7 @@ static void __init init_mount_tree(void)
 	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
 	if (IS_ERR(mnt))
 		panic("Can't create rootfs");
+
 	ns = create_mnt_ns(mnt);
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
diff --git a/fs/pipe.c b/fs/pipe.c
index cfe3a7f2ee21..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
 static void __exit exit_pipe_fs(void)
 {
 	unregister_filesystem(&pipe_fs_type);
-	mntput(pipe_mnt);
+	mntput_long(pipe_mnt);
 }
 
 fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..d42514e32380 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
  */
 static inline int do_refcount_check(struct vfsmount *mnt, int count)
 {
-	int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
+	int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
 	return (mycount > count);
 }
 
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
  * Check if any of these mounts that **do not have submounts**
  * have more references than 'refcnt'. If so return busy.
  *
- * vfsmount lock must be held for read or write
+ * vfsmount lock must be held for write
  */
 int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
 {
diff --git a/fs/super.c b/fs/super.c
index 968ba013011a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1140,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 	return mnt;
 
  err:
-	mntput(mnt);
+	mntput_long(mnt);
 	return ERR_PTR(err);
 }
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
 #include <linux/list.h>
 #include <linux/nodemask.h>
 #include <linux/spinlock.h>
+#include <linux/seqlock.h>
 #include <asm/atomic.h>
 
 struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
 
 #define MNT_INTERNAL	0x4000
 
+struct mnt_pcp {
+	int mnt_count;
+	int mnt_writers;
+};
+
 struct vfsmount {
 	struct list_head mnt_hash;
 	struct vfsmount *mnt_parent;	/* fs we are mounted on */
 	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */
 	struct dentry *mnt_root;	/* root of the mounted tree */
 	struct super_block *mnt_sb;	/* pointer to superblock */
+#ifdef CONFIG_SMP
+	struct mnt_pcp __percpu *mnt_pcp;
+	atomic_t mnt_longrefs;
+#else
+	int mnt_count;
+	int mnt_writers;
+#endif
 	struct list_head mnt_mounts;	/* list of children, anchored here */
 	struct list_head mnt_child;	/* and going through their mnt_child */
 	int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
-	/*
-	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
-	 * to let these frequently modified fields in a separate cache line
-	 * (so that reads of mnt_flags wont ping-pong on SMP machines)
-	 */
-	atomic_t mnt_count;
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
-#ifdef CONFIG_SMP
-	int __percpu *mnt_writers;
-#else
-	int mnt_writers;
-#endif
 };
 
-static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
-{
-#ifdef CONFIG_SMP
-	return mnt->mnt_writers;
-#else
-	return &mnt->mnt_writers;
-#endif
-}
-
-static inline struct vfsmount *mntget(struct vfsmount *mnt)
-{
-	if (mnt)
-		atomic_inc(&mnt->mnt_count);
-	return mnt;
-}
-
 struct file; /* forward dec */
 
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
 extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
-extern void mntput_no_expire(struct vfsmount *mnt);
+extern void mntput(struct vfsmount *mnt);
+extern struct vfsmount *mntget(struct vfsmount *mnt);
+extern void mntput_long(struct vfsmount *mnt);
+extern struct vfsmount *mntget_long(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
-static inline void mntput(struct vfsmount *mnt)
-{
-	if (mnt) {
-		mnt->mnt_expiry_mark = 0;
-		mntput_no_expire(mnt);
-	}
-}
-
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
 
diff --git a/include/linux/path.h b/include/linux/path.h
index edc98dec6266..a581e8c06533 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -10,7 +10,9 @@ struct path {
 };
 
 extern void path_get(struct path *);
+extern void path_get_long(struct path *);
 extern void path_put(struct path *);
+extern void path_put_long(struct path *);
 
 static inline int path_equal(const struct path *path1, const struct path *path2)
 {
diff --git a/net/socket.c b/net/socket.c
index 0ee74c325320..815bba3d2fe0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2390,6 +2390,8 @@ EXPORT_SYMBOL(sock_unregister);
 
 static int __init sock_init(void)
 {
+	int err;
+
 	/*
 	 *      Initialize sock SLAB cache.
 	 */
@@ -2406,8 +2408,15 @@ static int __init sock_init(void)
 	 */
 
 	init_inodecache();
-	register_filesystem(&sock_fs_type);
+
+	err = register_filesystem(&sock_fs_type);
+	if (err)
+		goto out_fs;
 	sock_mnt = kern_mount(&sock_fs_type);
+	if (IS_ERR(sock_mnt)) {
+		err = PTR_ERR(sock_mnt);
+		goto out_mount;
+	}
 
 	/* The real protocol initialization is performed in later initcalls.
 	 */
@@ -2420,7 +2429,13 @@ static int __init sock_init(void)
 	skb_timestamping_init();
 #endif
 
-	return 0;
+out:
+	return err;
+
+out_mount:
+	unregister_filesystem(&sock_fs_type);
+out_fs:
+	goto out;
 }
 
 core_initcall(sock_init);	/* early initcall */
-- 
cgit v1.2.3


From 2cf5be93d1b704f342ad423a49f0e78d73939e66 Mon Sep 17 00:00:00 2001
From: Samuel Jero <sj323707@ohio.edu>
Date: Thu, 30 Dec 2010 12:15:16 +0100
Subject: dccp: fix return value for sequence-invalid packets

Currently dccp_check_seqno returns 0 (indicating a valid packet) if the
acknowledgment number is out of bounds and the sync that RFC 4340 mandates at
this point is currently being rate-limited. This function should return -1,
indicating an invalid packet.

Signed-off-by: Samuel Jero <sj323707@ohio.edu>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 15af247ea007..8cde009e8b85 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -260,7 +260,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 		 */
 		if (time_before(now, (dp->dccps_rate_last +
 				      sysctl_dccp_sync_ratelimit)))
-			return 0;
+			return -1;
 
 		DCCP_WARN("Step 6 failed for %s packet, "
 			  "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
-- 
cgit v1.2.3


From 763dadd47c884853a22f2f19ea27e58431303ff3 Mon Sep 17 00:00:00 2001
From: Samuel Jero <sj323707@ohio.edu>
Date: Thu, 30 Dec 2010 12:15:41 +0100
Subject: dccp: fix bug in updating the GSR

Currently dccp_check_seqno allows any valid packet to update the Greatest
Sequence Number Received, even if that packet's sequence number is less than
the current GSR. This patch adds a check to make sure that the new packet's
sequence number is greater than GSR.

Signed-off-by: Samuel Jero <sj323707@ohio.edu>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/dccp.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 45087052d894..5fdb07229017 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -426,7 +426,8 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
 
-	dp->dccps_gsr = seq;
+	if (after48(seq, dp->dccps_gsr))
+		dp->dccps_gsr = seq;
 	/* Sequence validity window depends on remote Sequence Window (7.5.1) */
 	dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
 	/*
-- 
cgit v1.2.3


From bfbb23466adcbc77facea3046b44f75530079472 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Sun, 2 Jan 2011 18:15:58 +0100
Subject: dccp: make upper bound for seq_window consistent on 32/64 bit

The 'seq_window' sysctl sets the initial value for the DCCP Sequence Window,
which may range from 32..2^46-1 (RFC 4340, 7.5.2). The patch sets the upper
bound consistently to 2^32-1 on both 32 and 64 bit systems, which should be
sufficient - with a RTT of 1sec and 1-byte packets, a seq_window of 2^32-1
corresponds to a link speed of 34 Gbps.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 Documentation/networking/dccp.txt | 1 +
 net/dccp/sysctl.c                 | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index b395ca6a49f2..811872b45bee 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -167,6 +167,7 @@ rx_ccid = 2
 seq_window = 100
 	The initial sequence window (sec. 7.5.2) of the sender. This influences
 	the local ackno validity and the remote seqno validity windows (7.5.1).
+	Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
 
 tx_qlen = 5
 	The size of the transmit buffer in packets. A value of 0 corresponds
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 563943822e58..42348824ee31 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -21,7 +21,8 @@
 /* Boundary values */
 static int		zero     = 0,
 			u8_max   = 0xFF;
-static unsigned long	seqw_min = 32;
+static unsigned long	seqw_min = DCCPF_SEQ_WMIN,
+			seqw_max = 0xFFFFFFFF;		/* maximum on 32 bit */
 
 static struct ctl_table dccp_default_table[] = {
 	{
@@ -31,6 +32,7 @@ static struct ctl_table dccp_default_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 		.extra1		= &seqw_min,		/* RFC 4340, 7.5.2 */
+		.extra2		= &seqw_max,
 	},
 	{
 		.procname	= "rx_ccid",
-- 
cgit v1.2.3


From 0ab03c2b1478f2438d2c80204f7fef65b1bca9cf Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 7 Jan 2011 03:15:05 +0000
Subject: netlink: test for all flags of the NLM_F_DUMP composite

Due to NLM_F_DUMP is composed of two bits, NLM_F_ROOT | NLM_F_MATCH,
when doing "if (x & NLM_F_DUMP)", it tests for _either_ of the bits
being set. Because NLM_F_MATCH's value overlaps with NLM_F_EXCL,
non-dump requests with NLM_F_EXCL set are mistaken as dump requests.

Substitute the condition to test for _all_ bits being set.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c                 | 2 +-
 net/ipv4/inet_diag.c                 | 2 +-
 net/netfilter/nf_conntrack_netlink.c | 4 ++--
 net/netlink/genetlink.c              | 2 +-
 net/xfrm/xfrm_user.c                 | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 750db57f3bb3..a5f7535aab5b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1820,7 +1820,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+	if (kind == 2 && (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		struct sock *rtnl;
 		rtnl_dumpit_func dumpit;
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2ada17129fce..2746c1fa6417 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -858,7 +858,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	    nlmsg_len(nlh) < hdrlen)
 		return -EINVAL;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		if (nlmsg_attrlen(nlh, hdrlen)) {
 			struct nlattr *attr;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0cdba50c0d69..746140264b2d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -928,7 +928,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	u16 zone;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP)
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
 					  ctnetlink_done);
 
@@ -1790,7 +1790,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	u16 zone;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
 					  ctnetlink_exp_dump_table,
 					  ctnetlink_exp_done);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1781d99145e2..f83cb370292b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -519,7 +519,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	    security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		if (ops->dumpit == NULL)
 			return -EOPNOTSUPP;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8eb889510916..6a8da81ff66f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2187,7 +2187,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
 	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
-	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
+	    (nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		if (link->dump == NULL)
 			return -EINVAL;
 
-- 
cgit v1.2.3


From 697d0e338c7fd392cf73bf120150ab6e5516a3a3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sat, 8 Jan 2011 17:41:42 +0000
Subject: net: fix kernel-doc warning in core/filter.c

Fix new kernel-doc notation warning in net/core/filter.c:

Warning(net/core/filter.c:172): No description found for parameter 'fentry'
Warning(net/core/filter.c:172): Excess function parameter 'filter' description in 'sk_run_filter'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/filter.c b/net/core/filter.c
index 2b27d4efdd48..afc58374ca96 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL(sk_filter);
 /**
  *	sk_run_filter - run a filter on a socket
  *	@skb: buffer to run the filter on
- *	@filter: filter to apply
+ *	@fentry: filter to apply
  *
  * Decode and apply filter instructions to the skb->data.
  * Return length to keep, 0 for none. @skb is the data we are
-- 
cgit v1.2.3


From 9497a0518e8183959e45da05f317f1cb36f2cd7c Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:30 +0000
Subject: net offloading: Accept NETIF_F_HW_CSUM for all protocols.

We currently only have software fallback for one type of checksum: the
TCP/UDP one's complement.  This means that a protocol that uses hardware
offloading for a different type of checksum (FCoE, SCTP) must directly
check the device's features and do the right thing ahead of time.  By
the time we get to dev_can_checksum(), we're only deciding whether to
apply the one algorithm in software or hardware.  NETIF_F_HW_CSUM has the
same capabilities as the software version, so we should always use it if
present.  The primary advantage of this is multiply tagged vlans can use
hardware checksumming.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a215269d2e35..d8befd06da04 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1734,7 +1734,7 @@ EXPORT_SYMBOL(netif_device_attach);
 
 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 {
-	return ((features & NETIF_F_NO_CSUM) ||
+	return ((features & NETIF_F_GEN_CSUM) ||
 		((features & NETIF_F_V4_CSUM) &&
 		 protocol == htons(ETH_P_IP)) ||
 		((features & NETIF_F_V6_CSUM) &&
-- 
cgit v1.2.3


From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:31 +0000
Subject: net offloading: Generalize netif_get_vlan_features().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

netif_get_vlan_features() is currently only used by netif_needs_gso(),
so it only concerns itself with GSO features.  However, several other
places also should take into account the contents of the packet when
deciding whether to offload to hardware.  This generalizes the function
to return features about all of the various forms of offloading.  Since
offloads tend to be linked together, this avoids duplicating the logic
in each location (i.e. the scatter/gather code also needs the checksum
logic).

Suggested-by: Michał Mirosław <mirqus@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 35 +++++++++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0f6b1c965815..d4dac09a5ad2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2303,7 +2303,7 @@ unsigned long netdev_fix_features(unsigned long features, const char *name);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev);
+int netif_skb_features(struct sk_buff *skb);
 
 static inline int net_gso_ok(int features, int gso_type)
 {
@@ -2320,7 +2320,7 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
 {
 	if (skb_is_gso(skb)) {
-		int features = netif_get_vlan_features(skb, dev);
+		int features = netif_skb_features(skb);
 
 		return (!skb_gso_ok(skb, features) ||
 			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/net/core/dev.c b/net/core/dev.c
index d8befd06da04..a51dfd7b56fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+	if (!can_checksum_protocol(protocol, features)) {
+		features &= ~NETIF_F_ALL_CSUM;
+		features &= ~NETIF_F_SG;
+	} else if (illegal_highdma(skb->dev, skb)) {
+		features &= ~NETIF_F_SG;
+	}
+
+	return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
+	int features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!skb->vlan_tci)
-		return dev->features;
+	} else if (!vlan_tx_tag_present(skb)) {
+		return harmonize_features(skb, protocol, features);
+	}
 
-	if (protocol != htons(ETH_P_8021Q))
-		return dev->features & dev->vlan_features;
-	else
-		return 0;
+	features &= skb->dev->vlan_features;
+
+	if (protocol != htons(ETH_P_8021Q)) {
+		return harmonize_features(skb, protocol, features);
+	} else {
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+				NETIF_F_GEN_CSUM;
+		return harmonize_features(skb, protocol, features);
+	}
 }
-EXPORT_SYMBOL(netif_get_vlan_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 /*
  * Returns true if either:
-- 
cgit v1.2.3


From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:32 +0000
Subject: net offloading: Pass features into netif_needs_gso().

Now that there is a single function that can compute the device
features relevant to a packet, we don't want to run it for each
offload.  This converts netif_needs_gso() to take the features
of the device, rather than computing them itself.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c |  2 +-
 include/linux/netdevice.h  | 12 +++---------
 net/core/dev.c             |  8 ++++++--
 3 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cdbeec9f83ea..546de5749824 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -488,7 +488,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(!netif_carrier_ok(dev) ||
 		     (frags > 1 && !xennet_can_sg(dev)) ||
-		     netif_needs_gso(dev, skb))) {
+		     netif_needs_gso(skb, netif_skb_features(skb)))) {
 		spin_unlock_irq(&np->tx_lock);
 		goto drop;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d4dac09a5ad2..de2bfe6da359 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2317,16 +2317,10 @@ static inline int skb_gso_ok(struct sk_buff *skb, int features)
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+static inline int netif_needs_gso(struct sk_buff *skb, int features)
 {
-	if (skb_is_gso(skb)) {
-		int features = netif_skb_features(skb);
-
-		return (!skb_gso_ok(skb, features) ||
-			unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
-	}
-
-	return 0;
+	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
+		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
 }
 
 static inline void netif_set_gso_max_size(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index a51dfd7b56fb..1444ed3861a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
+		int features;
+
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_orphan_try(skb);
 
+		features = netif_skb_features(skb);
+
 		if (vlan_tx_tag_present(skb) &&
-		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
+		    !(features & NETIF_F_HW_VLAN_TX)) {
 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
 			if (unlikely(!skb))
 				goto out;
@@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->vlan_tci = 0;
 		}
 
-		if (netif_needs_gso(dev, skb)) {
+		if (netif_needs_gso(skb, features)) {
 			if (unlikely(dev_gso_segment(skb)))
 				goto out_kfree_skb;
 			if (skb->next)
-- 
cgit v1.2.3


From 91ecb63c074d802f8cf103f1dafb4aed24d0f24c Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:33 +0000
Subject: net offloading: Convert dev_gso_segment() to use precomputed
 features.

This switches dev_gso_segment() to use the device features computed
by the centralized routine.  In doing so, it fixes a problem where
it would always use dev->features, instead of those appropriate
to the number of vlan tags if any are present.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1444ed3861a0..4cd3e84e1294 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1971,16 +1971,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 /**
  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
  *	@skb: buffer to segment
+ *	@features: device features as applicable to this skb
  *
  *	This function segments the given skb and stores the list of segments
  *	in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
 {
-	struct net_device *dev = skb->dev;
 	struct sk_buff *segs;
-	int features = dev->features & ~(illegal_highdma(dev, skb) ?
-					 NETIF_F_SG : 0);
 
 	segs = skb_gso_segment(skb, features);
 
@@ -2112,7 +2110,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		if (netif_needs_gso(skb, features)) {
-			if (unlikely(dev_gso_segment(skb)))
+			if (unlikely(dev_gso_segment(skb, features)))
 				goto out_kfree_skb;
 			if (skb->next)
 				goto gso;
-- 
cgit v1.2.3


From 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:34 +0000
Subject: net offloading: Convert skb_need_linearize() to use precomputed
 features.

This switches skb_need_linearize() to use the features that have
been centrally computed.  In doing so, this fixes a problem where
scatter/gather should not be used because the card does not support
checksum offloading on that type of packet.  On device registration
we only check that some form of checksum offloading is available if
scatter/gatther is enabled but we must also check at transmission
time.  Examples of this include IPv6 or vlan packets on a NIC that
only supports IPv4 offloading.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4cd3e84e1294..2f838f1d222c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2059,22 +2059,13 @@ EXPORT_SYMBOL(netif_skb_features);
  *	   support DMA from it.
  */
 static inline int skb_needs_linearize(struct sk_buff *skb,
-				      struct net_device *dev)
+				      int features)
 {
-	if (skb_is_nonlinear(skb)) {
-		int features = dev->features;
-
-		if (vlan_tx_tag_present(skb))
-			features &= dev->vlan_features;
-
-		return (skb_has_frag_list(skb) &&
-			!(features & NETIF_F_FRAGLIST)) ||
+	return skb_is_nonlinear(skb) &&
+			((skb_has_frag_list(skb) &&
+				!(features & NETIF_F_FRAGLIST)) ||
 			(skb_shinfo(skb)->nr_frags &&
-			(!(features & NETIF_F_SG) ||
-			illegal_highdma(dev, skb)));
-	}
-
-	return 0;
+				!(features & NETIF_F_SG)));
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2115,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			if (skb->next)
 				goto gso;
 		} else {
-			if (skb_needs_linearize(skb, dev) &&
+			if (skb_needs_linearize(skb, features) &&
 			    __skb_linearize(skb))
 				goto out_kfree_skb;
 
-- 
cgit v1.2.3


From 0363466866d901fbc658f4e63dd61e7cc93dd0af Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:35 +0000
Subject: net offloading: Convert checksums to use centrally computed features.

In order to compute the features for other offloads (primarily
scatter/gather), we need to first check the ability of the NIC to
offload the checksum for the packet.  Since we have already computed
this, we can directly use the result instead of figuring it out
again.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2f838f1d222c..3fe443be4b15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
-	return ((features & NETIF_F_GEN_CSUM) ||
-		((features & NETIF_F_V4_CSUM) &&
-		 protocol == htons(ETH_P_IP)) ||
-		((features & NETIF_F_V6_CSUM) &&
-		 protocol == htons(ETH_P_IPV6)) ||
-		((features & NETIF_F_FCOE_CRC) &&
-		 protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
-	__be16 protocol = skb->protocol;
-	int features = dev->features;
-
-	if (vlan_tx_tag_present(skb)) {
-		features &= dev->vlan_features;
-	} else if (protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-		features &= dev->vlan_features;
-	}
-
-	return can_checksum_protocol(features, protocol);
-}
-
 /**
  * skb_dev_set -- assign a new device to a buffer
  * @skb: buffer for the new device
@@ -2015,6 +1988,17 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+	return ((features & NETIF_F_GEN_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
+		 protocol == htons(ETH_P_IP)) ||
+		((features & NETIF_F_V6_CSUM) &&
+		 protocol == htons(ETH_P_IPV6)) ||
+		((features & NETIF_F_FCOE_CRC) &&
+		 protocol == htons(ETH_P_FCOE)));
+}
+
 static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
 {
 	if (!can_checksum_protocol(protocol, features)) {
@@ -2117,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				skb_set_transport_header(skb,
 					skb_checksum_start_offset(skb));
-				if (!dev_can_checksum(dev, skb) &&
+				if (!(features & NETIF_F_ALL_CSUM) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
 			}
-- 
cgit v1.2.3


From 83723d60717f8da0f53f91cf42a845ed56c09662 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 Jan 2011 20:11:38 +0100
Subject: netfilter: x_tables: dont block BH while reading counters

Using "iptables -L" with a lot of rules have a too big BH latency.
Jesper mentioned ~6 ms and worried of frame drops.

Switch to a per_cpu seqlock scheme, so that taking a snapshot of
counters doesnt need to block BH (for this cpu, but also other cpus).

This adds two increments on seqlock sequence per ipt_do_table() call,
its a reasonable cost for allowing "iptables -L" not block BH
processing.

Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 10 ++++-----
 net/ipv4/netfilter/arp_tables.c    | 45 ++++++++++++--------------------------
 net/ipv4/netfilter/ip_tables.c     | 45 ++++++++++++--------------------------
 net/ipv6/netfilter/ip6_tables.c    | 45 ++++++++++++--------------------------
 net/netfilter/x_tables.c           |  3 ++-
 5 files changed, 49 insertions(+), 99 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec051440..6712e713b299 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -472,7 +472,7 @@ extern void xt_free_table_info(struct xt_table_info *info);
  *  necessary for reading the counters.
  */
 struct xt_info_lock {
-	spinlock_t lock;
+	seqlock_t lock;
 	unsigned char readers;
 };
 DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -497,7 +497,7 @@ static inline void xt_info_rdlock_bh(void)
 	local_bh_disable();
 	lock = &__get_cpu_var(xt_info_locks);
 	if (likely(!lock->readers++))
-		spin_lock(&lock->lock);
+		write_seqlock(&lock->lock);
 }
 
 static inline void xt_info_rdunlock_bh(void)
@@ -505,7 +505,7 @@ static inline void xt_info_rdunlock_bh(void)
 	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
 
 	if (likely(!--lock->readers))
-		spin_unlock(&lock->lock);
+		write_sequnlock(&lock->lock);
 	local_bh_enable();
 }
 
@@ -516,12 +516,12 @@ static inline void xt_info_rdunlock_bh(void)
  */
 static inline void xt_info_wrlock(unsigned int cpu)
 {
-	spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+	write_seqlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 static inline void xt_info_wrunlock(unsigned int cpu)
 {
-	spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+	write_sequnlock(&per_cpu(xt_info_locks, cpu).lock);
 }
 
 /*
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3fac340a28d5..e855fffaed95 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -710,42 +710,25 @@ static void get_counters(const struct xt_table_info *t,
 	struct arpt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -759,7 +742,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	 * about).
 	 */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1007,7 +990,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct arpt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d633b3b6..652efea013dc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@ get_counters(const struct xt_table_info *t,
 	struct ipt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU.
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ipt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..7d227c644f72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
 	struct ip6t_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqbegin(lock);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqretry(lock, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i;
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ip6t_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 80463507420e..c94237631077 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1325,7 +1325,8 @@ static int __init xt_init(void)
 
 	for_each_possible_cpu(i) {
 		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-		spin_lock_init(&lock->lock);
+
+		seqlock_init(&lock->lock);
 		lock->readers = 0;
 	}
 
-- 
cgit v1.2.3


From 6650239a4b01077e80d5a4468562756d77afaa59 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 8 Jan 2011 17:45:38 -0500
Subject: NFS: Don't use vm_map_ram() in readdir

vm_map_ram() is not available on NOMMU platforms, and causes trouble
on incoherrent architectures such as ARM when we access the page data
through both the direct and the virtual mapping.

The alternative is to use the direct mapping to access page data
for the case when we are not crossing a page boundary, but to copy
the data into a linear scratch buffer when we are accessing data
that spans page boundaries.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: stable@kernel.org  [2.6.37]
---
 fs/nfs/dir.c               |  44 ++++++-------
 fs/nfs/nfs2xdr.c           |   6 --
 fs/nfs/nfs3xdr.c           |   6 --
 fs/nfs/nfs4xdr.c           |   6 --
 include/linux/sunrpc/xdr.h |   4 +-
 net/sunrpc/xdr.c           | 155 ++++++++++++++++++++++++++++++++++++---------
 6 files changed, 148 insertions(+), 73 deletions(-)

(limited to 'net')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 996dd8989a91..0108cf4f3403 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,7 +33,6 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
-#include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
 
 #include "delegation.h"
@@ -459,25 +458,26 @@ out:
 /* Perform conversion from xdr to cache array */
 static
 int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
-				void *xdr_page, struct page *page, unsigned int buflen)
+				struct page **xdr_pages, struct page *page, unsigned int buflen)
 {
 	struct xdr_stream stream;
-	struct xdr_buf buf;
-	__be32 *ptr = xdr_page;
+	struct xdr_buf buf = {
+		.pages = xdr_pages,
+		.page_len = buflen,
+		.buflen = buflen,
+		.len = buflen,
+	};
+	struct page *scratch;
 	struct nfs_cache_array *array;
 	unsigned int count = 0;
 	int status;
 
-	buf.head->iov_base = xdr_page;
-	buf.head->iov_len = buflen;
-	buf.tail->iov_len = 0;
-	buf.page_base = 0;
-	buf.page_len = 0;
-	buf.buflen = buf.head->iov_len;
-	buf.len = buf.head->iov_len;
-
-	xdr_init_decode(&stream, &buf, ptr);
+	scratch = alloc_page(GFP_KERNEL);
+	if (scratch == NULL)
+		return -ENOMEM;
 
+	xdr_init_decode(&stream, &buf, NULL);
+	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	do {
 		status = xdr_decode(desc, entry, &stream);
@@ -506,6 +506,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 		} else
 			status = PTR_ERR(array);
 	}
+
+	put_page(scratch);
 	return status;
 }
 
@@ -521,7 +523,6 @@ static
 void nfs_readdir_free_large_page(void *ptr, struct page **pages,
 		unsigned int npages)
 {
-	vm_unmap_ram(ptr, npages);
 	nfs_readdir_free_pagearray(pages, npages);
 }
 
@@ -530,9 +531,8 @@ void nfs_readdir_free_large_page(void *ptr, struct page **pages,
  * to nfs_readdir_free_large_page
  */
 static
-void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
+int nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-	void *ptr;
 	unsigned int i;
 
 	for (i = 0; i < npages; i++) {
@@ -541,13 +541,11 @@ void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 			goto out_freepages;
 		pages[i] = page;
 	}
+	return 0;
 
-	ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
-	if (!IS_ERR_OR_NULL(ptr))
-		return ptr;
 out_freepages:
 	nfs_readdir_free_pagearray(pages, i);
-	return NULL;
+	return -ENOMEM;
 }
 
 static
@@ -577,8 +575,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
-	pages_ptr = nfs_readdir_large_page(pages, array_size);
-	if (!pages_ptr)
+	status = nfs_readdir_large_page(pages, array_size);
+	if (status < 0)
 		goto out_release_array;
 	do {
 		unsigned int pglen;
@@ -587,7 +585,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		if (status < 0)
 			break;
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
 		if (status < 0) {
 			if (status == -ENOSPC)
 				status = 0;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5914a1911c95..b382a1b5e7e4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -487,12 +487,6 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
 
 	entry->d_type = DT_UNKNOWN;
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f6cc60f06dac..ba91236c6ee7 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -647,12 +647,6 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
 			memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
 	}
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9f1826b012e6..0662a9821df5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6215,12 +6215,6 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (verify_attr_len(xdr, p, len) < 0)
 		goto out_overflow;
 
-	p = xdr_inline_peek(xdr, 8);
-	if (p != NULL)
-		entry->eof = !p[0] && p[1];
-	else
-		entry->eof = 0;
-
 	return p;
 
 out_overflow:
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 498ab93a81e4..7783c687c777 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -201,6 +201,8 @@ struct xdr_stream {
 
 	__be32 *end;		/* end of available buffer space */
 	struct kvec *iov;	/* pointer to the current kvec */
+	struct kvec scratch;	/* Scratch buffer */
+	struct page **page_ptr;	/* pointer to the current page */
 };
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
@@ -208,7 +210,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
-extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index cd9e841e7492..679cd674b81d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -552,6 +552,74 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
 }
 EXPORT_SYMBOL_GPL(xdr_write_pages);
 
+static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
+		__be32 *p, unsigned int len)
+{
+	if (len > iov->iov_len)
+		len = iov->iov_len;
+	if (p == NULL)
+		p = (__be32*)iov->iov_base;
+	xdr->p = p;
+	xdr->end = (__be32*)(iov->iov_base + len);
+	xdr->iov = iov;
+	xdr->page_ptr = NULL;
+}
+
+static int xdr_set_page_base(struct xdr_stream *xdr,
+		unsigned int base, unsigned int len)
+{
+	unsigned int pgnr;
+	unsigned int maxlen;
+	unsigned int pgoff;
+	unsigned int pgend;
+	void *kaddr;
+
+	maxlen = xdr->buf->page_len;
+	if (base >= maxlen)
+		return -EINVAL;
+	maxlen -= base;
+	if (len > maxlen)
+		len = maxlen;
+
+	base += xdr->buf->page_base;
+
+	pgnr = base >> PAGE_SHIFT;
+	xdr->page_ptr = &xdr->buf->pages[pgnr];
+	kaddr = page_address(*xdr->page_ptr);
+
+	pgoff = base & ~PAGE_MASK;
+	xdr->p = (__be32*)(kaddr + pgoff);
+
+	pgend = pgoff + len;
+	if (pgend > PAGE_SIZE)
+		pgend = PAGE_SIZE;
+	xdr->end = (__be32*)(kaddr + pgend);
+	xdr->iov = NULL;
+	return 0;
+}
+
+static void xdr_set_next_page(struct xdr_stream *xdr)
+{
+	unsigned int newbase;
+
+	newbase = (1 + xdr->page_ptr - xdr->buf->pages) << PAGE_SHIFT;
+	newbase -= xdr->buf->page_base;
+
+	if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
+		xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+}
+
+static bool xdr_set_next_buffer(struct xdr_stream *xdr)
+{
+	if (xdr->page_ptr != NULL)
+		xdr_set_next_page(xdr);
+	else if (xdr->iov == xdr->buf->head) {
+		if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
+			xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+	}
+	return xdr->p != xdr->end;
+}
+
 /**
  * xdr_init_decode - Initialize an xdr_stream for decoding data.
  * @xdr: pointer to xdr_stream struct
@@ -560,41 +628,67 @@ EXPORT_SYMBOL_GPL(xdr_write_pages);
  */
 void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 {
-	struct kvec *iov = buf->head;
-	unsigned int len = iov->iov_len;
-
-	if (len > buf->len)
-		len = buf->len;
 	xdr->buf = buf;
-	xdr->iov = iov;
-	xdr->p = p;
-	xdr->end = (__be32 *)((char *)iov->iov_base + len);
+	xdr->scratch.iov_base = NULL;
+	xdr->scratch.iov_len = 0;
+	if (buf->head[0].iov_len != 0)
+		xdr_set_iov(xdr, buf->head, p, buf->len);
+	else if (buf->page_len != 0)
+		xdr_set_page_base(xdr, 0, buf->len);
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
-/**
- * xdr_inline_peek - Allow read-ahead in the XDR data stream
- * @xdr: pointer to xdr_stream struct
- * @nbytes: number of bytes of data to decode
- *
- * Check if the input buffer is long enough to enable us to decode
- * 'nbytes' more bytes of data starting at the current position.
- * If so return the current pointer without updating the current
- * pointer position.
- */
-__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 	__be32 *p = xdr->p;
 	__be32 *q = p + XDR_QUADLEN(nbytes);
 
 	if (unlikely(q > xdr->end || q < p))
 		return NULL;
+	xdr->p = q;
 	return p;
 }
-EXPORT_SYMBOL_GPL(xdr_inline_peek);
 
 /**
- * xdr_inline_decode - Retrieve non-page XDR data to decode
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+	xdr->scratch.iov_base = buf;
+	xdr->scratch.iov_len = buflen;
+}
+EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
+
+static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
+{
+	__be32 *p;
+	void *cpdest = xdr->scratch.iov_base;
+	size_t cplen = (char *)xdr->end - (char *)xdr->p;
+
+	if (nbytes > xdr->scratch.iov_len)
+		return NULL;
+	memcpy(cpdest, xdr->p, cplen);
+	cpdest += cplen;
+	nbytes -= cplen;
+	if (!xdr_set_next_buffer(xdr))
+		return NULL;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p == NULL)
+		return NULL;
+	memcpy(cpdest, p, nbytes);
+	return xdr->scratch.iov_base;
+}
+
+/**
+ * xdr_inline_decode - Retrieve XDR data to decode
  * @xdr: pointer to xdr_stream struct
  * @nbytes: number of bytes of data to decode
  *
@@ -605,13 +699,16 @@ EXPORT_SYMBOL_GPL(xdr_inline_peek);
  */
 __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
-	__be32 *p = xdr->p;
-	__be32 *q = p + XDR_QUADLEN(nbytes);
+	__be32 *p;
 
-	if (unlikely(q > xdr->end || q < p))
+	if (nbytes == 0)
+		return xdr->p;
+	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
 		return NULL;
-	xdr->p = q;
-	return p;
+	p = __xdr_inline_decode(xdr, nbytes);
+	if (p != NULL)
+		return p;
+	return xdr_copy_to_scratch(xdr, nbytes);
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
@@ -671,16 +768,12 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
  */
 void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
 {
-	char * kaddr = page_address(xdr->buf->pages[0]);
 	xdr_read_pages(xdr, len);
 	/*
 	 * Position current pointer at beginning of tail, and
 	 * set remaining message length.
 	 */
-	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
-		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
-	xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
-	xdr->end = (__be32 *)((char *)xdr->p + len);
+	xdr_set_page_base(xdr, 0, len);
 }
 EXPORT_SYMBOL_GPL(xdr_enter_page);
 
-- 
cgit v1.2.3


From facb4edc1e0e849ea98e147a821e60d6d6272c0a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 10 Jan 2011 04:06:58 +0000
Subject: phonet: some signedness bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dan Rosenberg pointed out that there were some signed comparison bugs
in the phonet protocol.

http://marc.info/?l=full-disclosure&m=129424528425330&w=2

The problem is that we check for array overflows but "protocol" is
signed and we don't check for array underflows.  If you have already
have CAP_SYS_ADMIN then you could use the bugs to get root, or someone
could cause an oops by mistake.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/phonet.h | 4 ++--
 net/phonet/af_phonet.c      | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index d5df797f9540..5395e09187df 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -107,8 +107,8 @@ struct phonet_protocol {
 	int			sock_type;
 };
 
-int phonet_proto_register(int protocol, struct phonet_protocol *pp);
-void phonet_proto_unregister(int protocol, struct phonet_protocol *pp);
+int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp);
+void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp);
 
 int phonet_sysctl_init(void);
 void phonet_sysctl_exit(void);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index fd95beb72f5d..1072b2c19d31 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -37,7 +37,7 @@
 /* Transport protocol registration */
 static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
 
-static struct phonet_protocol *phonet_proto_get(int protocol)
+static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
 {
 	struct phonet_protocol *pp;
 
@@ -458,7 +458,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
 
 static DEFINE_MUTEX(proto_tab_lock);
 
-int __init_or_module phonet_proto_register(int protocol,
+int __init_or_module phonet_proto_register(unsigned int protocol,
 						struct phonet_protocol *pp)
 {
 	int err = 0;
@@ -481,7 +481,7 @@ int __init_or_module phonet_proto_register(int protocol,
 }
 EXPORT_SYMBOL(phonet_proto_register);
 
-void phonet_proto_unregister(int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
 {
 	mutex_lock(&proto_tab_lock);
 	BUG_ON(proto_tab[protocol] != pp);
-- 
cgit v1.2.3


From 91b5c98c2e062f982423686c77b8bf31f37fa196 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Mon, 10 Jan 2011 16:00:54 -0800
Subject: caif: don't set connection request param size before copying data

The size field should not be set until after the data is successfully
copied in.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 1bf0cf503796..8184c031d028 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -740,12 +740,12 @@ static int setsockopt(struct socket *sock,
 		if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
 			return -ENOPROTOOPT;
 		lock_sock(&(cf_sk->sk));
-		cf_sk->conn_req.param.size = ol;
 		if (ol > sizeof(cf_sk->conn_req.param.data) ||
 			copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
 			release_sock(&cf_sk->sk);
 			return -EINVAL;
 		}
+		cf_sk->conn_req.param.size = ol;
 		release_sock(&cf_sk->sk);
 		return 0;
 
-- 
cgit v1.2.3


From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 9 Jan 2011 19:36:31 +0000
Subject: net: Add alloc_netdev_mqs function

Added alloc_netdev_mqs function which allows the number of transmit and
receive queues to be specified independenty.  alloc_netdev_mq was
changed to a macro to call the new function.  Also added
alloc_etherdev_mqs with same purpose.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  4 +++-
 include/linux/netdevice.h   | 10 +++++++---
 net/core/dev.c              | 32 +++++++++++++++++++++-----------
 net/ethernet/eth.c          | 12 +++++++-----
 4 files changed, 38 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index f16a01081e15..bec8b82889bf 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -48,8 +48,10 @@ extern int eth_validate_addr(struct net_device *dev);
 
 
-extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count);
+extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+					    unsigned int rxqs);
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
+#define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
 
 /**
  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index de2bfe6da359..be4957cf6511 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2191,11 +2191,15 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 extern void		ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
+extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 				       void (*setup)(struct net_device *),
-				       unsigned int queue_count);
+				       unsigned int txqs, unsigned int rxqs);
 #define alloc_netdev(sizeof_priv, name, setup) \
-	alloc_netdev_mq(sizeof_priv, name, setup, 1)
+	alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
+
+#define alloc_netdev_mq(sizeof_priv, name, setup, count) \
+	alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
+
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 3fe443be4b15..3295b94884ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 }
 
 /**
- *	alloc_netdev_mq - allocate network device
+ *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
  *	@name:		device name format string
  *	@setup:		callback to initialize device
- *	@queue_count:	the number of subqueues to allocate
+ *	@txqs:		the number of TX subqueues to allocate
+ *	@rxqs:		the number of RX subqueues to allocate
  *
  *	Allocates a struct net_device with private data area for driver use
  *	and performs basic initialization.  Also allocates subquue structs
- *	for each queue on the device at the end of the netdevice.
+ *	for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-		void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *),
+		unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
 	size_t alloc_size;
@@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
-	if (queue_count < 1) {
+	if (txqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device "
 		       "with zero queues.\n");
 		return NULL;
 	}
 
+#ifdef CONFIG_RPS
+	if (rxqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero RX queues.\n");
+		return NULL;
+	}
+#endif
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev_net_set(dev, &init_net);
 
-	dev->num_tx_queues = queue_count;
-	dev->real_num_tx_queues = queue_count;
+	dev->num_tx_queues = txqs;
+	dev->real_num_tx_queues = txqs;
 	if (netif_alloc_netdev_queues(dev))
 		goto free_pcpu;
 
 #ifdef CONFIG_RPS
-	dev->num_rx_queues = queue_count;
-	dev->real_num_rx_queues = queue_count;
+	dev->num_rx_queues = rxqs;
+	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
 		goto free_pcpu;
 #endif
@@ -5707,7 +5717,7 @@ free_p:
 	kfree(p);
 	return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
 
 /**
  *	free_netdev - free network device
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f00ef2f1d814..f9d7ac924f15 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -347,10 +347,11 @@ void ether_setup(struct net_device *dev)
 EXPORT_SYMBOL(ether_setup);
 
 /**
- * alloc_etherdev_mq - Allocates and sets up an Ethernet device
+ * alloc_etherdev_mqs - Allocates and sets up an Ethernet device
  * @sizeof_priv: Size of additional driver-private structure to be allocated
  *	for this Ethernet device
- * @queue_count: The number of queues this device has.
+ * @txqs: The number of TX queues this device has.
+ * @txqs: The number of RX queues this device has.
  *
  * Fill in the fields of the device structure with Ethernet-generic
  * values. Basically does everything except registering the device.
@@ -360,11 +361,12 @@ EXPORT_SYMBOL(ether_setup);
  * this private data area.
  */
 
-struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+				      unsigned int rxqs)
 {
-	return alloc_netdev_mq(sizeof_priv, "eth%d", ether_setup, queue_count);
+	return alloc_netdev_mqs(sizeof_priv, "eth%d", ether_setup, txqs, rxqs);
 }
-EXPORT_SYMBOL(alloc_etherdev_mq);
+EXPORT_SYMBOL(alloc_etherdev_mqs);
 
 static size_t _format_mac_addr(char *buf, int buflen,
 			       const unsigned char *addr, int len)
-- 
cgit v1.2.3


From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 9 Jan 2011 08:30:54 +0000
Subject: net_sched: factorize qdisc stats handling

HTB takes into account skb is segmented in stats updates.
Generalize this to all schedulers.

They should use qdisc_bstats_update() helper instead of manipulating
bstats.bytes and bstats.packets

Add bstats_update() helper too for classes that use
gnet_stats_basic_packed fields.

Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no
stab is setup on qdisc.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 20 ++++++++++++++------
 net/core/dev.c            |  5 ++++-
 net/sched/act_csum.c      |  3 +--
 net/sched/act_ipt.c       |  3 +--
 net/sched/act_mirred.c    |  3 +--
 net/sched/act_nat.c       |  3 +--
 net/sched/act_pedit.c     |  3 +--
 net/sched/act_police.c    |  3 +--
 net/sched/act_simple.c    |  3 +--
 net/sched/act_skbedit.c   |  3 +--
 net/sched/sch_atm.c       |  6 ++----
 net/sched/sch_cbq.c       |  6 ++----
 net/sched/sch_drr.c       |  8 ++------
 net/sched/sch_dsmark.c    |  3 +--
 net/sched/sch_hfsc.c      |  6 ++----
 net/sched/sch_htb.c       | 17 ++++++-----------
 net/sched/sch_ingress.c   |  3 +--
 net/sched/sch_multiq.c    |  3 +--
 net/sched/sch_netem.c     |  6 ++----
 net/sched/sch_prio.c      |  3 +--
 net/sched/sch_red.c       |  3 +--
 net/sched/sch_sfq.c       |  3 +--
 net/sched/sch_tbf.c       |  3 +--
 net/sched/sch_teql.c      |  3 +--
 24 files changed, 50 insertions(+), 72 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 0af57ebae762..e9eee99d8b1f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -207,7 +207,7 @@ static inline int qdisc_qlen(struct Qdisc *q)
 	return q->q.qlen;
 }
 
-static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb)
+static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
 {
 	return (struct qdisc_skb_cb *)skb->cb;
 }
@@ -394,7 +394,7 @@ static inline bool qdisc_tx_is_noop(const struct net_device *dev)
 	return true;
 }
 
-static inline unsigned int qdisc_pkt_len(struct sk_buff *skb)
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
 {
 	return qdisc_skb_cb(skb)->pkt_len;
 }
@@ -426,10 +426,18 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch)
 	return qdisc_enqueue(skb, sch) & NET_XMIT_MASK;
 }
 
-static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len)
+
+static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+				 const struct sk_buff *skb)
+{
+	bstats->bytes += qdisc_pkt_len(skb);
+	bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
+}
+
+static inline void qdisc_bstats_update(struct Qdisc *sch,
+				       const struct sk_buff *skb)
 {
-	sch->bstats.bytes += len;
-	sch->bstats.packets++;
+	bstats_update(&sch->bstats, skb);
 }
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
@@ -437,7 +445,7 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 {
 	__skb_queue_tail(list, skb);
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__qdisc_update_bstats(sch, qdisc_pkt_len(skb));
+	qdisc_bstats_update(sch, skb);
 
 	return NET_XMIT_SUCCESS;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 3295b94884ab..a3ef808b5e36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2297,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 */
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
-		__qdisc_update_bstats(q, skb->len);
+
+		qdisc_skb_cb(skb)->pkt_len = skb->len;
+		qdisc_bstats_update(q, skb);
+
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 67dc7ce9b63a..83ddfc07e45d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -508,8 +508,7 @@ static int tcf_csum(struct sk_buff *skb,
 
 	spin_lock(&p->tcf_lock);
 	p->tcf_tm.lastuse = jiffies;
-	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	p->tcf_bstats.packets++;
+	bstats_update(&p->tcf_bstats, skb);
 	action = p->tcf_action;
 	update_flags = p->update_flags;
 	spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8daef9632255..c2a7c20e81c1 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -209,8 +209,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	spin_lock(&ipt->tcf_lock);
 
 	ipt->tcf_tm.lastuse = jiffies;
-	ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	ipt->tcf_bstats.packets++;
+	bstats_update(&ipt->tcf_bstats, skb);
 
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be92827..d765067e99db 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&m->tcf_lock);
 	m->tcf_tm.lastuse = jiffies;
-	m->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	m->tcf_bstats.packets++;
+	bstats_update(&m->tcf_bstats, skb);
 
 	dev = m->tcfm_dev;
 	if (!dev) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 186eb837e600..178a4bd7b7cb 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 	egress = p->flags & TCA_NAT_FLAG_EGRESS;
 	action = p->tcf_action;
 
-	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	p->tcf_bstats.packets++;
+	bstats_update(&p->tcf_bstats, skb);
 
 	spin_unlock(&p->tcf_lock);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0593c9640db..445bef716f77 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -187,8 +187,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 bad:
 	p->tcf_qstats.overlimits++;
 done:
-	p->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	p->tcf_bstats.packets++;
+	bstats_update(&p->tcf_bstats, skb);
 	spin_unlock(&p->tcf_lock);
 	return p->tcf_action;
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7ebf7439b478..e2f08b1e2e58 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -298,8 +298,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&police->tcf_lock);
 
-	police->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	police->tcf_bstats.packets++;
+	bstats_update(&police->tcf_bstats, skb);
 
 	if (police->tcfp_ewma_rate &&
 	    police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 97e84f3ee775..7287cff7af3e 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -42,8 +42,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 
 	spin_lock(&d->tcf_lock);
 	d->tcf_tm.lastuse = jiffies;
-	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	d->tcf_bstats.packets++;
+	bstats_update(&d->tcf_bstats, skb);
 
 	/* print policy string followed by _ then packet count
 	 * Example if this was the 3rd packet and the string was "hello"
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 66cbf4eb8855..836f5fee9e58 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&d->tcf_lock);
 	d->tcf_tm.lastuse = jiffies;
-	d->tcf_bstats.bytes += qdisc_pkt_len(skb);
-	d->tcf_bstats.packets++;
+	bstats_update(&d->tcf_bstats, skb);
 
 	if (d->flags & SKBEDIT_F_PRIORITY)
 		skb->priority = d->priority;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 282540778aa8..943d733409d0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -422,10 +422,8 @@ drop: __maybe_unused
 		}
 		return ret;
 	}
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
-	flow->bstats.bytes += qdisc_pkt_len(skb);
-	flow->bstats.packets++;
+	qdisc_bstats_update(sch, skb);
+	bstats_update(&flow->bstats, skb);
 	/*
 	 * Okay, this may seem weird. We pretend we've dropped the packet if
 	 * it goes via ATM. The reason for this is that the outer qdisc
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index eb7631590865..c80d1c210c5d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -390,8 +390,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	ret = qdisc_enqueue(skb, cl->q);
 	if (ret == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
-		sch->bstats.packets++;
-		sch->bstats.bytes += qdisc_pkt_len(skb);
+		qdisc_bstats_update(sch, skb);
 		cbq_mark_toplevel(q, cl);
 		if (!cl->next_alive)
 			cbq_activate_class(cl);
@@ -650,8 +649,7 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 		ret = qdisc_enqueue(skb, cl->q);
 		if (ret == NET_XMIT_SUCCESS) {
 			sch->q.qlen++;
-			sch->bstats.packets++;
-			sch->bstats.bytes += qdisc_pkt_len(skb);
+			qdisc_bstats_update(sch, skb);
 			if (!cl->next_alive)
 				cbq_activate_class(cl);
 			return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index aa8b5313f8cf..de55e642eafc 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl;
-	unsigned int len;
 	int err;
 
 	cl = drr_classify(skb, sch, &err);
@@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	len = qdisc_pkt_len(skb);
 	err = qdisc_enqueue(skb, cl->qdisc);
 	if (unlikely(err != NET_XMIT_SUCCESS)) {
 		if (net_xmit_drop_count(err)) {
@@ -377,10 +375,8 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		cl->deficit = cl->quantum;
 	}
 
-	cl->bstats.packets++;
-	cl->bstats.bytes += len;
-	sch->bstats.packets++;
-	sch->bstats.bytes += len;
+	bstats_update(&cl->bstats, skb);
+	qdisc_bstats_update(sch, skb);
 
 	sch->q.qlen++;
 	return err;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1d295d62bb5c..60f4bdd4408e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -260,8 +260,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 069c62b7bb36..2e45791d4f6c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1599,10 +1599,8 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (cl->qdisc->q.qlen == 1)
 		set_active(cl, qdisc_pkt_len(skb));
 
-	cl->bstats.packets++;
-	cl->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
-	sch->bstats.bytes += qdisc_pkt_len(skb);
+	bstats_update(&cl->bstats, skb);
+	qdisc_bstats_update(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 01b519d6c52d..984c1b0c6836 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -569,15 +569,12 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 		return ret;
 	} else {
-		cl->bstats.packets +=
-			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-		cl->bstats.bytes += qdisc_pkt_len(skb);
+		bstats_update(&cl->bstats, skb);
 		htb_activate(q, cl);
 	}
 
 	sch->q.qlen++;
-	sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
-	sch->bstats.bytes += qdisc_pkt_len(skb);
+	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -648,12 +645,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 				htb_add_to_wait_tree(q, cl, diff);
 		}
 
-		/* update byte stats except for leaves which are already updated */
-		if (cl->level) {
-			cl->bstats.bytes += bytes;
-			cl->bstats.packets += skb_is_gso(skb)?
-					skb_shinfo(skb)->gso_segs:1;
-		}
+		/* update basic stats except for leaves which are already updated */
+		if (cl->level)
+			bstats_update(&cl->bstats, skb);
+
 		cl = cl->parent;
 	}
 }
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f10e34a68445..bce1665239b8 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	result = tc_classify(skb, p->filter_list, &res);
 
-	sch->bstats.packets++;
-	sch->bstats.bytes += qdisc_pkt_len(skb);
+	qdisc_bstats_update(sch, skb);
 	switch (result) {
 	case TC_ACT_SHOT:
 		result = TC_ACT_SHOT;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 32690deab5d0..21f13da24763 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,8 +83,7 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e5593c083a78..1c4bce863479 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -240,8 +240,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
 		sch->q.qlen++;
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 	} else if (net_xmit_drop_count(ret)) {
 		sch->qstats.drops++;
 	}
@@ -477,8 +476,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		__skb_queue_after(list, skb, nskb);
 
 		sch->qstats.backlog += qdisc_pkt_len(nskb);
-		sch->bstats.bytes += qdisc_pkt_len(nskb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, nskb);
 
 		return NET_XMIT_SUCCESS;
 	}
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index b1c95bce33ce..966158d49dd1 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -84,8 +84,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 	ret = qdisc_enqueue(skb, qdisc);
 	if (ret == NET_XMIT_SUCCESS) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 		return NET_XMIT_SUCCESS;
 	}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a67ba3c5a0cc..a6009c5a2c97 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -94,8 +94,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	ret = qdisc_enqueue(skb, child);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		sch->q.qlen++;
 	} else if (net_xmit_drop_count(ret)) {
 		q->stats.pdrop++;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index d54ac94066c2..239ec53a634d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -403,8 +403,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		return NET_XMIT_SUCCESS;
 	}
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 641a30d64635..77565e721811 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -134,8 +134,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	}
 
 	sch->q.qlen++;
-	sch->bstats.bytes += qdisc_pkt_len(skb);
-	sch->bstats.packets++;
+	qdisc_bstats_update(sch, skb);
 	return NET_XMIT_SUCCESS;
 }
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 106479a7c94a..af9360d1f6eb 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -83,8 +83,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 
 	if (q->q.qlen < dev->tx_queue_len) {
 		__skb_queue_tail(&q->q, skb);
-		sch->bstats.bytes += qdisc_pkt_len(skb);
-		sch->bstats.packets++;
+		qdisc_bstats_update(sch, skb);
 		return NET_XMIT_SUCCESS;
 	}
 
-- 
cgit v1.2.3


From 545ecdc3b3a2fe0b54a3053bf8bf85321bbca7da Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Sat, 8 Jan 2011 13:57:12 +0000
Subject: arp: allow to invalidate specific ARP entries

IPv4 over firewire needs to be able to remove ARP entries
from the ARP cache that belong to nodes that are removed, because
IPv4 over firewire uses ARP packets for private information
about nodes.

This information becomes invalid as soon as node drops
off the bus and when it reconnects, its only possible
to start talking to it after it responded to an ARP packet.
But ARP cache prevents such packets from being sent.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h |  1 +
 net/ipv4/arp.c    | 29 ++++++++++++++++++-----------
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/arp.h b/include/net/arp.h
index f4cf6ce66586..91f0568a04ef 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -25,5 +25,6 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 				  const unsigned char *src_hw,
 				  const unsigned char *target_hw);
 extern void arp_xmit(struct sk_buff *skb);
+int arp_invalidate(struct net_device *dev, __be32 ip);
 
 #endif	/* _ARP_H */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a2fc7b961dbc..04c8b69fd426 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1143,6 +1143,23 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
 	return err;
 }
 
+int arp_invalidate(struct net_device *dev, __be32 ip)
+{
+	struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
+	int err = -ENXIO;
+
+	if (neigh) {
+		if (neigh->nud_state & ~NUD_NOARP)
+			err = neigh_update(neigh, NULL, NUD_FAILED,
+					   NEIGH_UPDATE_F_OVERRIDE|
+					   NEIGH_UPDATE_F_ADMIN);
+		neigh_release(neigh);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(arp_invalidate);
+
 static int arp_req_delete_public(struct net *net, struct arpreq *r,
 		struct net_device *dev)
 {
@@ -1163,7 +1180,6 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 {
 	int err;
 	__be32 ip;
-	struct neighbour *neigh;
 
 	if (r->arp_flags & ATF_PUBL)
 		return arp_req_delete_public(net, r, dev);
@@ -1181,16 +1197,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		if (!dev)
 			return -EINVAL;
 	}
-	err = -ENXIO;
-	neigh = neigh_lookup(&arp_tbl, &ip, dev);
-	if (neigh) {
-		if (neigh->nud_state & ~NUD_NOARP)
-			err = neigh_update(neigh, NULL, NUD_FAILED,
-					   NEIGH_UPDATE_F_OVERRIDE|
-					   NEIGH_UPDATE_F_ADMIN);
-		neigh_release(neigh);
-	}
-	return err;
+	return arp_invalidate(dev, ip);
 }
 
 /*
-- 
cgit v1.2.3


From d7b92affba524e0ca848a5ab60649fb91190d9b5 Mon Sep 17 00:00:00 2001
From: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
Date: Fri, 7 Jan 2011 01:57:08 +0000
Subject: CAIF: Fix IPv6 support in receive path for GPRS/3G

Checks version field of IP in the receive path for GPRS/3G data
and appropriately sets the value of skb->protocol.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 84a422c98941..fa9dab372b68 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -76,6 +76,8 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 	struct chnl_net *priv  = container_of(layr, struct chnl_net, chnl);
 	int pktlen;
 	int err = 0;
+	const u8 *ip_version;
+	u8 buf;
 
 	priv = container_of(layr, struct chnl_net, chnl);
 
@@ -90,7 +92,21 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 	 * send the packet to the net stack.
 	 */
 	skb->dev = priv->netdev;
-	skb->protocol = htons(ETH_P_IP);
+
+	/* check the version of IP */
+	ip_version = skb_header_pointer(skb, 0, 1, &buf);
+	if (!ip_version)
+		return -EINVAL;
+	switch (*ip_version >> 4) {
+	case 4:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case 6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/* If we change the header in loop mode, the checksum is corrupted. */
 	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
-- 
cgit v1.2.3


From 219fd58be62d01e30224c7af919dea77b7e392a8 Mon Sep 17 00:00:00 2001
From: "M. Mohan Kumar" <mohan@in.ibm.com>
Date: Mon, 10 Jan 2011 14:23:53 -0600
Subject: net/9p: Use proper data types

Use proper data types for storing the count of the binary blob and
length of a string. Without this patch length calculation of string will
always result in -1 because of comparision between signed and unsigned
integer.

Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/protocol.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 798beac7f100..1e308f210928 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -178,27 +178,24 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			break;
 		case 's':{
 				char **sptr = va_arg(ap, char **);
-				int16_t len;
-				int size;
+				uint16_t len;
 
 				errcode = p9pdu_readf(pdu, proto_version,
 								"w", &len);
 				if (errcode)
 					break;
 
-				size = max_t(int16_t, len, 0);
-
-				*sptr = kmalloc(size + 1, GFP_KERNEL);
+				*sptr = kmalloc(len + 1, GFP_KERNEL);
 				if (*sptr == NULL) {
 					errcode = -EFAULT;
 					break;
 				}
-				if (pdu_read(pdu, *sptr, size)) {
+				if (pdu_read(pdu, *sptr, len)) {
 					errcode = -EFAULT;
 					kfree(*sptr);
 					*sptr = NULL;
 				} else
-					(*sptr)[size] = 0;
+					(*sptr)[len] = 0;
 			}
 			break;
 		case 'Q':{
@@ -234,14 +231,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case 'D':{
-				int32_t *count = va_arg(ap, int32_t *);
+				uint32_t *count = va_arg(ap, uint32_t *);
 				void **data = va_arg(ap, void **);
 
 				errcode =
 				    p9pdu_readf(pdu, proto_version, "d", count);
 				if (!errcode) {
 					*count =
-					    min_t(int32_t, *count,
+					    min_t(uint32_t, *count,
 						  pdu->size - pdu->offset);
 					*data = &pdu->sdata[pdu->offset];
 				}
@@ -404,9 +401,10 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			break;
 		case 's':{
 				const char *sptr = va_arg(ap, const char *);
-				int16_t len = 0;
+				uint16_t len = 0;
 				if (sptr)
-					len = min_t(int16_t, strlen(sptr), USHRT_MAX);
+					len = min_t(uint16_t, strlen(sptr),
+								USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
@@ -438,7 +436,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 						 stbuf->n_gid, stbuf->n_muid);
 			} break;
 		case 'D':{
-				int32_t count = va_arg(ap, int32_t);
+				uint32_t count = va_arg(ap, uint32_t);
 				const void *data = va_arg(ap, const void *);
 
 				errcode = p9pdu_writef(pdu, proto_version, "d",
-- 
cgit v1.2.3


From c191a836a908d1dd6b40c503741f91b914de3348 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 11 Jan 2011 01:14:22 +0000
Subject: tcp: disallow bind() to reuse addr/port

inet_csk_bind_conflict() logic currently disallows a bind() if
it finds a friend socket (a socket bound on same address/port)
satisfying a set of conditions :

1) Current (to be bound) socket doesnt have sk_reuse set
OR
2) other socket doesnt have sk_reuse set
OR
3) other socket is in LISTEN state

We should add the CLOSE state in the 3) condition, in order to avoid two
REUSEADDR sockets in CLOSE state with same local address/port, since
this can deny further operations.

Note : a prior patch tried to address the problem in a different (and
buggy) way. (commit fda48a0d7a8412ced tcp: bind() fix when many ports
are bound).

Reported-by: Gaspar Chilingarov <gasparch@gmail.com>
Reported-by: Daniel Baluta <daniel.baluta@gmail.com>
Tested-by: Daniel Baluta <daniel.baluta@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c  | 5 +++--
 net/ipv6/inet6_connection_sock.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 25e318153f14..97e5fb765265 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -73,7 +73,7 @@ int inet_csk_bind_conflict(const struct sock *sk,
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
 			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
+			    ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) {
 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
 				    sk2_rcv_saddr == sk_rcv_saddr(sk))
@@ -122,7 +122,8 @@ again:
 					    (tb->num_owners < smallest_size || smallest_size == -1)) {
 						smallest_size = tb->num_owners;
 						smallest_rover = rover;
-						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+						if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
+						    !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
 							spin_unlock(&head->lock);
 							snum = smallest_rover;
 							goto have_snum;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index e46305d1815a..d144e629d2b4 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 		     !sk2->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
 		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
+		     ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) &&
 		     ipv6_rcv_saddr_equal(sk, sk2))
 			break;
 	}
-- 
cgit v1.2.3


From fa6dd8a2c89861d05621ce7e2880e485bec22fba Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 11 Jan 2011 08:04:12 +0000
Subject: xfrm: check trunc_len in XFRMA_ALG_AUTH_TRUNC

Maximum trunc length is defined by MAX_AH_AUTH_LEN (in bytes)
and need to be checked when this value is set (in bits) by
the user. In ah4.c and ah6.c a BUG_ON() checks this condiftion.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6a8da81ff66f..d5e1e0b08890 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -26,6 +26,7 @@
 #include <net/sock.h>
 #include <net/xfrm.h>
 #include <net/netlink.h>
+#include <net/ah.h>
 #include <asm/uaccess.h>
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 #include <linux/in6.h>
@@ -302,7 +303,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props,
 	algo = xfrm_aalg_get_byname(ualg->alg_name, 1);
 	if (!algo)
 		return -ENOSYS;
-	if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
+	if ((ualg->alg_trunc_len / 8) > MAX_AH_AUTH_LEN ||
+	    ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits)
 		return -EINVAL;
 	*props = algo->desc.sadb_alg_id;
 
-- 
cgit v1.2.3


From 4b0ef1f223be4e092632b4152ceec5627ac10f59 Mon Sep 17 00:00:00 2001
From: Dang Hongwu <hongwu.dang@6wind.com>
Date: Tue, 11 Jan 2011 07:13:33 +0000
Subject: ah: reload pointers to skb data after calling skb_cow_data()

skb_cow_data() may allocate a new data buffer, so pointers on
skb should be set after this function.

Bug was introduced by commit dff3bb06 ("ah4: convert to ahash")
and 8631e9bd ("ah6: convert to ahash").

Signed-off-by: Wang Xuefu <xuefu.wang@6wind.com>
Acked-by: Krzysztof Witek <krzysztof.witek@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ah4.c | 7 ++++---
 net/ipv6/ah6.c | 8 +++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 880a5ec6dce0..86961bec70ab 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -314,14 +314,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	ah = (struct ip_auth_hdr *)skb->data;
-	iph = ip_hdr(skb);
-	ihl = ip_hdrlen(skb);
 
 	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
 		goto out;
 	nfrags = err;
 
+	ah = (struct ip_auth_hdr *)skb->data;
+	iph = ip_hdr(skb);
+	ihl = ip_hdrlen(skb);
+
 	work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
 	if (!work_iph)
 		goto out;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ee82d4ef26ce..1aba54ae53c4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -538,14 +538,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	ip6h = ipv6_hdr(skb);
-
-	skb_push(skb, hdr_len);
 
 	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
 		goto out;
 	nfrags = err;
 
+	ah = (struct ip_auth_hdr *)skb->data;
+	ip6h = ipv6_hdr(skb);
+
+	skb_push(skb, hdr_len);
+
 	work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
 	if (!work_iph)
 		goto out;
-- 
cgit v1.2.3


From 13ee6ac579574a2a95e982b19920fd2495dce8cd Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 11 Jan 2011 23:54:42 +0100
Subject: netfilter: fix race in conntrack between dump_table and destroy

The netlink interface to dump the connection tracking table has a race
when entries are deleted at the same time. A customer reported a crash
and the backtrace showed thatctnetlink_dump_table was running while a
conntrack entry was being destroyed.
(see https://bugzilla.vyatta.com/show_bug.cgi?id=6402).

According to RCU documentation, when using hlist_nulls the reader
must handle the case of seeing a deleted entry and not proceed
further down the linked list.  The old code would continue
which caused the scan to walk into the free list.

This patch uses locking (rather than RCU) for this operation which
is guaranteed safe, and no longer requires getting reference while
doing dump operation.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 746140264b2d..5cb8d3027b18 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -645,25 +645,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
-	rcu_read_lock();
+	spin_lock_bh(&nf_conntrack_lock);
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
-		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]],
+		hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
 					 hnnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);
-			if (!atomic_inc_not_zero(&ct->ct_general.use))
-				continue;
 			/* Dump entries of a given L3 protocol number.
 			 * If it is not specified, ie. l3proto == 0,
 			 * then dump everything. */
 			if (l3proto && nf_ct_l3num(ct) != l3proto)
-				goto releasect;
+				continue;
 			if (cb->args[1]) {
 				if (ct != last)
-					goto releasect;
+					continue;
 				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -681,8 +679,6 @@ restart:
 				if (acct)
 					memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
 			}
-releasect:
-		nf_ct_put(ct);
 		}
 		if (cb->args[1]) {
 			cb->args[1] = 0;
@@ -690,7 +686,7 @@ releasect:
 		}
 	}
 out:
-	rcu_read_unlock();
+	spin_unlock_bh(&nf_conntrack_lock);
 	if (last)
 		nf_ct_put(last);
 
-- 
cgit v1.2.3