Merge /pub/scm/linux/kernel/git/torvalds/linux-2.6

author: Wim Van Sebroeck <wim@iguana.be> 2007-05-01 08:53:01 +0200
committer: Wim Van Sebroeck <wim@iguana.be> 2007-05-01 08:53:01 +0200
commit: 48a7afe314bfc4d7f50e1608632f503dbba7e013 (patch)
tree: 4a80e6b96321a71affd1bacea817de93be08894b /net
parent: [WATCHDOG] Semi-typical watchdog bug re early misc_register() (diff)
parent: libata: honour host controllers that want just one host (diff)
download: linux-48a7afe314bfc4d7f50e1608632f503dbba7e013.tar.xz
linux-48a7afe314bfc4d7f50e1608632f503dbba7e013.zip
486 files changed, 22429 insertions, 32531 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ace6386384bc..91dde41b5481 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -100,7 +100,7 @@ static int fddi_rebuild_header(struct sk_buff	*skb)
 	struct fddihdr *fddi = (struct fddihdr *)skb->data;
 
 #ifdef CONFIG_INET
-	if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP))
+	if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
 		/* Try to get ARP to resolve the header and fill destination address */
 		return arp_find(fddi->daddr, skb);
 	else
@@ -130,12 +130,13 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 * to start of packet data.  Assume 802.2 SNAP frames for now.
 	 */
 
-	skb->mac.raw = skb->data;	/* point to frame control (FC) */
+	skb->dev = dev;
+	skb_reset_mac_header(skb);	/* point to frame control (FC) */
 
 	if(fddi->hdr.llc_8022_1.dsap==0xe0)
 	{
 		skb_pull(skb, FDDI_K_8022_HLEN-3);
-		type = __constant_htons(ETH_P_802_2);
+		type = htons(ETH_P_802_2);
 	}
 	else
 	{
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 578f2a3d692d..87ffc12b6891 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -60,7 +60,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 	 * Due to the stupidity of the little endian byte-order we
 	 * have to set the fp field this way.
 	 */
-	hip->fp.fixed		= __constant_htonl(0x04800018);
+	hip->fp.fixed		= htonl(0x04800018);
 	hip->fp.d2_size		= htonl(len + 8);
 	hip->le.fc		= 0;
 	hip->le.double_wide	= 0;	/* only HIPPI 800 for the time being */
@@ -104,7 +104,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
 	 * Only IP is currently supported
 	 */
 
-	if(hip->snap.ethertype != __constant_htons(ETH_P_IP))
+	if(hip->snap.ethertype != htons(ETH_P_IP))
 	{
 		printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
 		return 0;
@@ -126,14 +126,14 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hippi_hdr *hip;
 
-	hip = (struct hippi_hdr *) skb->data;
-
 	/*
 	 * This is actually wrong ... question is if we really should
 	 * set the raw address here.
 	 */
-	 skb->mac.raw = skb->data;
-	 skb_pull(skb, HIPPI_HLEN);
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
+	hip = (struct hippi_hdr *)skb_mac_header(skb);
+	skb_pull(skb, HIPPI_HLEN);
 
 	/*
 	 * No fancy promisc stuff here now.
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83f..04ee43e7538f 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,10 +56,10 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	};
 
 	rcu_read_lock();
-	proto = find_snap_client(skb->h.raw);
+	proto = find_snap_client(skb_transport_header(skb));
 	if (proto) {
 		/* Pass the frame on. */
-		skb->h.raw  += 5;
+		skb->transport_header += 5;
 		skb_pull_rcsum(skb, 5);
 		rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
 	} else {
diff --git a/net/802/tr.c b/net/802/tr.c
index 96bd14452c55..0ba1946211c9 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -189,11 +189,13 @@ static int tr_rebuild_header(struct sk_buff *skb)
 __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 
-	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+	struct trh_hdr *trh;
 	struct trllc *trllc;
 	unsigned riflen=0;
 
-	skb->mac.raw = skb->data;
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
+	trh = tr_hdr(skb);
 
 	if(trh->saddr[0] & TR_RII)
 		riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
@@ -552,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
 					if(j==1) {
 						segment=ntohs(entry->rseg[j-1])>>4;
 						seq_printf(seq,"  %03X",segment);
-					};
+					}
+
 					segment=ntohs(entry->rseg[j])>>4;
 					brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
 					seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7dfe..c0c7bb8e9f07 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -470,7 +470,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 		 */
 	default:
 		snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
-	};
+	}
 
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
 			       vlan_setup);
@@ -685,7 +685,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				break;
 		}
 		break;
-	};
+	}
 
 out:
 	return NOTIFY_DONE;
@@ -819,7 +819,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
 			__FUNCTION__, args.cmd);
 		return -EINVAL;
-	};
+	}
 out:
 	return err;
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2fc8fe2cb366..ec46084f44b4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
-	};
+	}
 
 	return 0;
 }
@@ -83,7 +83,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 			/* Lifted from Gleb's VLAN code... */
 			memmove(skb->data - ETH_HLEN,
 				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac.raw += VLAN_HLEN;
+			skb->mac_header += VLAN_HLEN;
 		}
 	}
 
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		break;
 	default:
 		break;
-	};
+	}
 
 	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
 	 * three protocols care about.
@@ -258,7 +258,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	 * won't work for fault tolerant netware but does for the rest.
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF) {
-		skb->protocol = __constant_htons(ETH_P_802_3);
+		skb->protocol = htons(ETH_P_802_3);
 		/* place it back on the queue to be handled by true layer 3 protocols.
 		 */
 
@@ -281,7 +281,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	/*
 	 *	Real 802.2 LLC
 	 */
-	skb->protocol = __constant_htons(ETH_P_802_2);
+	skb->protocol = htons(ETH_P_802_2);
 	/* place it back on the queue to be handled by upper layer protocols.
 	 */
 
@@ -380,6 +380,9 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		} else {
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 		}
+
+		skb->protocol = htons(ETH_P_8021Q);
+		skb_reset_network_header(skb);
 	}
 
 	/* Before delegating work to the lower layer, enter our MAC-address */
@@ -445,7 +448,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 
-	if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) {
+	if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
 		int orig_headroom = skb_headroom(skb);
 		unsigned short veth_TCI;
 
diff --git a/net/Kconfig b/net/Kconfig
index 915657832d94..2fc8e77b1e62 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -27,13 +27,6 @@ if NET
 
 menu "Networking options"
 
-config NETDEBUG
-	bool "Network packet debugging"
-	help
-	  You can say Y here if you want to get additional messages useful in
-	  debugging bad packets, but can overwhelm logs under denial of service
-	  attacks.
-
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
@@ -219,14 +212,18 @@ endmenu
 source "net/ax25/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
-source "net/ieee80211/Kconfig"
-
-config WIRELESS_EXT
-	bool
+source "net/rxrpc/Kconfig"
 
 config FIB_RULES
 	bool
 
+menu "Wireless"
+
+source "net/wireless/Kconfig"
+source "net/ieee80211/Kconfig"
+
+endmenu
+
 endif   # if NET
 endmenu # Networking
 
diff --git a/net/Makefile b/net/Makefile
index 4854ac506313..6b74d4118c5b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_IRDA)		+= irda/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_RXRPC)		+= rxrpc/
+obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
@@ -52,3 +53,5 @@ obj-$(CONFIG_IUCV)		+= iucv/
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
+
+obj-y				+= wireless/
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d89d62f3702f..5ef6a238bdbc 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -118,7 +118,9 @@ static void __aarp_send_query(struct aarp_entry *a)
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
@@ -163,7 +165,9 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
@@ -212,7 +216,9 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
@@ -539,7 +545,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
 	int hash;
 	struct aarp_entry *a;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Check for LocalTalk first */
 	if (dev->type == ARPHRD_LOCALTLK) {
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 113c175f1715..f6a92a0b7aa6 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1275,7 +1275,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_IP);
 	skb_pull(skb, 13);
 	skb->dev   = dev;
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	stats = dev->priv;
 	stats->rx_packets++;
@@ -1383,10 +1383,10 @@ free_it:
  *	@pt - packet type
  *
  *	Receive a packet (in skb) from device dev. This has come from the SNAP
- *	decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP
- *	header, skb->len is the DDP length. The physical headers have been
- *	extracted. PPP should probably pass frames marked as for this layer.
- *	[ie ARPHRD_ETHERTALK]
+ *	decoder, and on entry skb->transport_header points to the DDP header
+ *	and skb->len is the DDP length. The physical headers have been
+ *	extracted. PPP should probably pass frames marked as for this
+ *	layer.  [ie ARPHRD_ETHERTALK]
  */
 static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
@@ -1417,10 +1417,13 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 	/*
 	 * Size check to see if ddp->deh_len was crap
 	 * (Otherwise we'll detonate most spectacularly
-	 * in the middle of recvmsg()).
+	 * in the middle of atalk_checksum() or recvmsg()).
 	 */
-	if (skb->len < sizeof(*ddp))
+	if (skb->len < sizeof(*ddp) || skb->len < (len_hops & 1023)) {
+		pr_debug("AppleTalk: dropping corrupted frame (deh_len=%u, "
+			 "skb->len=%u)\n", len_hops & 1023, skb->len);
 		goto freeit;
+	}
 
 	/*
 	 * Any checksums. Note we don't do htons() on this == is assumed to be
@@ -1481,7 +1484,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	/* Expand any short form frames */
-	if (skb->mac.raw[2] == 1) {
+	if (skb_mac_header(skb)[2] == 1) {
 		struct ddpehdr *ddp;
 		/* Find our address */
 		struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1507,8 +1510,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		 * we write the network numbers !
 		 */
 
-		ddp->deh_dnode = skb->mac.raw[0];     /* From physical header */
-		ddp->deh_snode = skb->mac.raw[1];     /* From physical header */
+		ddp->deh_dnode = skb_mac_header(skb)[0];     /* From physical header */
+		ddp->deh_snode = skb_mac_header(skb)[1];     /* From physical header */
 
 		ddp->deh_dnet  = ap->s_net;	/* Network number */
 		ddp->deh_snet  = ap->s_net;
@@ -1519,7 +1522,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* Non routable, so force a drop if we slip up later */
 		ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
 	}
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	return atalk_rcv(skb, dev, pt, orig_dev);
 freeit:
@@ -1768,6 +1771,9 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP:
 			rc = sock_get_timestamp(sk, argp);
 			break;
+		case SIOCGSTAMPNS:
+			rc = sock_get_timestampns(sk, argp);
+			break;
 		/* Routing */
 		case SIOCADDRT:
 		case SIOCDELRT:
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ec4ebd3299e3..0e9f00c5c899 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -173,7 +173,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	}
 	skb_push(skb, minheadroom);
 	if (brvcc->encaps == e_llc)
-		memcpy(skb->data, llc_oui_pid_pad, 10);
+		skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
 	else
 		memset(skb->data, 0, 2);
 #endif /* FASTER_VERSION */
@@ -375,11 +375,11 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
 {
 	if (brvcc->filter.netmask == 0)
 		return 0;			/* no filter in place */
-	if (type == __constant_htons(ETH_P_IP) &&
+	if (type == htons(ETH_P_IP) &&
 	    (((struct iphdr *) (skb->data))->daddr & brvcc->filter.
 	     netmask) == brvcc->filter.prefix)
 		return 0;
-	if (type == __constant_htons(ETH_P_ARP))
+	if (type == htons(ETH_P_ARP))
 		return 0;
 	/* TODO: we should probably filter ARPs too.. don't want to have
 	 *   them returning values that don't make sense, or is that ok?
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
 	   than should be. What else should I set? */
 	skb_pull(skb, plen);
-	skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN;
+	skb_set_mac_header(skb, -ETH_HLEN);
 	skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_BR2684_FAST_TRANS
 	skb->protocol = ((u16 *) skb->data)[-1];
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ebb5d0ce8b6f..876b77f14745 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -213,7 +213,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		return;
 	}
 	ATM_SKB(skb)->vcc = vcc;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	if (!clip_vcc->encap
 	    || skb->len < RFC1483LLC_LEN
 	    || memcmp(skb->data, llc_oui, sizeof (llc_oui)))
@@ -261,14 +261,6 @@ static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 	spin_unlock_irqrestore(&PRIV(dev)->xoff_lock, flags);
 }
 
-static void clip_neigh_destroy(struct neighbour *neigh)
-{
-	DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh);
-	if (NEIGH2ENTRY(neigh)->vccs)
-		printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n");
-	NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD;
-}
-
 static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
 {
 	DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb);
@@ -342,7 +334,6 @@ static struct neigh_table clip_tbl = {
 	/* parameters are copied from ARP ... */
 	.parms = {
 		.tbl 			= &clip_tbl,
-		.neigh_destructor	= clip_neigh_destroy,
 		.base_reachable_time 	= 30 * HZ,
 		.retrans_time 		= 1 * HZ,
 		.gc_staletime 		= 60 * HZ,
@@ -711,7 +702,7 @@ static struct atm_dev atmarpd_dev = {
 	.ops =			&atmarpd_dev_ops,
 	.type =			"arpd",
 	.number = 		999,
-	.lock =			SPIN_LOCK_UNLOCKED
+	.lock =			__SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
 };
 
 
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 8ccee4591f65..7afd8e7754fd 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -82,6 +82,9 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP: /* borrowed from IP */
 			error = sock_get_timestamp(sk, argp);
 			goto done;
+		case SIOCGSTAMPNS: /* borrowed from IP */
+			error = sock_get_timestampns(sk, argp);
+			goto done;
 		case ATM_SETSC:
 			printk(KERN_WARNING "ATM_SETSC is obsolete\n");
 			error = 0;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3d804d61f656..4dc5f2b8c43c 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -283,8 +283,8 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
-		(long)skb->head, (long)skb->data, (long)skb->tail,
-		(long)skb->end);
+		(long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
+		(long)skb_end_pointer(skb));
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 	if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
 		lec_handle_bridge(skb, dev);
@@ -576,8 +576,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 					break;
 				}
 				skb2->len = sizeof(struct atmlec_msg);
-				memcpy(skb2->data, mesg,
-				       sizeof(struct atmlec_msg));
+				skb_copy_to_linear_data(skb2, mesg,
+							sizeof(*mesg));
 				atm_force_charge(priv->lecd, skb2->truesize);
 				sk = sk_atm(priv->lecd);
 				skb_queue_tail(&sk->sk_receive_queue, skb2);
@@ -630,7 +630,7 @@ static struct atm_dev lecatm_dev = {
 	.ops = &lecdev_ops,
 	.type = "lec",
 	.number = 999,		/* dummy device number */
-	.lock = SPIN_LOCK_UNLOCKED
+	.lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock)
 };
 
 /*
@@ -825,7 +825,6 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		if (!hlist_empty(&priv->lec_arp_empty_ones)) {
 			lec_arp_check_empties(priv, vcc, skb);
 		}
-		skb->dev = dev;
 		skb_pull(skb, 2);	/* skip lec_id */
 #ifdef CONFIG_TR
 		if (priv->is_trdev)
@@ -1338,7 +1337,7 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
 		if (skb == NULL)
 			return -1;
 		skb->len = *sizeoftlvs;
-		memcpy(skb->data, *tlvs, *sizeoftlvs);
+		skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs);
 		retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
 	}
 	return retval;
@@ -1372,7 +1371,7 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
 	if (skb == NULL)
 		return 0;
 	skb->len = sizeoftlvs;
-	memcpy(skb->data, tlvs, sizeoftlvs);
+	skb_copy_to_linear_data(skb, tlvs, sizeoftlvs);
 	retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
 	if (retval != 0)
 		printk("lec.c: lane2_associate_req() failed\n");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index cb3c004ff022..7c85aa551d5e 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -504,11 +504,13 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
 		tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
 		skb_pull(skb, ETH_HLEN);                       /* get rid of Eth header */
 		skb_push(skb, sizeof(tagged_llc_snap_hdr));    /* add LLC/SNAP header   */
-		memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr));
+		skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr,
+					sizeof(tagged_llc_snap_hdr));
 	} else {
 		skb_pull(skb, ETH_HLEN);                        /* get rid of Eth header */
 		skb_push(skb, sizeof(struct llc_snap_hdr));     /* add LLC/SNAP header + tag  */
-		memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr));
+		skb_copy_to_linear_data(skb, &llc_snap_mpoa_data,
+					sizeof(struct llc_snap_hdr));
 	}
 
 	atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
@@ -711,11 +713,12 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		return;
 	}
 	skb_push(new_skb, eg->ctrl_info.DH_length);     /* add MAC header */
-	memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
+	skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header,
+				eg->ctrl_info.DH_length);
 	new_skb->protocol = eth_type_trans(new_skb, dev);
-	new_skb->nh.raw = new_skb->data;
+	skb_reset_network_header(new_skb);
 
-	eg->latest_ip_addr = new_skb->nh.iph->saddr;
+	eg->latest_ip_addr = ip_hdr(new_skb)->saddr;
 	eg->packets_rcvd++;
 	mpc->eg_ops->put(eg);
 
@@ -734,7 +737,7 @@ static struct atm_dev mpc_dev = {
 	.ops	= &mpc_ops,
 	.type	= "mpc",
 	.number	= 42,
-	.lock	= SPIN_LOCK_UNLOCKED
+	.lock	= __SPIN_LOCK_UNLOCKED(mpc_dev.lock)
 	/* members not explicitly initialised will be 0 */
 };
 
@@ -936,7 +939,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
 	if (skb == NULL)
 		return -ENOMEM;
 	skb_put(skb, sizeof(struct k_message));
-	memcpy(skb->data, mesg, sizeof(struct k_message));
+	skb_copy_to_linear_data(skb, mesg, sizeof(*mesg));
 	atm_force_charge(mpc->mpoad_vcc, skb->truesize);
 
 	sk = sk_atm(mpc->mpoad_vcc);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 31d98b57e1de..d14baaf1f4c3 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -256,7 +256,7 @@ static struct atm_dev sigd_dev = {
 	.ops =		&sigd_dev_ops,
 	.type =		"sig",
 	.number =	999,
-	.lock =		SPIN_LOCK_UNLOCKED
+	.lock =		__SPIN_LOCK_UNLOCKED(sigd_dev.lock)
 };
 
 
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index a8993a041724..43dd86fca4d3 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -1,30 +1,27 @@
 #
 # Amateur Radio protocols and AX.25 device configuration
 #
-# 19971130	Now in an own category to make correct compilation of the
-#		AX.25 stuff easier...
-#		Joerg Reuter DL1BKE <jreuter@yaina.de>
-# 19980129	Moved to net/ax25/Config.in, sourcing device drivers.
 
 menuconfig HAMRADIO
 	depends on NET
 	bool "Amateur Radio support"
 	help
 	  If you want to connect your Linux box to an amateur radio, answer Y
-	  here. You want to read <http://www.tapr.org/tapr/html/pkthome.html> and
-	  the AX25-HOWTO, available from <http://www.tldp.org/docs.html#howto>.
+	  here. You want to read <http://www.tapr.org/tapr/html/pkthome.html>
+	  and more specifically about AX.25 on Linux
+	  <http://www.linux-ax25.org/>.
 
 	  Note that the answer to this question won't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
 	  the questions about amateur radio.
 
 comment "Packet Radio protocols"
-	depends on HAMRADIO && NET
+	depends on HAMRADIO
 
 config AX25
 	tristate "Amateur Radio AX.25 Level 2 protocol"
-	depends on HAMRADIO && NET
-	---help---
+	depends on HAMRADIO
+	help
 	  This is the protocol used for computer communication over amateur
 	  radio. It is either used by itself for point-to-point links, or to
 	  carry other protocols such as tcp/ip. To use it, you need a device
@@ -52,6 +49,7 @@ config AX25
 
 config AX25_DAMA_SLAVE
 	bool "AX.25 DAMA Slave support"
+	default y
 	depends on AX25
 	help
 	  DAMA is a mechanism to prevent collisions when doing AX.25
@@ -59,23 +57,38 @@ config AX25_DAMA_SLAVE
 	  from clients (called "slaves") and redistributes it to other slaves.
 	  If you say Y here, your Linux box will act as a DAMA slave; this is
 	  transparent in that you don't have to do any special DAMA
-	  configuration. (Linux cannot yet act as a DAMA server.) If unsure,
-	  say N.
+	  configuration. Linux cannot yet act as a DAMA server.  This option
+	  only compiles DAMA slave support into the kernel.  It still needs to
+	  be enabled at runtime.  For more about DAMA see
+	  <http://www.linux-ax25.org>.  If unsure, say Y.
+
+# placeholder until implemented
+config AX25_DAMA_MASTER
+	bool 'AX.25 DAMA Master support'
+	depends on AX25_DAMA_SLAVE && BROKEN
+	help
+	  DAMA is a mechanism to prevent collisions when doing AX.25
+	  networking. A DAMA server (called "master") accepts incoming traffic
+	  from clients (called "slaves") and redistributes it to other slaves.
+	  If you say Y here, your Linux box will act as a DAMA master; this is
+	  transparent in that you don't have to do any special DAMA
+	  configuration. Linux cannot yet act as a DAMA server.  This option
+	  only compiles DAMA master support into the kernel.  It still needs to
+	  be explicitly enabled, so if unsure, say Y.
 
-#	 bool '    AX.25 DAMA Master support' CONFIG_AX25_DAMA_MASTER
 config NETROM
 	tristate "Amateur Radio NET/ROM protocol"
 	depends on AX25
-	---help---
+	help
 	  NET/ROM is a network layer protocol on top of AX.25 useful for
 	  routing.
 
 	  A comprehensive listing of all the software for Linux amateur radio
 	  users as well as information about how to configure an AX.25 port is
-	  contained in the AX25-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>. You also might want to
-	  check out the file <file:Documentation/networking/ax25.txt>. More
-	  information about digital amateur radio in general is on the WWW at
+	  contained in the Linux Ham Wiki, available from
+	  <http://www.linux-ax25.org>. You also might want to check out the
+	  file <file:Documentation/networking/ax25.txt>. More information about
+	  digital amateur radio in general is on the WWW at
 	  <http://www.tapr.org/tapr/html/pkthome.html>.
 
 	  To compile this driver as a module, choose M here: the
@@ -84,27 +97,25 @@ config NETROM
 config ROSE
 	tristate "Amateur Radio X.25 PLP (Rose)"
 	depends on AX25
-	---help---
+	help
 	  The Packet Layer Protocol (PLP) is a way to route packets over X.25
 	  connections in general and amateur radio AX.25 connections in
 	  particular, essentially an alternative to NET/ROM.
 
 	  A comprehensive listing of all the software for Linux amateur radio
 	  users as well as information about how to configure an AX.25 port is
-	  contained in the AX25-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>.  You also might want to
-	  check out the file <file:Documentation/networking/ax25.txt>. More
-	  information about digital amateur radio in general is on the WWW at
+	  contained in the Linux Ham Wiki, available from
+	  <http://www.linux-ax25.org>.  You also might want to check out the
+	  file <file:Documentation/networking/ax25.txt>. More information about
+	  digital amateur radio in general is on the WWW at
 	  <http://www.tapr.org/tapr/html/pkthome.html>.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rose.
 
-
 menu "AX.25 network device drivers"
-	depends on HAMRADIO && NET && AX25!=n
+	depends on HAMRADIO && AX25
 
 source "drivers/net/hamradio/Kconfig"
 
 endmenu
-
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1c07c6a50eb8..6ded95272a53 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1127,22 +1127,22 @@ static int __must_check ax25_connect(struct socket *sock,
 		switch (sk->sk_state) {
 		case TCP_SYN_SENT: /* still trying */
 			err = -EINPROGRESS;
-			goto out;
+			goto out_release;
 
 		case TCP_ESTABLISHED: /* connection established */
 			sock->state = SS_CONNECTED;
-			goto out;
+			goto out_release;
 
 		case TCP_CLOSE: /* connection refused */
 			sock->state = SS_UNCONNECTED;
 			err = -ECONNREFUSED;
-			goto out;
+			goto out_release;
 		}
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
 		err = -EISCONN;	/* No reconnect on a seqpacket socket */
-		goto out;
+		goto out_release;
 	}
 
 	sk->sk_state   = TCP_CLOSE;
@@ -1159,12 +1159,12 @@ static int __must_check ax25_connect(struct socket *sock,
 		/* Valid number of digipeaters ? */
 		if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
 			err = -EINVAL;
-			goto out;
+			goto out_release;
 		}
 
 		if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
 			err = -ENOBUFS;
-			goto out;
+			goto out_release;
 		}
 
 		digi->ndigi      = fsa->fsa_ax25.sax25_ndigis;
@@ -1194,7 +1194,7 @@ static int __must_check ax25_connect(struct socket *sock,
 			current->comm);
 		if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
 			kfree(digi);
-			goto out;
+			goto out_release;
 		}
 
 		ax25_fillin_cb(ax25, ax25->ax25_dev);
@@ -1203,7 +1203,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		if (ax25->ax25_dev == NULL) {
 			kfree(digi);
 			err = -EHOSTUNREACH;
-			goto out;
+			goto out_release;
 		}
 	}
 
@@ -1213,7 +1213,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		kfree(digi);
 		err = -EADDRINUSE;		/* Already such a connection */
 		ax25_cb_put(ax25t);
-		goto out;
+		goto out_release;
 	}
 
 	ax25->dest_addr = fsa->fsa_ax25.sax25_call;
@@ -1223,7 +1223,7 @@ static int __must_check ax25_connect(struct socket *sock,
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		sock->state = SS_CONNECTED;
 		sk->sk_state   = TCP_ESTABLISHED;
-		goto out;
+		goto out_release;
 	}
 
 	/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
@@ -1255,55 +1255,53 @@ static int __must_check ax25_connect(struct socket *sock,
 	/* Now the loop */
 	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
 		err = -EINPROGRESS;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			set_current_state(TASK_INTERRUPTIBLE);
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		/* Not in ABM, not in WAIT_UA -> failed */
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);	/* Always set at this point */
-		goto out;
+		goto out_release;
 	}
 
 	sock->state = SS_CONNECTED;
 
-	err=0;
-out:
+	err = 0;
+out_release:
 	release_sock(sk);
 
 	return err;
 }
 
-
 static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -1328,30 +1326,29 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	The read queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		release_sock(sk);
-		current->state = TASK_INTERRUPTIBLE;
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+
+	if (err)
+		goto out;
 
 	newsk		 = skb->sk;
 	newsk->sk_socket = newsock;
@@ -1425,7 +1422,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sockaddr_ax25 sax;
 	struct sk_buff *skb;
 	ax25_digi dtmp, *dp;
-	unsigned char *asmptr;
 	ax25_cb *ax25;
 	size_t size;
 	int lv, err, addr_len = msg->msg_namelen;
@@ -1548,13 +1544,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Add the PID if one is not supplied by the user in the skb */
-	if (!ax25->pidincl) {
-		asmptr  = skb_push(skb, 1);
-		*asmptr = sk->sk_protocol;
-	}
+	if (!ax25->pidincl)
+		*skb_push(skb, 1) = sk->sk_protocol;
 
 	SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
 
@@ -1573,7 +1567,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+	skb_push(skb, 1 + ax25_addr_size(dp));
 
 	SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
 
@@ -1581,17 +1575,17 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
 
 	/* Build an AX.25 header */
-	asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
-					&sax.sax25_call, dp,
-					AX25_COMMAND, AX25_MODULUS));
+	lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
+			     dp, AX25_COMMAND, AX25_MODULUS);
 
 	SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
 
-	skb->h.raw = asmptr;
+	skb_set_transport_header(skb, lv);
 
-	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+	SOCK_DEBUG(sk, "base=%p pos=%p\n",
+		   skb->data, skb_transport_header(skb));
 
-	*asmptr = AX25_UI;
+	*skb_transport_header(skb) = AX25_UI;
 
 	/* Datagram frames go straight out of the door as UI */
 	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
@@ -1631,8 +1625,8 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!ax25_sk(sk)->pidincl)
 		skb_pull(skb, 1);		/* Remove PID */
 
-	skb->h.raw = skb->data;
-	copied     = skb->len;
+	skb_reset_transport_header(skb);
+	copied = skb->len;
 
 	if (copied > size) {
 		copied = size;
@@ -1645,9 +1639,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
 		ax25_digi digi;
 		ax25_address src;
+		const unsigned char *mac = skb_mac_header(skb);
 
-		ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
-
+		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
+				&digi, NULL, NULL);
 		sax->sax25_family = AF_AX25;
 		/* We set this correctly, even though we may not let the
 		   application know the digi calls further down (because it
@@ -1711,6 +1706,10 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		res = sock_get_timestamp(sk, argp);
 		break;
 
+	case SIOCGSTAMPNS:
+		res = sock_get_timestampns(sk, argp);
+		break;
+
 	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
 	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
 	case SIOCAX25GETUID: {
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 9569dd3fa466..a49773ff2b92 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -136,7 +136,7 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
 	if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
 		return;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	p = skb_put(skb, 2);
 
 	*p++ = cmd;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4a6b26becadc..0ddaff0df217 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -61,12 +61,14 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
 					skb_reserve(skbn, AX25_MAX_HEADER_LEN);
 
 					skbn->dev   = ax25->ax25_dev->dev;
-					skbn->h.raw = skbn->data;
-					skbn->nh.raw = skbn->data;
+					skb_reset_network_header(skbn);
+					skb_reset_transport_header(skbn);
 
 					/* Copy data from the fragments */
 					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
-						memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+						skb_copy_from_linear_data(skbo,
+							  skb_put(skbn, skbo->len),
+									  skbo->len);
 						kfree_skb(skbo);
 					}
 
@@ -122,8 +124,8 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
 		}
 
 		skb_pull(skb, 1);	/* Remove PID */
-		skb->mac.raw  = skb->nh.raw;
-		skb->nh.raw   = skb->data;
+		skb_reset_mac_header(skb);
+		skb_reset_network_header(skb);
 		skb->dev      = ax25->ax25_dev->dev;
 		skb->pkt_type = PACKET_HOST;
 		skb->protocol = htons(ETH_P_IP);
@@ -196,7 +198,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 	 *	Process the AX.25/LAPB frame.
 	 */
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
 		kfree_skb(skb);
@@ -233,7 +235,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	/* UI frame - bypass LAPB processing */
 	if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
-		skb->h.raw = skb->data + 2;		/* skip control and pid */
+		skb_set_transport_header(skb, 2); /* skip control and pid */
 
 		ax25_send_to_raw(&dest, skb, skb->data[1]);
 
@@ -246,8 +248,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 		switch (skb->data[1]) {
 		case AX25_P_IP:
 			skb_pull(skb,2);		/* drop PID/CTRL */
-			skb->h.raw    = skb->data;
-			skb->nh.raw   = skb->data;
+			skb_reset_transport_header(skb);
+			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
 			skb->protocol = htons(ETH_P_IP);
@@ -256,8 +258,8 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 		case AX25_P_ARP:
 			skb_pull(skb,2);
-			skb->h.raw    = skb->data;
-			skb->nh.raw   = skb->data;
+			skb_reset_transport_header(skb);
+			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
 			skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7f818bbcd1c5..930e4918037f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 		digipeat = route->digipeat;
 		dev = route->dev;
 		ip_mode = route->ip_mode;
-	};
+	}
 
 	if (dev == NULL)
 		dev = skb->dev;
@@ -171,7 +171,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 			src_c = *(ax25_address *)(bp + 8);
 
 			skb_pull(ourskb, AX25_HEADER_LEN - 1);	/* Keep PID */
-			ourskb->nh.raw = ourskb->data;
+			skb_reset_network_header(ourskb);
 
 			ax25=ax25_send_frame(
 			    ourskb,
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 223835092b7a..92b517af7260 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,8 +148,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 
 			if (ka9qfrag == 1) {
 				skb_reserve(skbn, frontlen + 2);
-				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
-				memcpy(skb_put(skbn, len), skb->data, len);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
+				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 				p = skb_push(skbn, 2);
 
 				*p++ = AX25_P_SEGMENT;
@@ -161,8 +162,9 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				}
 			} else {
 				skb_reserve(skbn, frontlen + 1);
-				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
-				memcpy(skb_put(skbn, len), skb->data, len);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
+				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 				p = skb_push(skbn, 1);
 				*p = AX25_P_TEXT;
 			}
@@ -205,7 +207,7 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
 	if (skb == NULL)
 		return;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (ax25->modulus == AX25_MODULUS) {
 		frame = skb_push(skb, 1);
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index b6c577e3c914..5fe9b2a6697d 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -162,7 +162,7 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
 
 	skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Assume a response - address structure for DTE */
 	if (ax25->modulus == AX25_MODULUS) {
@@ -205,7 +205,7 @@ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *des
 		return;	/* Next SABM will get DM'd */
 
 	skb_reserve(skb, dev->hard_header_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	ax25_digi_invert(digi, &retdigi);
 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c7228cfc6218..d942b946ba07 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b85d1492c357..ab2db55982ca 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -326,7 +326,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 		return 0;
 	}
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	/* Verify and pull out header */
 	if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
@@ -364,26 +364,28 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 
 	case BNEP_COMPRESSED_SRC_ONLY:
 		memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 
 	case BNEP_COMPRESSED_DST_ONLY:
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
+		       ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
+		       ETH_ALEN + 2);
 		break;
 
 	case BNEP_GENERAL:
-		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
+		       ETH_ALEN * 2);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 	}
 
-	memcpy(__skb_put(nskb, skb->len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len);
 	kfree_skb(skb);
 
 	s->stats.rx_packets++;
-	nskb->dev       = dev;
 	nskb->ip_summed = CHECKSUM_NONE;
 	nskb->protocol  = eth_type_trans(nskb, dev);
 	netif_rx_ni(nskb);
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 3933608a9296..66bef1ccee2a 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -124,7 +124,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 	}
 
 	if (skb && (skb->len > 0))
-		memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
 
 	memcpy(skb_put(nskb, count), buf, count);
 
@@ -256,7 +256,7 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 			hdr[2] = size >> 8;
 		}
 
-		memcpy(skb_put(nskb, size), skb->data, size);
+		skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
 		skb_pull(skb, size);
 
 		if (skb->len > 0) {
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f3403fdb59f8..63980bd6b5f2 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -72,11 +72,11 @@ void hci_acl_connect(struct hci_conn *conn)
 			inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
 		cp.pscan_rep_mode = ie->data.pscan_rep_mode;
 		cp.pscan_mode     = ie->data.pscan_mode;
-		cp.clock_offset   = ie->data.clock_offset | __cpu_to_le16(0x8000);
+		cp.clock_offset   = ie->data.clock_offset | cpu_to_le16(0x8000);
 		memcpy(conn->dev_class, ie->data.dev_class, 3);
 	}
 
-	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+	cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
 	if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
 		cp.role_switch	= 0x01;
 	else
@@ -107,7 +107,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
 
 	conn->state = BT_DISCONN;
 
-	cp.handle = __cpu_to_le16(conn->handle);
+	cp.handle = cpu_to_le16(conn->handle);
 	cp.reason = reason;
 	hci_send_cmd(conn->hdev, OGF_LINK_CTL,
 				OCF_DISCONNECT, sizeof(cp), &cp);
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
-	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
-	cp.handle   = __cpu_to_le16(handle);
+	cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+	cp.handle   = cpu_to_le16(handle);
 
 	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
 }
@@ -348,7 +348,7 @@ int hci_conn_auth(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_auth_requested cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
 	}
 	return 0;
@@ -368,7 +368,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
 
 	if (hci_conn_auth(conn)) {
 		struct hci_cp_set_conn_encrypt cp;
-		cp.handle  = __cpu_to_le16(conn->handle);
+		cp.handle  = cpu_to_le16(conn->handle);
 		cp.encrypt = 1;
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
 	}
@@ -383,7 +383,7 @@ int hci_conn_change_link_key(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_change_conn_link_key cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
 	}
 	return 0;
@@ -423,7 +423,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
 		struct hci_cp_exit_sniff_mode cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
 	}
@@ -452,21 +452,21 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn)
 
 	if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
 		struct hci_cp_sniff_subrate cp;
-		cp.handle             = __cpu_to_le16(conn->handle);
-		cp.max_latency        = __constant_cpu_to_le16(0);
-		cp.min_remote_timeout = __constant_cpu_to_le16(0);
-		cp.min_local_timeout  = __constant_cpu_to_le16(0);
+		cp.handle             = cpu_to_le16(conn->handle);
+		cp.max_latency        = cpu_to_le16(0);
+		cp.min_remote_timeout = cpu_to_le16(0);
+		cp.min_local_timeout  = cpu_to_le16(0);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
 	}
 
 	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
 		struct hci_cp_sniff_mode cp;
-		cp.handle       = __cpu_to_le16(conn->handle);
-		cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
-		cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
-		cp.attempt      = __constant_cpu_to_le16(4);
-		cp.timeout      = __constant_cpu_to_le16(1);
+		cp.handle       = cpu_to_le16(conn->handle);
+		cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
+		cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
+		cp.attempt      = cpu_to_le16(4);
+		cp.timeout      = cpu_to_le16(1);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_SNIFF_MODE, sizeof(cp), &cp);
 	}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4917919d86a6..aa4b56a8c3ea 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 	default:
 		err = -ETIMEDOUT;
 		break;
-	};
+	}
 
 	hdev->req_status = hdev->req_result = 0;
 
@@ -216,10 +216,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	/* Host buffer size */
 	{
 		struct hci_cp_host_buffer_size cp;
-		cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE);
+		cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE);
 		cp.sco_mtu = HCI_MAX_SCO_SIZE;
-		cp.acl_max_pkt = __cpu_to_le16(0xffff);
-		cp.sco_max_pkt = __cpu_to_le16(0xffff);
+		cp.acl_max_pkt = cpu_to_le16(0xffff);
+		cp.sco_max_pkt = cpu_to_le16(0xffff);
 		hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
 	}
 #endif
@@ -240,11 +240,11 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	}
 
 	/* Page timeout ~20 secs */
-	param = __cpu_to_le16(0x8000);
+	param = cpu_to_le16(0x8000);
 	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
 
 	/* Connection accept timeout ~20 secs */
-	param = __cpu_to_le16(0x7d00);
+	param = cpu_to_le16(0x7d00);
 	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
 }
 
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
 	}
 
 	hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
-	hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf));
+	hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
 	hdr->plen   = plen;
 
 	if (plen)
@@ -1060,7 +1060,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
 
 	hdr = (void *) hdev->sent_cmd->data;
 
-	if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+	if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
 		return NULL;
 
 	BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
@@ -1074,11 +1074,11 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 	struct hci_acl_hdr *hdr;
 	int len = skb->len;
 
-	hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
-	hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags));
-	hdr->dlen   = __cpu_to_le16(len);
-
-	skb->h.raw = (void *) hdr;
+	skb_push(skb, HCI_ACL_HDR_SIZE);
+	skb_reset_transport_header(skb);
+	hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
+	hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
+	hdr->dlen   = cpu_to_le16(len);
 }
 
 int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
@@ -1140,11 +1140,12 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 		return -EINVAL;
 	}
 
-	hdr.handle = __cpu_to_le16(conn->handle);
+	hdr.handle = cpu_to_le16(conn->handle);
 	hdr.dlen   = skb->len;
 
-	skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
-	memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+	skb_push(skb, HCI_SCO_HDR_SIZE);
+	skb_reset_transport_header(skb);
+	memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
 
 	skb->dev = (void *) hdev;
 	bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
@@ -1387,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
 			case HCI_SCODATA_PKT:
 				kfree_skb(skb);
 				continue;
-			};
+			}
 		}
 
 		/* Process frame */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 936d3fc479cd..447ba7131220 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -783,7 +783,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn->type == ACL_LINK && hdev->link_policy) {
 			struct hci_cp_write_link_policy cp;
 			cp.handle = ev->handle;
-			cp.policy = __cpu_to_le16(hdev->link_policy);
+			cp.policy = cpu_to_le16(hdev->link_policy);
 			hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
 		}
@@ -793,8 +793,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			struct hci_cp_change_conn_ptype cp;
 			cp.handle = ev->handle;
 			cp.pkt_type = (conn->type == ACL_LINK) ?
-				__cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
-				__cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+				cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
+				cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
 
 			hci_send_cmd(hdev, OGF_LINK_CTL,
 				OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
@@ -970,7 +970,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
 			if (!ev->status) {
 				struct hci_cp_set_conn_encrypt cp;
-				cp.handle  = __cpu_to_le16(conn->handle);
+				cp.handle  = cpu_to_le16(conn->handle);
 				cp.encrypt = 1;
 				hci_send_cmd(conn->hdev, OGF_LINK_CTL,
 					OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f5cfbbebb8..832b5f44be5c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -375,7 +375,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	hci_sock_cmsg(sk, msg, skb);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index ecfe8da1ce6b..d342e89b8bdd 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -679,6 +679,27 @@ static void hidp_close(struct hid_device *hid)
 {
 }
 
+static const struct {
+	__u16 idVendor;
+	__u16 idProduct;
+	unsigned quirks;
+} hidp_blacklist[] = {
+	/* Apple wireless Mighty Mouse */
+	{ 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL },
+
+	{ }	/* Terminating entry */
+};
+
+static void hidp_setup_quirks(struct hid_device *hid)
+{
+	unsigned int n;
+
+	for (n = 0; hidp_blacklist[n].idVendor; n++)
+		if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) &&
+				hidp_blacklist[n].idProduct == le16_to_cpu(hid->product))
+			hid->quirks = hidp_blacklist[n].quirks;
+}
+
 static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req)
 {
 	struct hid_device *hid = session->hid;
@@ -708,6 +729,8 @@ static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_conn
 
 	hid->hidinput_input_event = hidp_hidinput_event;
 
+	hidp_setup_quirks(hid);
+
 	list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list)
 		hidp_send_report(session, report);
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e83ee82440d3..a5867879b615 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -459,8 +459,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 			sk->sk_state = BT_DISCONN;
 			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 
-			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					L2CAP_DISCONN_REQ, sizeof(req), &req);
 		} else {
@@ -652,7 +652,7 @@ static int l2cap_do_connect(struct sock *sk)
 		if (sk->sk_type == SOCK_SEQPACKET) {
 			struct l2cap_conn_req req;
 			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
@@ -868,8 +868,8 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
 
 	/* Create L2CAP header */
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
-	lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-	lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
+	lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
+	lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
 
 	if (sk->sk_type == SOCK_DGRAM)
 		put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
@@ -1096,7 +1096,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 		} else if (sk->sk_state == BT_CONNECT) {
 			struct l2cap_conn_req req;
 			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
 		}
@@ -1192,13 +1192,13 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 		return NULL;
 
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
-	lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
-	lh->cid = __cpu_to_le16(0x0001);
+	lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
+	lh->cid = cpu_to_le16(0x0001);
 
 	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
 	cmd->code  = code;
 	cmd->ident = ident;
-	cmd->len   = __cpu_to_le16(dlen);
+	cmd->len   = cpu_to_le16(dlen);
 
 	if (dlen) {
 		count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
@@ -1316,11 +1316,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		*((u16 *) opt->val) = __cpu_to_le16(val);
+		*((u16 *) opt->val) = cpu_to_le16(val);
 		break;
 
 	case 4:
-		*((u32 *) opt->val) = __cpu_to_le32(val);
+		*((u32 *) opt->val) = cpu_to_le32(val);
 		break;
 
 	default:
@@ -1346,8 +1346,8 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
 	//   l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
 
-	req->dcid  = __cpu_to_le16(pi->dcid);
-	req->flags = __cpu_to_le16(0);
+	req->dcid  = cpu_to_le16(pi->dcid);
+	req->flags = cpu_to_le16(0);
 
 	return ptr - data;
 }
@@ -1383,9 +1383,9 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
 	else
 		flags = 0x0001;
 
-	rsp->scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-	rsp->result = __cpu_to_le16(result ? *result : 0);
-	rsp->flags  = __cpu_to_le16(flags);
+	rsp->scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+	rsp->result = cpu_to_le16(result ? *result : 0);
+	rsp->flags  = cpu_to_le16(flags);
 
 	return ptr - data;
 }
@@ -1470,10 +1470,10 @@ response:
 	bh_unlock_sock(parent);
 
 sendresp:
-	rsp.scid   = __cpu_to_le16(scid);
-	rsp.dcid   = __cpu_to_le16(dcid);
-	rsp.result = __cpu_to_le16(result);
-	rsp.status = __cpu_to_le16(status);
+	rsp.scid   = cpu_to_le16(scid);
+	rsp.dcid   = cpu_to_le16(dcid);
+	rsp.result = cpu_to_le16(result);
+	rsp.status = cpu_to_le16(status);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 	return 0;
 }
@@ -1613,8 +1613,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 		l2cap_sock_set_timer(sk, HZ * 5);
 		{
 			struct l2cap_disconn_req req;
-			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					L2CAP_DISCONN_REQ, sizeof(req), &req);
 		}
@@ -1652,8 +1652,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
 		return 0;
 
-	rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
-	rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+	rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+	rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1696,8 +1696,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
 
 	BT_DBG("type 0x%4.4x", type);
 
-	rsp.type   = __cpu_to_le16(type);
-	rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP);
+	rsp.type   = cpu_to_le16(type);
+	rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
 
 	return 0;
@@ -1794,7 +1794,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			BT_DBG("error %d", err);
 
 			/* FIXME: Map err to a valid reason */
-			rej.reason = __cpu_to_le16(0);
+			rej.reason = cpu_to_le16(0);
 			l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
 		}
 
@@ -1993,10 +1993,10 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 			result = L2CAP_CR_SEC_BLOCK;
 		}
 
-		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = __cpu_to_le16(result);
-		rsp.status = __cpu_to_le16(0);
+		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = cpu_to_le16(result);
+		rsp.status = cpu_to_le16(0);
 		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
@@ -2041,10 +2041,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
 			result = L2CAP_CR_SEC_BLOCK;
 		}
 
-		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = __cpu_to_le16(result);
-		rsp.status = __cpu_to_le16(0);
+		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = cpu_to_le16(result);
+		rsp.status = cpu_to_le16(0);
 		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
@@ -2107,7 +2107,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 		if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
 			goto drop;
 
-		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+			      skb->len);
 		conn->rx_len = len - skb->len;
 	} else {
 		BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2128,7 +2129,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 			goto drop;
 		}
 
-		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+			      skb->len);
 		conn->rx_len -= skb->len;
 
 		if (!conn->rx_len) {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 94f457360560..fe7df90eb707 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1567,7 +1567,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
 
 	/* Trim FCS */
 	skb->len--; skb->tail--;
-	fcs = *(u8 *) skb->tail;
+	fcs = *(u8 *)skb_tail_pointer(skb);
 
 	if (__check_fcs(skb->data, type, fcs)) {
 		BT_ERR("bad checksum in packet");
@@ -1851,18 +1851,18 @@ static void rfcomm_worker(void)
 	BT_DBG("");
 
 	while (!atomic_read(&terminate)) {
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
 			/* No pending events. Let's sleep.
 			 * Incoming connections and data will wake us up. */
-			set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
 		}
+		set_current_state(TASK_RUNNING);
 
 		/* Process stuff */
 		clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
 		rfcomm_process_sessions();
 	}
-	set_current_state(TASK_RUNNING);
 	return;
 }
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae4391440950..3f5163e725ed 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 2994387999a8..848b8fa8bedd 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,7 +37,9 @@ static int __init br_init(void)
 		return -EADDRINUSE;
 	}
 
-	br_fdb_init();
+	err = br_fdb_init();
+	if (err)
+		goto err_out1;
 
 	err = br_netfilter_init();
 	if (err)
@@ -47,7 +49,10 @@ static int __init br_init(void)
 	if (err)
 		goto err_out2;
 
-	br_netlink_init();
+	err = br_netlink_init();
+	if (err)
+		goto err_out3;
+
 	brioctl_set(br_ioctl_deviceless_stub);
 	br_handle_frame_hook = br_handle_frame;
 
@@ -55,7 +60,8 @@ static int __init br_init(void)
 	br_fdb_put_hook = br_fdb_put;
 
 	return 0;
-
+err_out3:
+	unregister_netdevice_notifier(&br_device_notifier);
 err_out2:
 	br_netfilter_fini();
 err_out1:
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 905a39c33a16..5e1892d8d874 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -37,7 +37,7 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	br->statistics.tx_packets++;
 	br->statistics.tx_bytes += skb->len;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
 	if (dest[0] & 1)
@@ -83,27 +83,21 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/* Allow setting mac address of pseudo-bridge to be same as
- * any of the bound interfaces
- */
+/* Allow setting mac address to any valid ethernet address. */
 static int br_set_mac_address(struct net_device *dev, void *p)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct sockaddr *addr = p;
-	struct net_bridge_port *port;
-	int err = -EADDRNOTAVAIL;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EINVAL;
 
 	spin_lock_bh(&br->lock);
-	list_for_each_entry(port, &br->port_list, list) {
-		if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
-			br_stp_change_bridge_id(br, addr->sa_data);
-			err = 0;
-			break;
-		}
-	}
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	br_stp_change_bridge_id(br, addr->sa_data);
 	spin_unlock_bh(&br->lock);
 
-	return err;
+	return 0;
 }
 
 static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 8d566c13cc73..91b017016d5b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,19 +20,28 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/jhash.h>
+#include <linux/random.h>
 #include <asm/atomic.h>
+#include <asm/unaligned.h>
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		      const unsigned char *addr);
 
-void __init br_fdb_init(void)
+static u32 fdb_salt __read_mostly;
+
+int __init br_fdb_init(void)
 {
 	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
 					 sizeof(struct net_bridge_fdb_entry),
 					 0,
 					 SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!br_fdb_cache)
+		return -ENOMEM;
+
+	get_random_bytes(&fdb_salt, sizeof(fdb_salt));
+	return 0;
 }
 
 void __exit br_fdb_fini(void)
@@ -44,24 +53,26 @@ void __exit br_fdb_fini(void)
 /* if topology_changing then use forward_delay (default 15 sec)
  * otherwise keep longer (default 5 minutes)
  */
-static __inline__ unsigned long hold_time(const struct net_bridge *br)
+static inline unsigned long hold_time(const struct net_bridge *br)
 {
 	return br->topology_change ? br->forward_delay : br->ageing_time;
 }
 
-static __inline__ int has_expired(const struct net_bridge *br,
+static inline int has_expired(const struct net_bridge *br,
 				  const struct net_bridge_fdb_entry *fdb)
 {
 	return !fdb->is_static
 		&& time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
 }
 
-static __inline__ int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac)
 {
-	return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1);
+	/* use 1 byte of OUI and 3 bytes of NIC */
+	u32 key = get_unaligned((u32 *)(mac + 2));
+	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
 }
 
-static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+static inline void fdb_delete(struct net_bridge_fdb_entry *f)
 {
 	hlist_del_rcu(&f->hlist);
 	br_fdb_put(f);
@@ -128,7 +139,26 @@ void br_fdb_cleanup(unsigned long _data)
 	mod_timer(&br->gc_timer, jiffies + HZ/10);
 }
 
+/* Completely flush all dynamic entries in forwarding database.*/
+void br_fdb_flush(struct net_bridge *br)
+{
+	int i;
 
+	spin_lock_bh(&br->hash_lock);
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct net_bridge_fdb_entry *f;
+		struct hlist_node *h, *n;
+		hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+			if (!f->is_static)
+				fdb_delete(f);
+		}
+	}
+	spin_unlock_bh(&br->hash_lock);
+}
+
+/* Flush all entries referring to a specific port.
+ * if do_all is set also flush static entries
+ */
 void br_fdb_delete_by_port(struct net_bridge *br,
 			   const struct net_bridge_port *p,
 			   int do_all)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3e45c1a1aa96..ada7f495445c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -71,7 +71,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 
 	indev = skb->dev;
 	skb->dev = to->dev;
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
 			br_forward_finish);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3a2e29be40c..690573bbf012 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -152,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p)
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
 
+	br_ifinfo_notify(RTM_DELLINK, p);
+
 	br_fdb_delete_by_port(br, p, 1);
 
 	list_del_rcu(&p->list);
@@ -203,7 +205,7 @@ static struct net_device *new_bridge_dev(const char *name)
 	memcpy(br->group_addr, br_group_address, ETH_ALEN);
 
 	br->feature_mask = dev->features;
-	br->stp_enabled = 0;
+	br->stp_enabled = BR_NO_STP;
 	br->designated_root = br->bridge_id;
 	br->root_path_cost = 0;
 	br->root_port = 0;
@@ -434,6 +436,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		br_stp_enable_port(p);
 	spin_unlock_bh(&br->lock);
 
+	br_ifinfo_notify(RTM_NEWLINK, p);
+
 	dev_set_mtu(br->dev, br_min_mtu(br));
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 35b94f9a1ac5..420bbb9955e9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,46 +112,59 @@ static int br_handle_local_finish(struct sk_buff *skb)
  */
 static inline int is_link_local(const unsigned char *dest)
 {
-	return memcmp(dest, br_group_address, 5) == 0 && (dest[5] & 0xf0) == 0;
+	const u16 *a = (const u16 *) dest;
+	static const u16 *const b = (const u16 *const ) br_group_address;
+	static const u16 m = __constant_cpu_to_be16(0xfff0);
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
 }
 
 /*
  * Called via br_handle_frame_hook.
- * Return 0 if *pskb should be processed furthur
- *	  1 if *pskb is handled
+ * Return NULL if skb is handled
  * note: already called with rcu_read_lock (preempt_disabled)
  */
-int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
+struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
-		goto err;
+		goto drop;
 
 	if (unlikely(is_link_local(dest))) {
-		skb->pkt_type = PACKET_HOST;
-		return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-			       NULL, br_handle_local_finish) != 0;
+		/* Pause frames shouldn't be passed up by driver anyway */
+		if (skb->protocol == htons(ETH_P_PAUSE))
+			goto drop;
+
+		/* Process STP BPDU's through normal netif_receive_skb() path */
+		if (p->br->stp_enabled != BR_NO_STP) {
+			if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+				    NULL, br_handle_local_finish))
+				return NULL;
+			else
+				return skb;
+		}
 	}
 
-	if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
+	switch (p->state) {
+	case BR_STATE_FORWARDING:
+
 		if (br_should_route_hook) {
-			if (br_should_route_hook(pskb))
-				return 0;
-			skb = *pskb;
+			if (br_should_route_hook(&skb))
+				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
-
+		/* fall through */
+	case BR_STATE_LEARNING:
 		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
 		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 			br_handle_frame_finish);
-		return 1;
+		break;
+	default:
+drop:
+		kfree_skb(skb);
 	}
-
-err:
-	kfree_skb(skb);
-	return 1;
+	return NULL;
 }
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 147015fe5c75..eda0fbfc923a 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -137,7 +137,8 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		b.topology_change = br->topology_change;
 		b.topology_change_detected = br->topology_change_detected;
 		b.root_port = br->root_port;
-		b.stp_enabled = br->stp_enabled;
+
+		b.stp_enabled = (br->stp_enabled != BR_NO_STP);
 		b.ageing_time = jiffies_to_clock_t(br->ageing_time);
 		b.hello_timer_value = br_timer_value(&br->hello_timer);
 		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -251,7 +252,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
-		br->stp_enabled = args[1]?1:0;
+		br_stp_set_enabled(br, args[1]);
 		return 0;
 
 	case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5439a3c46c3e..9b2986b182ba 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -29,6 +29,8 @@
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
 #include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
@@ -48,8 +50,8 @@
 
 #define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
 				 (skb->nf_bridge->data))->daddr.ipv4)
-#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = (skb)->nh.iph->daddr)
-#define dnat_took_place(skb)	 (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
+#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
@@ -57,8 +59,10 @@ static int brnf_call_iptables __read_mostly = 1;
 static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
 static int brnf_filter_vlan_tagged __read_mostly = 1;
+static int brnf_filter_pppoe_tagged __read_mostly = 1;
 #else
 #define brnf_filter_vlan_tagged 1
+#define brnf_filter_pppoe_tagged 1
 #endif
 
 static inline __be16 vlan_proto(const struct sk_buff *skb)
@@ -81,6 +85,22 @@ static inline __be16 vlan_proto(const struct sk_buff *skb)
 	 vlan_proto(skb) == htons(ETH_P_ARP) &&	\
 	 brnf_filter_vlan_tagged)
 
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
+{
+	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			    sizeof(struct pppoe_hdr)));
+}
+
+#define IS_PPPOE_IP(skb) \
+	(skb->protocol == htons(ETH_P_PPP_SES) && \
+	 pppoe_proto(skb) == htons(PPP_IP) && \
+	 brnf_filter_pppoe_tagged)
+
+#define IS_PPPOE_IPV6(skb) \
+	(skb->protocol == htons(ETH_P_PPP_SES) && \
+	 pppoe_proto(skb) == htons(PPP_IPV6) && \
+	 brnf_filter_pppoe_tagged)
+
 /* We need these fake structures to make netfilter happy --
  * lots of places assume that skb->dst != NULL, which isn't
  * all that unreasonable.
@@ -128,8 +148,11 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		header_size += VLAN_HLEN;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		header_size += PPPOE_SES_HLEN;
 
-	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
+	skb_copy_from_linear_data_offset(skb, -header_size,
+					 skb->nf_bridge->data, header_size);
 }
 
 /*
@@ -143,15 +166,20 @@ int nf_bridge_copy_header(struct sk_buff *skb)
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		header_size += VLAN_HLEN;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		header_size += PPPOE_SES_HLEN;
 
 	err = skb_cow(skb, header_size);
 	if (err)
 		return err;
 
-	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		__skb_push(skb, VLAN_HLEN);
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		__skb_push(skb, PPPOE_SES_HLEN);
 	return 0;
 }
 
@@ -174,7 +202,10 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	skb->dev = nf_bridge->physindev;
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -255,7 +286,10 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 	else {
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull(skb, VLAN_HLEN);
-			skb->nh.raw += VLAN_HLEN;
+			skb->network_header += VLAN_HLEN;
+		} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+			skb_pull(skb, PPPOE_SES_HLEN);
+			skb->network_header += PPPOE_SES_HLEN;
 		}
 		skb->dst->output(skb);
 	}
@@ -265,7 +299,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 static int br_nf_pre_routing_finish(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	int err;
 
@@ -325,7 +359,11 @@ bridged_dnat:
 				if (skb->protocol ==
 				    htons(ETH_P_8021Q)) {
 					skb_push(skb, VLAN_HLEN);
-					skb->nh.raw -= VLAN_HLEN;
+					skb->network_header -= VLAN_HLEN;
+				} else if(skb->protocol ==
+				    htons(ETH_P_PPP_SES)) {
+					skb_push(skb, PPPOE_SES_HLEN);
+					skb->network_header -= PPPOE_SES_HLEN;
 				}
 				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
 					       skb, skb->dev, NULL,
@@ -344,7 +382,10 @@ bridged_dnat:
 	skb->dev = nf_bridge->physindev;
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -372,9 +413,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 /* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
 static int check_hbh_len(struct sk_buff *skb)
 {
-	unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
+	unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
 	u32 pkt_len;
-	int off = raw - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int off = raw - nh;
 	int len = (raw[1] + 1) << 3;
 
 	if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +426,9 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off + 1] + 2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -395,17 +437,18 @@ static int check_hbh_len(struct sk_buff *skb)
 			break;
 
 		case IPV6_TLV_JUMBO:
-			if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
+			if (nh[off + 1] != 4 || (off & 3) != 2)
 				goto bad;
-			pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
+			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
 			if (pkt_len <= IPV6_MAXPLEN ||
-			    skb->nh.ipv6h->payload_len)
+			    ipv6_hdr(skb)->payload_len)
 				goto bad;
 			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
 				goto bad;
 			if (pskb_trim_rcsum(skb,
 					    pkt_len + sizeof(struct ipv6hdr)))
 				goto bad;
+			nh = skb_network_header(skb);
 			break;
 		default:
 			if (optlen > len)
@@ -439,7 +482,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto inhdr_error;
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto inhdr_error;
@@ -485,7 +528,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 	__u32 len;
 	struct sk_buff *skb = *pskb;
 
-	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) {
+	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+	    IS_PPPOE_IPV6(skb)) {
 #ifdef CONFIG_SYSCTL
 		if (!brnf_call_ip6tables)
 			return NF_ACCEPT;
@@ -495,7 +539,10 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull_rcsum(skb, VLAN_HLEN);
-			skb->nh.raw += VLAN_HLEN;
+			skb->network_header += VLAN_HLEN;
+		} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+			skb_pull_rcsum(skb, PPPOE_SES_HLEN);
+			skb->network_header += PPPOE_SES_HLEN;
 		}
 		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
 	}
@@ -504,7 +551,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		return NF_ACCEPT;
 #endif
 
-	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb))
+	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
+	    !IS_PPPOE_IP(skb))
 		return NF_ACCEPT;
 
 	if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
@@ -512,20 +560,23 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull_rcsum(skb, VLAN_HLEN);
-		skb->nh.raw += VLAN_HLEN;
+		skb->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull_rcsum(skb, PPPOE_SES_HLEN);
+		skb->network_header += PPPOE_SES_HLEN;
 	}
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (iph->ihl < 5 || iph->version != 4)
 		goto inhdr_error;
 
 	if (!pskb_may_pull(skb, 4 * iph->ihl))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
 		goto inhdr_error;
 
@@ -593,7 +644,10 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	}
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
@@ -622,14 +676,18 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 	if (!parent)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+	    IS_PPPOE_IP(skb))
 		pf = PF_INET;
 	else
 		pf = PF_INET6;
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(*pskb, VLAN_HLEN);
-		(*pskb)->nh.raw += VLAN_HLEN;
+		(*pskb)->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull(*pskb, PPPOE_SES_HLEN);
+		(*pskb)->network_header += PPPOE_SES_HLEN;
 	}
 
 	nf_bridge = skb->nf_bridge;
@@ -665,13 +723,13 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 		if (!IS_VLAN_ARP(skb))
 			return NF_ACCEPT;
 		skb_pull(*pskb, VLAN_HLEN);
-		(*pskb)->nh.raw += VLAN_HLEN;
+		(*pskb)->network_header += VLAN_HLEN;
 	}
 
-	if (skb->nh.arph->ar_pln != 4) {
+	if (arp_hdr(skb)->ar_pln != 4) {
 		if (IS_VLAN_ARP(skb)) {
 			skb_push(*pskb, VLAN_HLEN);
-			(*pskb)->nh.raw -= VLAN_HLEN;
+			(*pskb)->network_header -= VLAN_HLEN;
 		}
 		return NF_ACCEPT;
 	}
@@ -721,7 +779,10 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 	}
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 
 	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
@@ -753,7 +814,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	/* Be very paranoid. This probably won't happen anymore, but let's
 	 * keep the check just to be sure... */
-	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+	if (skb_mac_header(skb) < skb->head ||
+	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
 		       "bad mac.raw pointer.\n");
 		goto print_error;
@@ -766,7 +828,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	if (!realoutdev)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+	    IS_PPPOE_IP(skb))
 		pf = PF_INET;
 	else
 		pf = PF_INET6;
@@ -787,7 +850,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(skb, VLAN_HLEN);
-		skb->nh.raw += VLAN_HLEN;
+		skb->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull(skb, PPPOE_SES_HLEN);
+		skb->network_header += PPPOE_SES_HLEN;
 	}
 
 	nf_bridge_save_header(skb);
@@ -808,7 +874,7 @@ print_error:
 		if (realoutdev)
 			printk("[%s]", realoutdev->name);
 	}
-	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
 	       skb->data);
 	dump_stack();
 	return NF_ACCEPT;
@@ -925,6 +991,14 @@ static ctl_table brnf_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &brnf_sysctl_call_tables,
 	},
+	{
+		.ctl_name	= NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
+		.procname	= "bridge-nf-filter-pppoe-tagged",
+		.data		= &brnf_filter_pppoe_tagged,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 7d68b24b5654..35facc0c11c2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -11,8 +11,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 #include "br_private.h"
 
 static inline size_t br_nlmsg_size(void)
@@ -110,7 +109,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	int idx;
 
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next) {
 		/* not a bridge port */
 		if (dev->br_port == NULL || idx < cb->args[0])
@@ -123,7 +121,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 skip:
 		++idx;
 	}
-	read_unlock(&dev_base_lock);
 
 	cb->args[0] = idx;
 
@@ -166,7 +163,7 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 		return -EINVAL;
 
 	/* if kernel STP is running, don't allow changes */
-	if (p->br->stp_enabled)
+	if (p->br->stp_enabled == BR_KERNEL_STP)
 		return -EBUSY;
 
 	if (!netif_running(dev) ||
@@ -179,18 +176,19 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 }
 
 
-static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_GETLINK - RTM_BASE] = { .dumpit	= br_dump_ifinfo, },
-	[RTM_SETLINK - RTM_BASE] = { .doit      = br_rtm_setlink, },
-};
-
-void __init br_netlink_init(void)
+int __init br_netlink_init(void)
 {
-	rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table;
+	if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
+		return -ENOBUFS;
+
+	/* Only the first call to __rtnl_register can fail */
+	__rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+
+	return 0;
 }
 
 void __exit br_netlink_fini(void)
 {
-	rtnetlink_links[PF_BRIDGE] = NULL;
+	rtnl_unregister_all(PF_BRIDGE);
 }
 
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 37357ed2149b..c8451d3a070c 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -50,7 +50,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	case NETDEV_CHANGEADDR:
 		spin_lock_bh(&br->lock);
 		br_fdb_changeaddr(p, dev->dev_addr);
-		br_ifinfo_notify(RTM_NEWLINK, p);
 		br_stp_recalculate_bridge_id(br);
 		spin_unlock_bh(&br->lock);
 		break;
@@ -74,10 +73,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 
 	case NETDEV_UP:
-		spin_lock_bh(&br->lock);
-		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP))
+		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
+			spin_lock_bh(&br->lock);
 			br_stp_enable_port(p);
-		spin_unlock_bh(&br->lock);
+			spin_unlock_bh(&br->lock);
+		}
 		break;
 
 	case NETDEV_UNREGISTER:
@@ -85,5 +85,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 	}
 
+	/* Events that may cause spanning tree to refresh */
+	if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+	    event == NETDEV_CHANGE || event == NETDEV_DOWN)
+		br_ifinfo_notify(RTM_NEWLINK, p);
+
 	return NOTIFY_DONE;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc3f1c99261a..21bf3a9a03fd 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -26,7 +26,10 @@
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
 
-#define BR_VERSION	"2.2"
+#define BR_VERSION	"2.3"
+
+/* Path to usermode spanning tree program */
+#define BR_STP_PROG	"/sbin/bridge-stp"
 
 typedef struct bridge_id bridge_id;
 typedef struct mac_addr mac_addr;
@@ -107,7 +110,13 @@ struct net_bridge
 
 	u8				group_addr[ETH_ALEN];
 	u16				root_port;
-	unsigned char			stp_enabled;
+
+	enum {
+		BR_NO_STP, 		/* no spanning tree */
+		BR_KERNEL_STP,		/* old STP in kernel */
+		BR_USER_STP,		/* new RSTP in userspace */
+	} stp_enabled;
+
 	unsigned char			topology_change;
 	unsigned char			topology_change_detected;
 
@@ -127,14 +136,14 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 	return !memcmp(&br->bridge_id, &br->designated_root, 8);
 }
 
-
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* br_fdb.c */
-extern void br_fdb_init(void);
+extern int br_fdb_init(void);
 extern void br_fdb_fini(void);
+extern void br_fdb_flush(struct net_bridge *br);
 extern void br_fdb_changeaddr(struct net_bridge_port *p,
 			      const unsigned char *newaddr);
 extern void br_fdb_cleanup(unsigned long arg);
@@ -182,7 +191,8 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
+				       struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -207,6 +217,7 @@ extern void br_become_designated_port(struct net_bridge_port *p);
 /* br_stp_if.c */
 extern void br_stp_enable_bridge(struct net_bridge *br);
 extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
 extern void br_stp_enable_port(struct net_bridge_port *p);
 extern void br_stp_disable_port(struct net_bridge_port *p);
 extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
@@ -235,7 +246,7 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
 
 
 /* br_netlink.c */
-extern void br_netlink_init(void);
+extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f9ff4d57b0d7..ebb0861e9bd5 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -370,11 +370,11 @@ static void br_make_blocking(struct net_bridge_port *p)
 static void br_make_forwarding(struct net_bridge_port *p)
 {
 	if (p->state == BR_STATE_BLOCKING) {
-		if (p->br->stp_enabled) {
+		if (p->br->stp_enabled == BR_KERNEL_STP)
 			p->state = BR_STATE_LISTENING;
-		} else {
+		else
 			p->state = BR_STATE_LEARNING;
-		}
+
 		br_log_state(p);
 		mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay);	}
 }
@@ -384,6 +384,10 @@ void br_port_state_selection(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
 
+	/* Don't change port states if userspace is handling STP */
+	if (br->stp_enabled == BR_USER_STP)
+		return;
+
 	list_for_each_entry(p, &br->port_list, list) {
 		if (p->state != BR_STATE_DISABLED) {
 			if (p->port_no == br->root_port) {
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b9fb0dc4ab12..60112bce6698 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -33,9 +33,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
 {
 	struct sk_buff *skb;
 
-	if (!p->br->stp_enabled)
-		return;
-
 	skb = dev_alloc_skb(length+LLC_RESERVE);
 	if (!skb)
 		return;
@@ -75,6 +72,9 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
 {
 	unsigned char buf[35];
 
+	if (p->br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	buf[0] = 0;
 	buf[1] = 0;
 	buf[2] = 0;
@@ -117,6 +117,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
 {
 	unsigned char buf[4];
 
+	if (p->br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	buf[0] = 0;
 	buf[1] = 0;
 	buf[2] = 0;
@@ -157,9 +160,13 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
 	br = p->br;
 	spin_lock(&br->lock);
 
-	if (p->state == BR_STATE_DISABLED
-	    || !br->stp_enabled
-	    || !(br->dev->flags & IFF_UP))
+	if (br->stp_enabled != BR_KERNEL_STP)
+		goto out;
+
+	if (!(br->dev->flags & IFF_UP))
+		goto out;
+
+	if (p->state == BR_STATE_DISABLED)
 		goto out;
 
 	if (compare_ether_addr(dest, br->group_addr) != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 58d13f2bd121..3e246b37020e 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -87,7 +87,6 @@ void br_stp_disable_bridge(struct net_bridge *br)
 void br_stp_enable_port(struct net_bridge_port *p)
 {
 	br_init_port(p);
-	br_ifinfo_notify(RTM_NEWLINK, p);
 	br_port_state_selection(p->br);
 }
 
@@ -101,8 +100,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
 	printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
 	       br->dev->name, p->port_no, p->dev->name, "disabled");
 
-	br_ifinfo_notify(RTM_DELLINK, p);
-
 	wasroot = br_is_root_bridge(br);
 	br_become_designated_port(p);
 	p->state = BR_STATE_DISABLED;
@@ -123,10 +120,68 @@ void br_stp_disable_port(struct net_bridge_port *p)
 		br_become_root_bridge(br);
 }
 
+static void br_stp_start(struct net_bridge *br)
+{
+	int r;
+	char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+	char *envp[] = { NULL };
+
+	r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+	if (r == 0) {
+		br->stp_enabled = BR_USER_STP;
+		printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+	} else {
+		br->stp_enabled = BR_KERNEL_STP;
+		printk(KERN_INFO "%s: starting userspace STP failed, "
+				"staring kernel STP\n", br->dev->name);
+
+		/* To start timers on any ports left in blocking */
+		spin_lock_bh(&br->lock);
+		br_port_state_selection(br);
+		spin_unlock_bh(&br->lock);
+	}
+}
+
+static void br_stp_stop(struct net_bridge *br)
+{
+	int r;
+	char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
+	char *envp[] = { NULL };
+
+	if (br->stp_enabled == BR_USER_STP) {
+		r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+		printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
+			br->dev->name, r);
+
+
+		/* To start timers on any ports left in blocking */
+		spin_lock_bh(&br->lock);
+		br_port_state_selection(br);
+		spin_unlock_bh(&br->lock);
+	}
+
+	br->stp_enabled = BR_NO_STP;
+}
+
+void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+{
+	ASSERT_RTNL();
+
+	if (val) {
+		if (br->stp_enabled == BR_NO_STP)
+			br_stp_start(br);
+	} else {
+		if (br->stp_enabled != BR_NO_STP)
+			br_stp_stop(br);
+	}
+}
+
 /* called under bridge lock */
 void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 {
-	unsigned char oldaddr[6];
+	/* should be aligned on 2 bytes for compare_ether_addr() */
+	unsigned short oldaddr_aligned[ETH_ALEN >> 1];
+	unsigned char *oldaddr = (unsigned char *)oldaddr_aligned;
 	struct net_bridge_port *p;
 	int wasroot;
 
@@ -151,11 +206,14 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 		br_become_root_bridge(br);
 }
 
-static const unsigned char br_mac_zero[6];
+/* should be aligned on 2 bytes for compare_ether_addr() */
+static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1];
 
 /* called under bridge lock */
 void br_stp_recalculate_bridge_id(struct net_bridge *br)
 {
+	const unsigned char *br_mac_zero =
+			(const unsigned char *)br_mac_zero_aligned;
 	const unsigned char *addr = br_mac_zero;
 	struct net_bridge_port *p;
 
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 01a22ad0cc75..33c6c4a7c689 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,7 +149,11 @@ static ssize_t show_stp_state(struct device *d,
 
 static void set_stp_state(struct net_bridge *br, unsigned long val)
 {
-	br->stp_enabled = val;
+	spin_unlock_bh(&br->lock);	/* rtnl_lock() can sleep: drop it first */
+	rtnl_lock();
+	br_stp_set_enabled(br, val);	/* asserts RTNL, takes br->lock itself */
+	rtnl_unlock();
+	spin_lock_bh(&br->lock);	/* re-take the lock released on entry */
 }
 
 static ssize_t store_stp_state(struct device *d,
@@ -309,6 +313,19 @@ static ssize_t store_group_addr(struct device *d,
 static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
 		   show_group_addr, store_group_addr);
 
+static ssize_t store_flush(struct device *d,
+			   struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct net_bridge *br = to_bridge(d);
+
+	if (capable(CAP_NET_ADMIN)) {	/* buf is never parsed */
+		br_fdb_flush(br);
+		return len;
+	}
+	return -EPERM;
+}
+static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush);
 
 static struct attribute *bridge_attrs[] = {
 	&dev_attr_forward_delay.attr,
@@ -328,6 +345,7 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_topology_change_timer.attr,
 	&dev_attr_gc_timer.attr,
 	&dev_attr_group_addr.attr,
+	&dev_attr_flush.attr,
 	NULL
 };
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0bc2aef8f9f3..2da22927d8dd 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -137,6 +137,13 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
 }
 static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
 
+static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
+{
+	br_fdb_delete_by_port(p->br, p, 0); /* Don't delete local entry */
+	return 0;	/* the written value v is not used */
+}
+static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+
 static struct brport_attribute *brport_attrs[] = {
 	&brport_attr_path_cost,
 	&brport_attr_priority,
@@ -152,6 +159,7 @@ static struct brport_attribute *brport_attrs[] = {
 	&brport_attr_message_age_timer,
 	&brport_attr_forward_delay_timer,
 	&brport_attr_hold_timer,
+	&brport_attr_flush,
 	NULL
 };
 
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 9c599800a900..1a46952a56d9 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,40 +35,36 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 		return EBT_NOMATCH;
 
 	if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
-		__be32 _addr, *ap;
+		__be32 saddr, daddr, *sap, *dap;
 
-		/* IPv4 addresses are always 4 bytes */
-		if (ah->ar_pln != sizeof(__be32))
+		if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
+			return EBT_NOMATCH;
+		sap = skb_header_pointer(skb, sizeof(struct arphdr) +
+					ah->ar_hln, sizeof(saddr),
+					&saddr);
+		if (sap == NULL)
+			return EBT_NOMATCH;
+		dap = skb_header_pointer(skb, sizeof(struct arphdr) +
+					2*ah->ar_hln+sizeof(saddr),
+					sizeof(daddr), &daddr);
+		if (dap == NULL)
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_SRC_IP &&
+		    FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_DST_IP &&
+		    FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_GRAT &&
+		    FWINV(*dap != *sap, EBT_ARP_GRAT))
 			return EBT_NOMATCH;
-		if (info->bitmask & EBT_ARP_SRC_IP) {
-			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
-						ah->ar_hln, sizeof(_addr),
-						&_addr);
-			if (ap == NULL)
-				return EBT_NOMATCH;
-			if (FWINV(info->saddr != (*ap & info->smsk),
-			   EBT_ARP_SRC_IP))
-				return EBT_NOMATCH;
-		}
-
-		if (info->bitmask & EBT_ARP_DST_IP) {
-			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
-						2*ah->ar_hln+sizeof(__be32),
-						sizeof(_addr), &_addr);
-			if (ap == NULL)
-				return EBT_NOMATCH;
-			if (FWINV(info->daddr != (*ap & info->dmsk),
-			   EBT_ARP_DST_IP))
-				return EBT_NOMATCH;
-		}
 	}
 
 	if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
 		unsigned char _mac[ETH_ALEN], *mp;
 		uint8_t verdict, i;
 
-		/* MAC addresses are 6 bytes */
-		if (ah->ar_hln != ETH_ALEN)
+		if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
 			return EBT_NOMATCH;
 		if (info->bitmask & EBT_ARP_SRC_MAC) {
 			mp = skb_header_pointer(skb, sizeof(struct arphdr),
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 45712aec6a0e..031bfa4a51fc 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -196,14 +196,10 @@ static int __init ebt_log_init(void)
 	ret = ebt_register_watcher(&log);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
-		printk(KERN_WARNING "ebt_log: not logging via system console "
-		       "since somebody else already registered for PF_INET\n");
-		/* we cannot make module load fail here, since otherwise
-		 * ebtables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_BRIDGE, &ebt_log_logger);
+	if (ret < 0 && ret != -EEXIST)
+		ebt_unregister_watcher(&log);
+	return ret;
 }
 
 static void __exit ebt_log_fini(void)
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8e15cc47f6c0..9411db625917 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	unsigned int group = uloginfo->nlgroup;
 	ebt_ulog_buff_t *ub = &ulog_buffers[group];
 	spinlock_t *lock = &ub->lock;
+	ktime_t kt;
 
 	if ((uloginfo->cprange == 0) ||
 	    (uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 
 	/* Fill in the ulog data */
 	pm->version = EBT_ULOG_VERSION;
-	do_gettimeofday(&pm->stamp);
+	kt = ktime_get_real();
+	pm->stamp = ktime_to_timeval(kt);
 	if (ub->qlen == 1)
-		skb_set_timestamp(ub->skb, &pm->stamp);
+		ub->skb->tstamp = kt;
 	pm->data_len = copy_len;
 	pm->mark = skb->mark;
 	pm->hook = hooknr;
@@ -295,14 +297,12 @@ static int __init ebt_ulog_init(void)
 
 	/* initialize ulog_buffers */
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		init_timer(&ulog_buffers[i].timer);
-		ulog_buffers[i].timer.function = ulog_timer;
-		ulog_buffers[i].timer.data = i;
+		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 		spin_lock_init(&ulog_buffers[i].lock);
 	}
 
 	ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
-					  NULL, THIS_MODULE);
+					  NULL, NULL, THIS_MODULE);
 	if (!ebtulognl)
 		ret = -ENOMEM;
 	else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/compat.c b/net/compat.c
index 1f32866d09b7..9a0f5f2b90c8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,11 +34,11 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
 {
 	int tot_len = 0;
 
-	while(niov > 0) {
+	while (niov > 0) {
 		compat_uptr_t buf;
 		compat_size_t len;
 
-		if(get_user(len, &uiov32->iov_len) ||
+		if (get_user(len, &uiov32->iov_len) ||
 		   get_user(buf, &uiov32->iov_base)) {
 			tot_len = -EFAULT;
 			break;
@@ -78,12 +78,12 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 {
 	int tot_len;
 
-	if(kern_msg->msg_namelen) {
-		if(mode==VERIFY_READ) {
+	if (kern_msg->msg_namelen) {
+		if (mode==VERIFY_READ) {
 			int err = move_addr_to_kernel(kern_msg->msg_name,
 						      kern_msg->msg_namelen,
 						      kern_address);
-			if(err < 0)
+			if (err < 0)
 				return err;
 		}
 		kern_msg->msg_name = kern_address;
@@ -93,7 +93,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 	tot_len = iov_from_user_compat_to_kern(kern_iov,
 					  (struct compat_iovec __user *)kern_msg->msg_iov,
 					  kern_msg->msg_iovlen);
-	if(tot_len >= 0)
+	if (tot_len >= 0)
 		kern_msg->msg_iov = kern_iov;
 
 	return tot_len;
@@ -146,8 +146,8 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	kcmlen = 0;
 	kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
 	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
-	while(ucmsg != NULL) {
-		if(get_user(ucmlen, &ucmsg->cmsg_len))
+	while (ucmsg != NULL) {
+		if (get_user(ucmlen, &ucmsg->cmsg_len))
 			return -EFAULT;
 
 		/* Catch bogons. */
@@ -160,7 +160,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 		kcmlen += tmp;
 		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
 	}
-	if(kcmlen == 0)
+	if (kcmlen == 0)
 		return -EINVAL;
 
 	/* The kcmlen holds the 64-bit version of the control length.
@@ -176,7 +176,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	/* Now copy them over neatly. */
 	memset(kcmsg, 0, kcmlen);
 	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
-	while(ucmsg != NULL) {
+	while (ucmsg != NULL) {
 		if (__get_user(ucmlen, &ucmsg->cmsg_len))
 			goto Efault;
 		if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
@@ -215,11 +215,12 @@ Efault:
 int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
 {
 	struct compat_timeval ctv;
+	struct compat_timespec cts;
 	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
 	struct compat_cmsghdr cmhdr;
 	int cmlen;
 
-	if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+	if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
 		kmsg->msg_flags |= MSG_CTRUNC;
 		return 0; /* XXX: return error? check spec. */
 	}
@@ -229,11 +230,18 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 		ctv.tv_sec = tv->tv_sec;
 		ctv.tv_usec = tv->tv_usec;
 		data = &ctv;
-		len = sizeof(struct compat_timeval);
+		len = sizeof(ctv);
+	}
+	if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) {
+		struct timespec *ts = (struct timespec *)data;
+		cts.tv_sec = ts->tv_sec;
+		cts.tv_nsec = ts->tv_nsec;
+		data = &cts;
+		len = sizeof(cts);
 	}
 
 	cmlen = CMSG_COMPAT_LEN(len);
-	if(kmsg->msg_controllen < cmlen) {
+	if (kmsg->msg_controllen < cmlen) {
 		kmsg->msg_flags |= MSG_CTRUNC;
 		cmlen = kmsg->msg_controllen;
 	}
@@ -241,9 +249,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 	cmhdr.cmsg_type = type;
 	cmhdr.cmsg_len = cmlen;
 
-	if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
 		return -EFAULT;
-	if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+	if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
 		return -EFAULT;
 	cmlen = CMSG_COMPAT_SPACE(len);
 	kmsg->msg_control += cmlen;
@@ -545,20 +553,49 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 	struct compat_timeval __user *ctv =
 			(struct compat_timeval __user*) userstamp;
 	int err = -ENOENT;
+	struct timeval tv;
 
 	if (!sock_flag(sk, SOCK_TIMESTAMP))
 		sock_enable_timestamp(sk);
-	if (sk->sk_stamp.tv_sec == -1)
+	tv = ktime_to_timeval(sk->sk_stamp);
+	if (tv.tv_sec == -1)
 		return err;
-	if (sk->sk_stamp.tv_sec == 0)
-		do_gettimeofday(&sk->sk_stamp);
-	if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
-			put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
+	if (tv.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		tv = ktime_to_timeval(sk->sk_stamp);
+	}
+	err = 0;
+	if (put_user(tv.tv_sec, &ctv->tv_sec) ||
+			put_user(tv.tv_usec, &ctv->tv_usec))
 		err = -EFAULT;
 	return err;
 }
 EXPORT_SYMBOL(compat_sock_get_timestamp);
 
+/* Report sk->sk_stamp as a compat_timespec; -ENOENT when its tv_sec is -1. */
+int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct compat_timespec __user *cts;
+	struct timespec stamp;
+
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+	stamp = ktime_to_timespec(sk->sk_stamp);
+	if (stamp.tv_sec == -1)
+		return -ENOENT;
+	if (stamp.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		stamp = ktime_to_timespec(sk->sk_stamp);
+	}
+	cts = (struct compat_timespec __user *)userstamp;
+	if (put_user(stamp.tv_sec, &cts->tv_sec))
+		return -EFAULT;
+	if (put_user(stamp.tv_nsec, &cts->tv_nsec))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL(compat_sock_get_timestampns);
+
 asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -617,7 +654,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	a0 = a[0];
 	a1 = a[1];
 
-	switch(call) {
+	switch (call) {
 	case SYS_SOCKET:
 		ret = sys_socket(a0, a1, a[2]);
 		break;
diff --git a/net/core/Makefile b/net/core/Makefile
index 73272d506e93..4751613e1b59 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,6 @@ obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 obj-$(CONFIG_XFRM) += flow.o
 obj-$(CONFIG_SYSFS) += net-sysfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
-obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186212b5b7da..cb056f476126 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -411,11 +411,11 @@ fault:
 	return -EFAULT;
 }
 
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 {
 	__sum16 sum;
 
-	sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
 	if (likely(!sum)) {
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
@@ -423,6 +423,12 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 	}
 	return sum;
 }
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	return __skb_checksum_complete_head(skb, skb->len); /* all skb->len bytes */
+}
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
diff --git a/net/core/dev.c b/net/core/dev.c
index cf71614dae93..eb999003bbb7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -109,7 +109,7 @@
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <linux/wireless.h>
+#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -146,8 +146,8 @@
  */
 
 static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16];	/* 16 way hashed list */
-static struct list_head ptype_all;		/* Taps */
+static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
+static struct list_head ptype_all __read_mostly;	/* Taps */
 
 #ifdef CONFIG_NET_DMA
 static struct dma_client *net_dma_client;
@@ -226,12 +226,6 @@ extern void netdev_unregister_sysfs(struct net_device *);
 *******************************************************************************/
 
 /*
- *	For efficiency
- */
-
-static int netdev_nit;
-
-/*
  *	Add a protocol ID to the list. Now that the input handler is
  *	smarter we can dispense with all the messy stuff that used to be
  *	here.
@@ -265,10 +259,9 @@ void dev_add_pack(struct packet_type *pt)
 	int hash;
 
 	spin_lock_bh(&ptype_lock);
-	if (pt->type == htons(ETH_P_ALL)) {
-		netdev_nit++;
+	if (pt->type == htons(ETH_P_ALL))
 		list_add_rcu(&pt->list, &ptype_all);
-	} else {
+	else {
 		hash = ntohs(pt->type) & 15;
 		list_add_rcu(&pt->list, &ptype_base[hash]);
 	}
@@ -295,10 +288,9 @@ void __dev_remove_pack(struct packet_type *pt)
 
 	spin_lock_bh(&ptype_lock);
 
-	if (pt->type == htons(ETH_P_ALL)) {
-		netdev_nit--;
+	if (pt->type == htons(ETH_P_ALL))
 		head = &ptype_all;
-	} else
+	else
 		head = &ptype_base[ntohs(pt->type) & 15];
 
 	list_for_each_entry(pt1, head, list) {
@@ -751,13 +743,10 @@ int dev_change_name(struct net_device *dev, char *newname)
 	else
 		strlcpy(dev->name, newname, IFNAMSIZ);
 
-	err = device_rename(&dev->dev, dev->name);
-	if (!err) {
-		hlist_del(&dev->name_hlist);
-		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
-		raw_notifier_call_chain(&netdev_chain,
-				NETDEV_CHANGENAME, dev);
-	}
+	device_rename(&dev->dev, dev->name);
+	hlist_del(&dev->name_hlist);
+	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
+	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 
 	return err;
 }
@@ -820,7 +809,6 @@ static int default_rebuild_header(struct sk_buff *skb)
 	return 1;
 }
 
-
 /**
  *	dev_open	- prepare an interface for use.
  *	@dev:	device to open
@@ -1034,23 +1022,12 @@ void net_disable_timestamp(void)
 	atomic_dec(&netstamp_needed);
 }
 
-void __net_timestamp(struct sk_buff *skb)
-{
-	struct timeval tv;
-
-	do_gettimeofday(&tv);
-	skb_set_timestamp(skb, &tv);
-}
-EXPORT_SYMBOL(__net_timestamp);
-
 static inline void net_timestamp(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
-	else {
-		skb->tstamp.off_sec = 0;
-		skb->tstamp.off_usec = 0;
-	}
+	else
+		skb->tstamp.tv64 = 0;
 }
 
 /*
@@ -1080,18 +1057,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
 			 */
-			skb2->mac.raw = skb2->data;
+			skb_reset_mac_header(skb2);
 
-			if (skb2->nh.raw < skb2->data ||
-			    skb2->nh.raw > skb2->tail) {
+			if (skb_network_header(skb2) < skb2->data ||
+			    skb2->network_header > skb2->tail) {
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
 					       skb2->protocol, dev->name);
-				skb2->nh.raw = skb2->data;
+				skb_reset_network_header(skb2);
 			}
 
-			skb2->h.raw = skb2->nh.raw;
+			skb2->transport_header = skb2->network_header;
 			skb2->pkt_type = PACKET_OUTGOING;
 			ptype->func(skb2, skb->dev, ptype, skb->dev);
 		}
@@ -1170,7 +1147,7 @@ EXPORT_SYMBOL(netif_device_attach);
 int skb_checksum_help(struct sk_buff *skb)
 {
 	__wsum csum;
-	int ret = 0, offset = skb->h.raw - skb->data;
+	int ret = 0, offset;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
@@ -1186,15 +1163,16 @@ int skb_checksum_help(struct sk_buff *skb)
 			goto out;
 	}
 
+	offset = skb->csum_start - skb_headroom(skb);
 	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
-	offset = skb->tail - skb->h.raw;
+	offset = skb_headlen(skb) - offset;
 	BUG_ON(offset <= 0);
 	BUG_ON(skb->csum_offset + 2 > offset);
 
-	*(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
-
+	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
+		csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
@@ -1220,11 +1198,11 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 
 	BUG_ON(skb_shinfo(skb)->frag_list);
 
-	skb->mac.raw = skb->data;
-	skb->mac_len = skb->nh.raw - skb->data;
+	skb_reset_mac_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
@@ -1238,7 +1216,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
-				__skb_push(skb, skb->data - skb->nh.raw);
+				__skb_push(skb, (skb->data -
+						 skb_network_header(skb)));
 			}
 			segs = ptype->gso_segment(skb, features);
 			break;
@@ -1246,7 +1225,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	}
 	rcu_read_unlock();
 
-	__skb_push(skb, skb->data - skb->mac.raw);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	return segs;
 }
@@ -1343,7 +1322,7 @@ static int dev_gso_segment(struct sk_buff *skb)
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(!skb->next)) {
-		if (netdev_nit)
+		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
 		if (netif_needs_gso(dev, skb)) {
@@ -1445,12 +1424,16 @@ int dev_queue_xmit(struct sk_buff *skb)
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    (!(dev->features & NETIF_F_GEN_CSUM) &&
-	     (!(dev->features & NETIF_F_IP_CSUM) ||
-	      skb->protocol != htons(ETH_P_IP))))
-		if (skb_checksum_help(skb))
-			goto out_kfree_skb;
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		skb_set_transport_header(skb, skb->csum_start -
+					      skb_headroom(skb));
+
+		if (!(dev->features & NETIF_F_GEN_CSUM) &&
+		    (!(dev->features & NETIF_F_IP_CSUM) ||
+		     skb->protocol != htons(ETH_P_IP)))
+			if (skb_checksum_help(skb))
+				goto out_kfree_skb;
+	}
 
 gso:
 	spin_lock_prefetch(&dev->queue_lock);
@@ -1546,9 +1529,9 @@ out:
 			Receiver routines
   =======================================================================*/
 
-int netdev_max_backlog = 1000;
-int netdev_budget = 300;
-int weight_p = 64;            /* old backlog weight */
+int netdev_max_backlog __read_mostly = 1000;
+int netdev_budget __read_mostly = 300;
+int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
@@ -1580,7 +1563,7 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.off_sec)
+	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
 	/*
@@ -1687,40 +1670,46 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
-static __inline__ int deliver_skb(struct sk_buff *skb,
-				  struct packet_type *pt_prev,
-				  struct net_device *orig_dev)
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
 {
 	atomic_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+/* These hooks defined here for ATM */
 struct net_bridge;
 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
 						unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
 
-static __inline__ int handle_bridge(struct sk_buff **pskb,
-				    struct packet_type **pt_prev, int *ret,
-				    struct net_device *orig_dev)
+/*
+ * If bridge module is loaded call bridging hook.
+ *  returns NULL if packet was consumed.
+ */
+struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+					struct sk_buff *skb) __read_mostly;
+static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
+					    struct packet_type **pt_prev, int *ret,
+					    struct net_device *orig_dev)
 {
 	struct net_bridge_port *port;
 
-	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
-		return 0;
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
+		return skb;
 
 	if (*pt_prev) {
-		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
 
-	return br_handle_frame_hook(port, pskb);
+	return br_handle_frame_hook(port, skb);
 }
 #else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -1741,8 +1730,8 @@ static int ing_filter(struct sk_buff *skb)
 	if (dev->qdisc_ingress) {
 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
 		if (MAX_RED_LOOP < ttl++) {
-			printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
-				skb->input_dev->name, skb->dev->name);
+			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
+				skb->iif, skb->dev->ifindex);
 			return TC_ACT_SHOT;
 		}
 
@@ -1772,11 +1761,11 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (skb->dev->poll && netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.off_sec)
+	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	if (!skb->input_dev)
-		skb->input_dev = skb->dev;
+	if (!skb->iif)
+		skb->iif = skb->dev->ifindex;
 
 	orig_dev = skb_bond(skb);
 
@@ -1785,8 +1774,9 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	skb->h.raw = skb->nh.raw = skb->data;
-	skb->mac_len = skb->nh.raw - skb->mac.raw;
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb->mac_len = skb->network_header - skb->mac_header;
 
 	pt_prev = NULL;
 
@@ -1826,7 +1816,8 @@ int netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
+	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
+	if (!skb)
 		goto out;
 
 	type = skb->protocol;
@@ -2079,7 +2070,7 @@ static int dev_ifconf(char __user *arg)
  *	This is invoked by the /proc filesystem handler to display a device
  *	in detail.
  */
-static __inline__ struct net_device *dev_get_idx(loff_t pos)
+static struct net_device *dev_get_idx(loff_t pos)
 {
 	struct net_device *dev;
 	loff_t i;
@@ -2108,28 +2099,25 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	if (dev->get_stats) {
-		struct net_device_stats *stats = dev->get_stats(dev);
-
-		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
-			   dev->name, stats->rx_bytes, stats->rx_packets,
-			   stats->rx_errors,
-			   stats->rx_dropped + stats->rx_missed_errors,
-			   stats->rx_fifo_errors,
-			   stats->rx_length_errors + stats->rx_over_errors +
-			     stats->rx_crc_errors + stats->rx_frame_errors,
-			   stats->rx_compressed, stats->multicast,
-			   stats->tx_bytes, stats->tx_packets,
-			   stats->tx_errors, stats->tx_dropped,
-			   stats->tx_fifo_errors, stats->collisions,
-			   stats->tx_carrier_errors +
-			     stats->tx_aborted_errors +
-			     stats->tx_window_errors +
-			     stats->tx_heartbeat_errors,
-			   stats->tx_compressed);
-	} else
-		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
+	struct net_device_stats *stats = dev->get_stats(dev);
+
+	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
+		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+		   dev->name, stats->rx_bytes, stats->rx_packets,
+		   stats->rx_errors,
+		   stats->rx_dropped + stats->rx_missed_errors,
+		   stats->rx_fifo_errors,
+		   stats->rx_length_errors + stats->rx_over_errors +
+		    stats->rx_crc_errors + stats->rx_frame_errors,
+		   stats->rx_compressed, stats->multicast,
+		   stats->tx_bytes, stats->tx_packets,
+		   stats->tx_errors, stats->tx_dropped,
+		   stats->tx_fifo_errors, stats->collisions,
+		   stats->tx_carrier_errors +
+		    stats->tx_aborted_errors +
+		    stats->tx_window_errors +
+		    stats->tx_heartbeat_errors,
+		   stats->tx_compressed);
 }
 
 /*
@@ -2188,7 +2176,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dev_seq_ops = {
+static const struct seq_operations dev_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
@@ -2208,7 +2196,7 @@ static const struct file_operations dev_seq_fops = {
 	.release = seq_release,
 };
 
-static struct seq_operations softnet_seq_ops = {
+static const struct seq_operations softnet_seq_ops = {
 	.start = softnet_seq_start,
 	.next  = softnet_seq_next,
 	.stop  = softnet_seq_stop,
@@ -2228,12 +2216,135 @@ static const struct file_operations softnet_seq_fops = {
 	.release = seq_release,
 };
 
-#ifdef CONFIG_WIRELESS_EXT
-extern int wireless_proc_init(void);
-#else
-#define wireless_proc_init() 0
+/*
+ * Return the pos'th packet_type, counting ptype_all first and then the
+ * sixteen ptype_base[] chains in index order; NULL when pos is past the end.
+ */
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *pt;
+	int bucket;
+
+	/* count pos down to zero instead of counting a second index up */
+	list_for_each_entry_rcu(pt, &ptype_all, list)
+		if (pos-- == 0)
+			return pt;
+
+	for (bucket = 0; bucket < 16; bucket++)
+		list_for_each_entry_rcu(pt, &ptype_base[bucket], list)
+			if (pos-- == 0)
+				return pt;
+
+	return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	rcu_read_lock();	/* dropped again in ptype_seq_stop() */
+	return (*pos == 0) ? SEQ_START_TOKEN : ptype_get_idx(*pos - 1);
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *pt;
+	struct list_head *nxt;
+	int hash;
+	/* Advance *pos and return the entry after v, or NULL at the end. */
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);	/* first entry, if any */
+
+	pt = v;
+	nxt = pt->list.next;
+	if (pt->type == htons(ETH_P_ALL)) {	/* v sits on ptype_all */
+		if (nxt != &ptype_all)
+			goto found;
+		hash = 0;	/* ptype_all is done: go on with bucket 0 */
+		nxt = ptype_base[0].next;
+	} else
+		hash = ntohs(pt->type) & 15;	/* bucket that holds v */
+
+	while (nxt == &ptype_base[hash]) {	/* hit a list head: bucket done */
+		if (++hash >= 16)
+			return NULL;
+		nxt = ptype_base[hash].next;
+	}
+found:
+	return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+{
+	rcu_read_unlock();	/* taken in ptype_seq_start() */
+}
+
+static void ptype_seq_decode(struct seq_file *seq, void *sym)
+{
+#ifdef CONFIG_KALLSYMS
+	unsigned long offset = 0, symsize;
+	const char *symname;
+	char *modname;
+	char namebuf[128];
+
+	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
+				  &modname, namebuf);
+	/* resolved: print ":module:symbol+0xoffset" (module part optional) */
+	if (symname) {
+		char *delim = ":";
+
+		if (!modname)	/* no module name: drop the ':' delimiters too */
+			modname = delim = "";
+		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
+			   symname, offset);
+		return;
+	}
 #endif
+	/* no symbol name (or no CONFIG_KALLSYMS): print the raw pointer */
+	seq_printf(seq, "[%p]", sym);
+}
+
+/* Emit the header for SEQ_START_TOKEN, otherwise one line per packet_type. */
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq, "Type Device      Function\n");
+		return 0;
+	}
+
+	pt = v;
+	if (pt->type != htons(ETH_P_ALL))
+		seq_printf(seq, "%04x", ntohs(pt->type));
+	else
+		seq_puts(seq, "ALL ");
+	seq_printf(seq, " %-8s ", pt->dev ? pt->dev->name : "");
+	ptype_seq_decode(seq, pt->func);
+	seq_putc(seq, '\n');
+	return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {	/* see ptype_seq_open() */
+	.start = ptype_seq_start,	/* takes rcu_read_lock() */
+	.next  = ptype_seq_next,
+	.stop  = ptype_seq_stop,	/* drops rcu_read_lock() */
+	.show  = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ptype_seq_ops);	/* inode is not used */
+}
+
+static const struct file_operations ptype_seq_fops = {	/* "ptype" proc file */
+	.owner	 = THIS_MODULE,
+	.open    = ptype_seq_open,	/* attaches ptype_seq_ops via seq_open() */
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+
 static int __init dev_proc_init(void)
 {
 	int rc = -ENOMEM;
@@ -2242,13 +2353,18 @@ static int __init dev_proc_init(void)
 		goto out;
 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
 		goto out_dev;
-	if (wireless_proc_init())
+	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+		goto out_dev2;
+
+	if (wext_proc_init())
 		goto out_softnet;
 	rc = 0;
 out:
 	return rc;
 out_softnet:
 	proc_net_remove("softnet_stat");
+out_dev2:
+	proc_net_remove("ptype");
 out_dev:
 	proc_net_remove("dev");
 	goto out;
@@ -2798,29 +2914,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
 					ret = -EFAULT;
 				return ret;
 			}
-#ifdef CONFIG_WIRELESS_EXT
 			/* Take care of Wireless Extensions */
-			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
-				/* If command is `set a parameter', or
-				 * `get the encoding parameters', check if
-				 * the user has the right to do it */
-				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
-				    || cmd == SIOCGIWENCODEEXT) {
-					if (!capable(CAP_NET_ADMIN))
-						return -EPERM;
-				}
-				dev_load(ifr.ifr_name);
-				rtnl_lock();
-				/* Follow me in net/core/wireless.c */
-				ret = wireless_process_ioctl(&ifr, cmd);
-				rtnl_unlock();
-				if (IW_IS_GET(cmd) &&
-				    copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-					ret = -EFAULT;
-				return ret;
-			}
-#endif	/* CONFIG_WIRELESS_EXT */
+			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+				return wext_handle_ioctl(&ifr, cmd, arg);
 			return -EINVAL;
 	}
 }
@@ -2850,7 +2946,7 @@ static int dev_boot_phase = 1;
 static DEFINE_SPINLOCK(net_todo_list_lock);
 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
 
-static inline void net_set_todo(struct net_device *dev)
+static void net_set_todo(struct net_device *dev)
 {
 	spin_lock(&net_todo_list_lock);
 	list_add_tail(&dev->todo_list, &net_todo_list);
@@ -2891,9 +2987,7 @@ int register_netdevice(struct net_device *dev)
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->_xmit_lock);
 	dev->xmit_lock_owner = -1;
-#ifdef CONFIG_NET_CLS_ACT
 	spin_lock_init(&dev->ingress_lock);
-#endif
 
 	dev->iflink = -1;
 
@@ -3005,7 +3099,7 @@ out:
  *	chain. 0 is returned on success. A negative errno code is returned
  *	on a failure to set up the device, or if the name is a duplicate.
  *
- *	This is a wrapper around register_netdev that takes the rtnl semaphore
+ *	This is a wrapper around register_netdevice that takes the rtnl semaphore
  *	and expands the device name if you passed a format string to
  *	alloc_netdev.
  */
@@ -3160,6 +3254,11 @@ out:
 	mutex_unlock(&net_todo_run_mutex);
 }
 
+static struct net_device_stats *internal_stats(struct net_device *dev)
+{
+	return &dev->stats;
+}
+
 /**
  *	alloc_netdev - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -3195,6 +3294,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	if (sizeof_priv)
 		dev->priv = netdev_priv(dev);
 
+	dev->get_stats = internal_stats;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 56b310c0c860..7d57bf77f3a3 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -264,7 +264,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dev_mc_seq_ops = {
+static const struct seq_operations dev_mc_seq_ops = {
 	.start = dev_mc_seq_start,
 	.next  = dev_mc_seq_next,
 	.stop  = dev_mc_seq_stop,
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6168edd137dd..8d5e5a09b576 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -836,7 +836,7 @@ int dev_ethtool(struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if(dev->ethtool_ops->begin)
+	if (dev->ethtool_ops->begin)
 		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
 			return rc;
 
@@ -952,7 +952,7 @@ int dev_ethtool(struct ifreq *ifr)
 		rc =  -EOPNOTSUPP;
 	}
 
-	if(dev->ethtool_ops->complete)
+	if (dev->ethtool_ops->complete)
 		dev->ethtool_ops->complete(dev);
 
 	if (old_features != dev->features)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 3aea4e87d3d7..8c5474e16683 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,6 +44,12 @@ static void rules_ops_put(struct fib_rules_ops *ops)
 		module_put(ops->owner);
 }
 
+static void flush_route_cache(struct fib_rules_ops *ops)
+{
+	if (ops->flush_cache)
+		ops->flush_cache();
+}
+
 int fib_rules_register(struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
@@ -132,10 +138,25 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+jumped:
 		if (!fib_rule_match(rule, ops, fl, flags))
 			continue;
 
-		err = ops->action(rule, fl, flags, arg);
+		if (rule->action == FR_ACT_GOTO) {
+			struct fib_rule *target;
+
+			target = rcu_dereference(rule->ctarget);
+			if (target == NULL) {
+				continue;
+			} else {
+				rule = target;
+				goto jumped;
+			}
+		} else if (rule->action == FR_ACT_NOP)
+			continue;
+		else
+			err = ops->action(rule, fl, flags, arg);
+
 		if (err != -EAGAIN) {
 			fib_rule_get(rule);
 			arg->rule = rule;
@@ -143,7 +164,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
 		}
 	}
 
-	err = -ENETUNREACH;
+	err = -ESRCH;
 out:
 	rcu_read_unlock();
 
@@ -152,13 +173,35 @@ out:
 
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
+			    struct fib_rules_ops *ops)
+{
+	int err = -EINVAL;
+
+	if (frh->src_len)
+		if (tb[FRA_SRC] == NULL ||
+		    frh->src_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_SRC]) != ops->addr_size)
+			goto errout;
+
+	if (frh->dst_len)
+		if (tb[FRA_DST] == NULL ||
+		    frh->dst_len > (ops->addr_size * 8) ||
+		    nla_len(tb[FRA_DST]) != ops->addr_size)
+			goto errout;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *r, *last = NULL;
 	struct nlattr *tb[FRA_MAX+1];
-	int err = -EINVAL;
+	int err = -EINVAL, unresolved = 0;
 
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
@@ -173,6 +216,10 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (err < 0)
 		goto errout;
 
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
 	rule = kzalloc(ops->rule_size, GFP_KERNEL);
 	if (rule == NULL) {
 		err = -ENOMEM;
@@ -211,6 +258,28 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (!rule->pref && ops->default_pref)
 		rule->pref = ops->default_pref();
 
+	err = -EINVAL;
+	if (tb[FRA_GOTO]) {
+		if (rule->action != FR_ACT_GOTO)
+			goto errout_free;
+
+		rule->target = nla_get_u32(tb[FRA_GOTO]);
+		/* Backward jumps are prohibited to avoid endless loops */
+		if (rule->target <= rule->pref)
+			goto errout_free;
+
+		list_for_each_entry(r, ops->rules_list, list) {
+			if (r->pref == rule->target) {
+				rule->ctarget = r;
+				break;
+			}
+		}
+
+		if (rule->ctarget == NULL)
+			unresolved = 1;
+	} else if (rule->action == FR_ACT_GOTO)
+		goto errout_free;
+
 	err = ops->configure(rule, skb, nlh, frh, tb);
 	if (err < 0)
 		goto errout_free;
@@ -223,12 +292,35 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	fib_rule_get(rule);
 
+	if (ops->unresolved_rules) {
+		/*
+		 * There are unresolved goto rules in the list, check if
+		 * any of them are pointing to this new rule.
+		 */
+		list_for_each_entry(r, ops->rules_list, list) {
+			if (r->action == FR_ACT_GOTO &&
+			    r->target == rule->pref) {
+				BUG_ON(r->ctarget != NULL);
+				rcu_assign_pointer(r->ctarget, rule);
+				if (--ops->unresolved_rules == 0)
+					break;
+			}
+		}
+	}
+
+	if (rule->action == FR_ACT_GOTO)
+		ops->nr_goto_rules++;
+
+	if (unresolved)
+		ops->unresolved_rules++;
+
 	if (last)
 		list_add_rcu(&rule->list, &last->list);
 	else
 		list_add_rcu(&rule->list, ops->rules_list);
 
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	flush_route_cache(ops);
 	rules_ops_put(ops);
 	return 0;
 
@@ -239,11 +331,11 @@ errout:
 	return err;
 }
 
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
-	struct fib_rule *rule;
+	struct fib_rule *rule, *tmp;
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL;
 
@@ -260,6 +352,10 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (err < 0)
 		goto errout;
 
+	err = validate_rulemsg(frh, tb, ops);
+	if (err < 0)
+		goto errout;
+
 	list_for_each_entry(rule, ops->rules_list, list) {
 		if (frh->action && (frh->action != rule->action))
 			continue;
@@ -292,10 +388,34 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 
 		list_del_rcu(&rule->list);
+
+		if (rule->action == FR_ACT_GOTO) {
+			ops->nr_goto_rules--;
+			/* An unresolved goto rule no longer needs resolving */
+			if (rule->ctarget == NULL)
+				ops->unresolved_rules--;
+		}
+
+		/*
+		 * Check if this rule is a target to any of them. If so,
+		 * disable them. As this operation is potentially very
+		 * expensive, it is only performed if goto rules have
+		 * actually been added.
+		 */
+		if (ops->nr_goto_rules > 0) {
+			list_for_each_entry(tmp, ops->rules_list, list) {
+				if (tmp->ctarget == rule) {
+					rcu_assign_pointer(tmp->ctarget, NULL);
+					ops->unresolved_rules++;
+				}
+			}
+		}
+
 		synchronize_rcu();
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
+		flush_route_cache(ops);
 		rules_ops_put(ops);
 		return 0;
 	}
@@ -341,9 +457,16 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->action = rule->action;
 	frh->flags = rule->flags;
 
-	if (rule->ifname[0])
+	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+		frh->flags |= FIB_RULE_UNRESOLVED;
+
+	if (rule->ifname[0]) {
 		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
 
+		if (rule->ifindex == -1)
+			frh->flags |= FIB_RULE_DEV_DETACHED;
+	}
+
 	if (rule->pref)
 		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
 
@@ -353,6 +476,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (rule->mark_mask || rule->mark)
 		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
 
+	if (rule->target)
+		NLA_PUT_U32(skb, FRA_GOTO, rule->target);
+
 	if (ops->fill(rule, skb, nlh, frh) < 0)
 		goto nla_put_failure;
 
@@ -363,19 +489,14 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
+		      struct fib_rules_ops *ops)
 {
 	int idx = 0;
 	struct fib_rule *rule;
-	struct fib_rules_ops *ops;
 
-	ops = lookup_rules_ops(family);
-	if (ops == NULL)
-		return -EAFNOSUPPORT;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(rule, ops->rules_list, list) {
-		if (idx < cb->args[0])
+	list_for_each_entry(rule, ops->rules_list, list) {
+		if (idx < cb->args[1])
 			goto skip;
 
 		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
@@ -385,14 +506,44 @@ int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
 skip:
 		idx++;
 	}
-	rcu_read_unlock();
-	cb->args[0] = idx;
+	cb->args[1] = idx;
 	rules_ops_put(ops);
 
 	return skb->len;
 }
 
-EXPORT_SYMBOL_GPL(fib_rules_dump);
+static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct fib_rules_ops *ops;
+	int idx = 0, family;
+
+	family = rtnl_msg_family(cb->nlh);
+	if (family != AF_UNSPEC) {
+		/* Protocol specific dump request */
+		ops = lookup_rules_ops(family);
+		if (ops == NULL)
+			return -EAFNOSUPPORT;
+
+		return dump_rules(skb, cb, ops);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (idx < cb->args[0] || !try_module_get(ops->owner))
+			goto skip;
+
+		if (dump_rules(skb, cb, ops) < 0)
+			break;
+
+		cb->args[1] = 0;
+	skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -471,6 +622,10 @@ static struct notifier_block fib_rules_notifier = {
 
 static int __init fib_rules_init(void)
 {
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+
 	return register_netdevice_notifier(&fib_rules_notifier);
 }
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c53..bd903aaf7aa7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,11 +42,11 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	u8 *ptr = NULL;
 
 	if (k >= SKF_NET_OFF)
-		ptr = skb->nh.raw + k - SKF_NET_OFF;
+		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
-		ptr = skb->mac.raw + k - SKF_LL_OFF;
+		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
-	if (ptr >= skb->head && ptr < skb->tail)
+	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
 		return ptr;
 	return NULL;
 }
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 259473d0559d..bcc25591d8ac 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 	spin_lock_bh(lock);
 	d->lock = lock;
 	if (type)
-		d->tail = (struct rtattr *) skb->tail;
+		d->tail = (struct rtattr *)skb_tail_pointer(skb);
 	d->skb = skb;
 	d->compat_tc_stats = tc_stats_type;
 	d->compat_xstats = xstats_type;
@@ -212,7 +212,7 @@ int
 gnet_stats_finish_copy(struct gnet_dump *d)
 {
 	if (d->tail)
-		d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+		d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
 
 	if (d->compat_tc_stats)
 		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b249..e3c26a9ccad6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
 	case IF_LINK_MODE_DEFAULT:
 	default:
 		break;
-	};
+	}
 
 	dev->operstate = operstate;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3183142c6044..6f3bb73053c2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -140,6 +140,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 				n->dead = 1;
 				shrunk	= 1;
 				write_unlock(&n->lock);
+				if (n->parms->neigh_cleanup)
+					n->parms->neigh_cleanup(n);
 				neigh_release(n);
 				continue;
 			}
@@ -211,6 +213,8 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
 			}
 			write_unlock(&n->lock);
+			if (n->parms->neigh_cleanup)
+				n->parms->neigh_cleanup(n);
 			neigh_release(n);
 		}
 	}
@@ -582,9 +586,6 @@ void neigh_destroy(struct neighbour *neigh)
 			kfree(hh);
 	}
 
-	if (neigh->parms->neigh_destructor)
-		(neigh->parms->neigh_destructor)(neigh);
-
 	skb_queue_purge(&neigh->arp_queue);
 
 	dev_put(neigh->dev);
@@ -675,6 +676,8 @@ static void neigh_periodic_timer(unsigned long arg)
 			*np = n->next;
 			n->dead = 1;
 			write_unlock(&n->lock);
+			if (n->parms->neigh_cleanup)
+				n->parms->neigh_cleanup(n);
 			neigh_release(n);
 			continue;
 		}
@@ -1122,7 +1125,7 @@ int neigh_compat_output(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	if (dev->hard_header &&
 	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
@@ -1144,7 +1147,7 @@ int neigh_resolve_output(struct sk_buff *skb)
 	if (!dst || !(neigh = dst->neighbour))
 		goto discard;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	if (!neigh_event_send(neigh, skb)) {
 		int err;
@@ -1187,7 +1190,7 @@ int neigh_connected_output(struct sk_buff *skb)
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	read_lock_bh(&neigh->lock);
 	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
@@ -1328,6 +1331,8 @@ void neigh_parms_destroy(struct neigh_parms *parms)
 	kfree(parms);
 }
 
+static struct lock_class_key neigh_table_proxy_queue_class;
+
 void neigh_table_init_no_netlink(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
@@ -1376,7 +1381,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	init_timer(&tbl->proxy_timer);
 	tbl->proxy_timer.data	  = (unsigned long)tbl;
 	tbl->proxy_timer.function = neigh_proxy_process;
-	skb_queue_head_init(&tbl->proxy_queue);
+	skb_queue_head_init_class(&tbl->proxy_queue,
+			&neigh_table_proxy_queue_class);
 
 	tbl->last_flush = now;
 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
@@ -1435,7 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
 	return 0;
 }
 
-int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct ndmsg *ndm;
 	struct nlattr *dst_attr;
@@ -1500,7 +1506,7 @@ out:
 	return err;
 }
 
-int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
@@ -1780,7 +1786,7 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
 };
 
-int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct neigh_table *tbl;
 	struct ndtmsg *ndtmsg;
@@ -1904,7 +1910,7 @@ errout:
 	return err;
 }
 
-int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int family, tidx, nidx = 0;
 	int tbl_skip = cb->args[0];
@@ -2028,7 +2034,7 @@ out:
 	return rc;
 }
 
-int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct neigh_table *tbl;
 	int t, family, s_t;
@@ -2088,8 +2094,11 @@ void __neigh_for_each_release(struct neigh_table *tbl,
 			} else
 				np = &n->next;
 			write_unlock(&n->lock);
-			if (release)
+			if (release) {
+				if (n->parms->neigh_cleanup)
+					n->parms->neigh_cleanup(n);
 				neigh_release(n);
+			}
 		}
 	}
 }
@@ -2384,7 +2393,7 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations neigh_stat_seq_ops = {
+static const struct seq_operations neigh_stat_seq_ops = {
 	.start	= neigh_stat_seq_start,
 	.next	= neigh_stat_seq_next,
 	.stop	= neigh_stat_seq_stop,
@@ -2737,14 +2746,26 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
 
 #endif	/* CONFIG_SYSCTL */
 
+static int __init neigh_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+
+	return 0;
+}
+
+subsys_initcall(neigh_init);
+
 EXPORT_SYMBOL(__neigh_event_send);
 EXPORT_SYMBOL(neigh_changeaddr);
 EXPORT_SYMBOL(neigh_compat_output);
 EXPORT_SYMBOL(neigh_connected_output);
 EXPORT_SYMBOL(neigh_create);
-EXPORT_SYMBOL(neigh_delete);
 EXPORT_SYMBOL(neigh_destroy);
-EXPORT_SYMBOL(neigh_dump_info);
 EXPORT_SYMBOL(neigh_event_ns);
 EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4cbb1290a6a3..b21307b15b82 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -352,8 +352,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		if(dev->wireless_handlers &&
-		   dev->wireless_handlers->get_wireless_stats)
+		if (dev->wireless_handlers &&
+		    dev->wireless_handlers->get_wireless_stats)
 			iw = dev->wireless_handlers->get_wireless_stats(dev);
 		if (iw != NULL)
 			ret = (*format)(iw, buf);
@@ -412,20 +412,25 @@ static int netdev_uevent(struct device *d, char **envp,
 			 int num_envp, char *buf, int size)
 {
 	struct net_device *dev = to_net_dev(d);
-	int i = 0;
-	int n;
+	int retval, len = 0, i = 0;
 
 	/* pass interface to uevent. */
-	envp[i++] = buf;
-	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
-	buf += n;
-	size -= n;
-
-	if ((size <= 0) || (i >= num_envp))
-		return -ENOMEM;
-
+	retval = add_uevent_var(envp, num_envp, &i,
+				buf, size, &len,
+				"INTERFACE=%s", dev->name);
+	if (retval)
+		goto exit;
+
+	/* pass ifindex to uevent.
+	 * ifindex is useful as it won't change (interface name may change)
+	 * and is what RtNetlink uses natively. */
+	retval = add_uevent_var(envp, num_envp, &i,
+				buf, size, &len,
+				"IFINDEX=%d", dev->ifindex);
+
+exit:
 	envp[i] = NULL;
-	return 0;
+	return retval;
 }
 #endif
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index da1019451ccb..b316435b0e2a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 {
 	__wsum psum;
 
-	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (uh->check == 0 || skb_csum_unnecessary(skb))
 		return 0;
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
@@ -293,10 +293,12 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	if (!skb)
 		return;
 
-	memcpy(skb->data, msg, len);
+	skb_copy_to_linear_data(skb, msg, len);
 	skb->len += len;
 
-	skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+	skb_push(skb, sizeof(*udph));
+	skb_reset_transport_header(skb);
+	udph = udp_hdr(skb);
 	udph->source = htons(np->local_port);
 	udph->dest = htons(np->remote_port);
 	udph->len = htons(udp_len);
@@ -308,7 +310,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	if (udph->check == 0)
 		udph->check = CSUM_MANGLED_0;
 
-	skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
 
 	/* iph->version = 4; iph->ihl = 5; */
 	put_unaligned(0x45, (unsigned char *)iph);
@@ -324,7 +328,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
 	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = eth->h_proto = htons(ETH_P_IP);
 	memcpy(eth->h_source, np->local_mac, 6);
 	memcpy(eth->h_dest, np->remote_mac, 6);
@@ -359,8 +363,9 @@ static void arp_reply(struct sk_buff *skb)
 				 (2 * sizeof(u32)))))
 		return;
 
-	skb->h.raw = skb->nh.raw = skb->data;
-	arp = skb->nh.arph;
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	arp = arp_hdr(skb);
 
 	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
 	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
@@ -389,7 +394,7 @@ static void arp_reply(struct sk_buff *skb)
 	if (!send_skb)
 		return;
 
-	send_skb->nh.raw = send_skb->data;
+	skb_reset_network_header(send_skb);
 	arp = (struct arphdr *) skb_put(send_skb, size);
 	send_skb->dev = skb->dev;
 	send_skb->protocol = htons(ETH_P_ARP);
@@ -443,7 +448,7 @@ int __netpoll_rx(struct sk_buff *skb)
 		goto out;
 
 	/* check if netpoll clients need ARP */
-	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
 		skb_queue_tail(&npi->arp_tx, skb);
 		return 1;
@@ -471,6 +476,13 @@ int __netpoll_rx(struct sk_buff *skb)
 	if (skb->len < len || len < iph->ihl*4)
 		goto out;
 
+	/*
+	 * Our transport medium may have padded the buffer out.
+	 * Now we trim to the true length of the frame.
+	 */
+	if (pskb_trim_rcsum(skb, len))
+		goto out;
+
 	if (iph->protocol != IPPROTO_UDP)
 		goto out;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 74a9a32b906d..b92a322872a8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -129,6 +129,7 @@
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/capability.h>
+#include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/list.h>
@@ -163,14 +164,11 @@
 
 #define VERSION  "pktgen v2.68: Packet Generator for packet performance testing.\n"
 
-/* #define PG_DEBUG(a) a */
-#define PG_DEBUG(a)
-
 /* The buckets are exponential in 'width' */
 #define LAT_BUCKETS_MAX 32
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
-#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100)
+#define MPLS_STACK_BOTTOM htonl(0x00000100)
 
 /* Device flag bits */
 #define F_IPSRC_RND   (1<<0)	/* IP-Src Random  */
@@ -213,15 +211,11 @@ struct flow_state {
 };
 
 struct pktgen_dev {
-
 	/*
 	 * Try to keep frequent/infrequent used vars. separated.
 	 */
-
-	char ifname[IFNAMSIZ];
-	char result[512];
-
-	struct pktgen_thread *pg_thread;	/* the owner */
+	struct proc_dir_entry *entry;	/* proc file */
+	struct pktgen_thread *pg_thread;/* the owner */
 	struct list_head list;		/* Used for chaining in the thread's run-queue */
 
 	int running;		/* if this changes to false, the test will stop */
@@ -348,6 +342,8 @@ struct pktgen_dev {
 	unsigned cflows;	/* Concurrent flows (config) */
 	unsigned lflow;		/* Flow length  (config) */
 	unsigned nflows;	/* accumulated flows (stats) */
+
+	char result[512];
 };
 
 struct pktgen_hdr {
@@ -467,17 +463,6 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
 	return tmp;
 }
 
-static inline u32 pktgen_random(void)
-{
-#if 0
-	__u32 n;
-	get_random_bytes(&n, 4);
-	return n;
-#else
-	return net_random();
-#endif
-}
-
 static inline __u64 getCurMs(void)
 {
 	struct timeval tv;
@@ -511,7 +496,7 @@ static void pktgen_stop_all_threads_ifs(void);
 static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
-static int pktgen_mark_device(const char *ifname);
+
 static unsigned int scan_ip6(const char *s, char ip[16]);
 static unsigned int fmt_ip6(char *s, const char ip[16]);
 
@@ -605,7 +590,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 		   "     frags: %d  delay: %u  clone_skb: %d  ifname: %s\n",
 		   pkt_dev->nfrags,
 		   1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
-		   pkt_dev->clone_skb, pkt_dev->ifname);
+		   pkt_dev->clone_skb, pkt_dev->odev->name);
 
 	seq_printf(seq, "     flows: %u flowlen: %u\n", pkt_dev->cflows,
 		   pkt_dev->lflow);
@@ -660,7 +645,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->nr_labels) {
 		unsigned i;
 		seq_printf(seq, "     mpls: ");
-		for(i = 0; i < pkt_dev->nr_labels; i++)
+		for (i = 0; i < pkt_dev->nr_labels; i++)
 			seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
 				   i == pkt_dev->nr_labels-1 ? "\n" : ", ");
 	}
@@ -765,7 +750,7 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32
 	int i = 0;
 	*num = 0;
 
-	for(; i < maxlen; i++) {
+	for (; i < maxlen; i++) {
 		char c;
 		*num <<= 4;
 		if (get_user(c, &user_buffer[i]))
@@ -801,7 +786,7 @@ static int count_trail_chars(const char __user * user_buffer,
 			break;
 		default:
 			goto done;
-		};
+		}
 	}
 done:
 	return i;
@@ -844,7 +829,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
 			break;
 		default:
 			break;
-		};
+		}
 	}
 done_str:
 	return i;
@@ -873,7 +858,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
 		n++;
 		if (n >= MAX_MPLS_LABELS)
 			return -E2BIG;
-	} while(c == ',');
+	} while (c == ',');
 
 	pkt_dev->nr_labels = n;
 	return i;
@@ -1502,7 +1487,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (len < 0) { return len; }
 		i += len;
 		offset = sprintf(pg_result, "OK: mpls=");
-		for(n = 0; n < pkt_dev->nr_labels; n++)
+		for (n = 0; n < pkt_dev->nr_labels; n++)
 			offset += sprintf(pg_result + offset,
 					  "%08x%s", ntohl(pkt_dev->labels[n]),
 					  n == pkt_dev->nr_labels-1 ? "" : ",");
@@ -1696,13 +1681,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->ifname);
+			seq_printf(seq, "%s ", pkt_dev->odev->name);
 
 	seq_printf(seq, "\nStopped: ");
 
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (!pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->ifname);
+			seq_printf(seq, "%s ", pkt_dev->odev->name);
 
 	if (t->result[0])
 		seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1848,16 +1833,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
 /*
  * mark a device for removal
  */
-static int pktgen_mark_device(const char *ifname)
+static void pktgen_mark_device(const char *ifname)
 {
 	struct pktgen_dev *pkt_dev = NULL;
 	const int max_tries = 10, msec_per_try = 125;
 	int i = 0;
-	int ret = 0;
 
 	mutex_lock(&pktgen_thread_lock);
-	PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n",
-			ifname));
+	pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
 
 	while (1) {
 
@@ -1866,8 +1849,8 @@ static int pktgen_mark_device(const char *ifname)
 			break;	/* success */
 
 		mutex_unlock(&pktgen_thread_lock);
-		PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s "
-				"to disappear....\n", ifname));
+		pr_debug("pktgen: pktgen_mark_device waiting for %s "
+				"to disappear....\n", ifname);
 		schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
 		mutex_lock(&pktgen_thread_lock);
 
@@ -1875,79 +1858,91 @@ static int pktgen_mark_device(const char *ifname)
 			printk("pktgen_mark_device: timed out after waiting "
 			       "%d msec for device %s to be removed\n",
 			       msec_per_try * i, ifname);
-			ret = 1;
 			break;
 		}
 
 	}
 
 	mutex_unlock(&pktgen_thread_lock);
+}
 
-	return ret;
+static void pktgen_change_name(struct net_device *dev)
+{
+	struct pktgen_thread *t;
+
+	list_for_each_entry(t, &pktgen_threads, th_list) {
+		struct pktgen_dev *pkt_dev;
+
+		list_for_each_entry(pkt_dev, &t->if_list, list) {
+			if (pkt_dev->odev != dev)
+				continue;
+
+			remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+			pkt_dev->entry = create_proc_entry(dev->name, 0600,
+							   pg_proc_dir);
+			if (!pkt_dev->entry)
+				printk(KERN_ERR "pktgen: can't move proc "
+				       "entry for '%s'\n", dev->name);
+			break;
+		}
+	}
 }
 
 static int pktgen_device_event(struct notifier_block *unused,
 			       unsigned long event, void *ptr)
 {
-	struct net_device *dev = (struct net_device *)(ptr);
+	struct net_device *dev = ptr;
 
 	/* It is OK that we do not hold the group lock right now,
 	 * as we run under the RTNL lock.
 	 */
 
 	switch (event) {
-	case NETDEV_CHANGEADDR:
-	case NETDEV_GOING_DOWN:
-	case NETDEV_DOWN:
-	case NETDEV_UP:
-		/* Ignore for now */
+	case NETDEV_CHANGENAME:
+		pktgen_change_name(dev);
 		break;
 
 	case NETDEV_UNREGISTER:
 		pktgen_mark_device(dev->name);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
 
 /* Associate pktgen_dev with a device. */
 
-static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev)
+static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 {
 	struct net_device *odev;
+	int err;
 
 	/* Clean old setups */
-
 	if (pkt_dev->odev) {
 		dev_put(pkt_dev->odev);
 		pkt_dev->odev = NULL;
 	}
 
-	odev = dev_get_by_name(pkt_dev->ifname);
-
+	odev = dev_get_by_name(ifname);
 	if (!odev) {
-		printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
-		goto out;
+		printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+		return -ENODEV;
 	}
+
 	if (odev->type != ARPHRD_ETHER) {
-		printk("pktgen: not an ethernet device: \"%s\"\n",
-		       pkt_dev->ifname);
-		goto out_put;
-	}
-	if (!netif_running(odev)) {
-		printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
-		goto out_put;
+		printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+		err = -EINVAL;
+	} else if (!netif_running(odev)) {
+		printk("pktgen: device is down: \"%s\"\n", ifname);
+		err = -ENETDOWN;
+	} else {
+		pkt_dev->odev = odev;
+		return 0;
 	}
-	pkt_dev->odev = odev;
-
-	return pkt_dev->odev;
 
-out_put:
 	dev_put(odev);
-out:
-	return NULL;
-
+	return err;
 }
 
 /* Read pkt_dev from the interface and set up internal pktgen_dev
@@ -1955,10 +1950,6 @@ out:
  */
 static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 {
-	/* Try once more, just in case it works now. */
-	if (!pkt_dev->odev)
-		pktgen_setup_dev(pkt_dev);
-
 	if (!pkt_dev->odev) {
 		printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
 		sprintf(pkt_dev->result,
@@ -2095,7 +2086,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	int flow = 0;
 
 	if (pkt_dev->cflows) {
-		flow = pktgen_random() % pkt_dev->cflows;
+		flow = random32() % pkt_dev->cflows;
 
 		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
 			pkt_dev->flows[flow].count = 0;
@@ -2107,7 +2098,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		__u32 tmp;
 
 		if (pkt_dev->flags & F_MACSRC_RND)
-			mc = pktgen_random() % (pkt_dev->src_mac_count);
+			mc = random32() % pkt_dev->src_mac_count;
 		else {
 			mc = pkt_dev->cur_src_mac_offset++;
 			if (pkt_dev->cur_src_mac_offset >
@@ -2133,7 +2124,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		__u32 tmp;
 
 		if (pkt_dev->flags & F_MACDST_RND)
-			mc = pktgen_random() % (pkt_dev->dst_mac_count);
+			mc = random32() % pkt_dev->dst_mac_count;
 
 		else {
 			mc = pkt_dev->cur_dst_mac_offset++;
@@ -2157,27 +2148,26 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 	if (pkt_dev->flags & F_MPLS_RND) {
 		unsigned i;
-		for(i = 0; i < pkt_dev->nr_labels; i++)
+		for (i = 0; i < pkt_dev->nr_labels; i++)
 			if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
 				pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
-					     ((__force __be32)pktgen_random() &
+					     ((__force __be32)random32() &
 						      htonl(0x000fffff));
 	}
 
 	if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
-		pkt_dev->vlan_id = pktgen_random() % 4096;
+		pkt_dev->vlan_id = random32() & (4096-1);
 	}
 
 	if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
-		pkt_dev->svlan_id = pktgen_random() % 4096;
+		pkt_dev->svlan_id = random32() & (4096 - 1);
 	}
 
 	if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
 		if (pkt_dev->flags & F_UDPSRC_RND)
-			pkt_dev->cur_udp_src =
-			    ((pktgen_random() %
-			      (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) +
-			     pkt_dev->udp_src_min);
+			pkt_dev->cur_udp_src = random32() %
+				(pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+				+ pkt_dev->udp_src_min;
 
 		else {
 			pkt_dev->cur_udp_src++;
@@ -2188,10 +2178,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 	if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
 		if (pkt_dev->flags & F_UDPDST_RND) {
-			pkt_dev->cur_udp_dst =
-			    ((pktgen_random() %
-			      (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) +
-			     pkt_dev->udp_dst_min);
+			pkt_dev->cur_udp_dst = random32() %
+				(pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+				+ pkt_dev->udp_dst_min;
 		} else {
 			pkt_dev->cur_udp_dst++;
 			if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2206,7 +2195,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 							       saddr_max))) {
 			__u32 t;
 			if (pkt_dev->flags & F_IPSRC_RND)
-				t = ((pktgen_random() % (imx - imn)) + imn);
+				t = random32() % (imx - imn) + imn;
 			else {
 				t = ntohl(pkt_dev->cur_saddr);
 				t++;
@@ -2227,14 +2216,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 				__be32 s;
 				if (pkt_dev->flags & F_IPDST_RND) {
 
-					t = pktgen_random() % (imx - imn) + imn;
+					t = random32() % (imx - imn) + imn;
 					s = htonl(t);
 
 					while (LOOPBACK(s) || MULTICAST(s)
 					       || BADCLASS(s) || ZERONET(s)
 					       || LOCAL_MCAST(s)) {
-						t = (pktgen_random() %
-						      (imx - imn)) + imn;
+						t = random32() % (imx - imn) + imn;
 						s = htonl(t);
 					}
 					pkt_dev->cur_daddr = s;
@@ -2266,7 +2254,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 			for (i = 0; i < 4; i++) {
 				pkt_dev->cur_in6_daddr.s6_addr32[i] =
-				    (((__force __be32)pktgen_random() |
+				    (((__force __be32)random32() |
 				      pkt_dev->min_in6_daddr.s6_addr32[i]) &
 				     pkt_dev->max_in6_daddr.s6_addr32[i]);
 			}
@@ -2276,9 +2264,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
 		__u32 t;
 		if (pkt_dev->flags & F_TXSIZE_RND) {
-			t = ((pktgen_random() %
-			      (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
-			     + pkt_dev->min_pkt_size);
+			t = random32() %
+				(pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+				+ pkt_dev->min_pkt_size;
 		} else {
 			t = pkt_dev->cur_pkt_size + 1;
 			if (t > pkt_dev->max_pkt_size)
@@ -2293,7 +2281,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
 {
 	unsigned i;
-	for(i = 0; i < pkt_dev->nr_labels; i++) {
+	for (i = 0; i < pkt_dev->nr_labels; i++) {
 		*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
 	}
 	mpls--;
@@ -2315,7 +2303,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	int datalen, iplen;
 	struct iphdr *iph;
 	struct pktgen_hdr *pgh = NULL;
-	__be16 protocol = __constant_htons(ETH_P_IP);
+	__be16 protocol = htons(ETH_P_IP);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
@@ -2324,10 +2312,10 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 
 	if (pkt_dev->nr_labels)
-		protocol = __constant_htons(ETH_P_MPLS_UC);
+		protocol = htons(ETH_P_MPLS_UC);
 
 	if (pkt_dev->vlan_id != 0xffff)
-		protocol = __constant_htons(ETH_P_8021Q);
+		protocol = htons(ETH_P_8021Q);
 
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
@@ -2353,24 +2341,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
-		if(pkt_dev->svlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
 			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
 			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-			*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
 		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
 		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-		*vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
+		*vlan_encapsulated_proto = htons(ETH_P_IP);
 	}
 
-	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
-	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+	skb->network_header = skb->tail;
+	skb->transport_header = skb->network_header + sizeof(struct iphdr);
+	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+
+	iph = ip_hdr(skb);
+	udph = udp_hdr(skb);
 
 	memcpy(eth, pkt_dev->hh, 12);
 	*(__be16 *) & eth[12] = protocol;
@@ -2399,12 +2391,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	iph->check = 0;
 	iph->check = ip_fast_csum((void *)iph, iph->ihl);
 	skb->protocol = protocol;
-	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
-		VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->nr_labels * sizeof(u32) -
+			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.iph = iph;
-	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2653,7 +2644,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	int datalen;
 	struct ipv6hdr *iph;
 	struct pktgen_hdr *pgh = NULL;
-	__be16 protocol = __constant_htons(ETH_P_IPV6);
+	__be16 protocol = htons(ETH_P_IPV6);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
@@ -2661,10 +2652,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
 
 	if (pkt_dev->nr_labels)
-		protocol = __constant_htons(ETH_P_MPLS_UC);
+		protocol = htons(ETH_P_MPLS_UC);
 
 	if (pkt_dev->vlan_id != 0xffff)
-		protocol = __constant_htons(ETH_P_8021Q);
+		protocol = htons(ETH_P_8021Q);
 
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
@@ -2689,24 +2680,28 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
-		if(pkt_dev->svlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
 			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
 			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-			*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
 		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
 		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-		*vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
+		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
 	}
 
-	iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
-	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+	skb->network_header = skb->tail;
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+
+	iph = ipv6_hdr(skb);
+	udph = udp_hdr(skb);
 
 	memcpy(eth, pkt_dev->hh, 12);
 	*(__be16 *) & eth[12] = protocol;
@@ -2728,7 +2723,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	udph->len = htons(datalen + sizeof(struct udphdr));
 	udph->check = 0;	/* No checksum */
 
-	*(__be32 *) iph = __constant_htonl(0x60000000);	/* Version + flow */
+	*(__be32 *) iph = htonl(0x60000000);	/* Version + flow */
 
 	if (pkt_dev->traffic_class) {
 		/* Version + traffic class + flow (0) */
@@ -2743,13 +2738,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
 	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
 
-	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
-		VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->nr_labels * sizeof(u32) -
+			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.ipv6h = iph;
-	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2847,7 +2841,7 @@ static void pktgen_run(struct pktgen_thread *t)
 	struct pktgen_dev *pkt_dev;
 	int started = 0;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+	pr_debug("pktgen: entering pktgen_run. %p\n", t);
 
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -2879,7 +2873,7 @@ static void pktgen_stop_all_threads_ifs(void)
 {
 	struct pktgen_thread *t;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads_ifs.\n"));
+	pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -2947,7 +2941,7 @@ static void pktgen_run_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+	pr_debug("pktgen: entering pktgen_run_all_threads.\n");
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3005,7 +2999,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 
 	if (!pkt_dev->running) {
 		printk("pktgen: interface: %s is already stopped\n",
-		       pkt_dev->ifname);
+		       pkt_dev->odev->name);
 		return -EINVAL;
 	}
 
@@ -3039,7 +3033,7 @@ static void pktgen_stop(struct pktgen_thread *t)
 {
 	struct pktgen_dev *pkt_dev;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_stop\n"));
+	pr_debug("pktgen: entering pktgen_stop\n");
 
 	if_lock(t);
 
@@ -3063,7 +3057,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n"));
+	pr_debug("pktgen: entering pktgen_rem_one_if\n");
 
 	if_lock(t);
 
@@ -3092,7 +3086,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 
 	/* Remove all devices, free mem */
 
-	PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n"));
+	pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
 	if_lock(t);
 
 	list_for_each_safe(q, n, &t->if_list) {
@@ -3275,7 +3269,7 @@ static int pktgen_thread_worker(void *arg)
 
 	t->pid = current->pid;
 
-	PG_DEBUG(printk("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid));
+	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
 
 	max_before_softirq = t->max_before_softirq;
 
@@ -3333,16 +3327,18 @@ static int pktgen_thread_worker(void *arg)
 			t->control &= ~(T_REMDEV);
 		}
 
+		try_to_freeze();
+
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
-	PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
 	pktgen_stop(t);
 
-	PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
 	pktgen_rem_all_ifs(t);
 
-	PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm));
+	pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
 	return 0;
@@ -3355,13 +3351,13 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 	if_lock(t);
 
 	list_for_each_entry(p, &t->if_list, list)
-		if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) {
+		if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
 			pkt_dev = p;
 			break;
 		}
 
 	if_unlock(t);
-	PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev));
+	pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
 	return pkt_dev;
 }
 
@@ -3396,7 +3392,7 @@ out:
 static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 {
 	struct pktgen_dev *pkt_dev;
-	struct proc_dir_entry *pe;
+	int err;
 
 	/* We don't allow a device to be on several threads */
 
@@ -3438,29 +3434,28 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
 
-	strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
+	err = pktgen_setup_dev(pkt_dev, ifname);
+	if (err)
+		goto out1;
 
-	if (!pktgen_setup_dev(pkt_dev)) {
-		printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
-		if (pkt_dev->flows)
-			vfree(pkt_dev->flows);
-		kfree(pkt_dev);
-		return -ENODEV;
-	}
-
-	pe = create_proc_entry(ifname, 0600, pg_proc_dir);
-	if (!pe) {
+	pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
+	if (!pkt_dev->entry) {
 		printk("pktgen: cannot create %s/%s procfs entry.\n",
 		       PG_PROC_DIR, ifname);
-		if (pkt_dev->flows)
-			vfree(pkt_dev->flows);
-		kfree(pkt_dev);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out2;
 	}
-	pe->proc_fops = &pktgen_if_fops;
-	pe->data = pkt_dev;
+	pkt_dev->entry->proc_fops = &pktgen_if_fops;
+	pkt_dev->entry->data = pkt_dev;
 
 	return add_dev_to_thread(t, pkt_dev);
+out2:
+	dev_put(pkt_dev->odev);
+out1:
+	if (pkt_dev->flows)
+		vfree(pkt_dev->flows);
+	kfree(pkt_dev);
+	return err;
 }
 
 static int __init pktgen_create_thread(int cpu)
@@ -3530,7 +3525,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 				struct pktgen_dev *pkt_dev)
 {
 
-	PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+	pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
 
 	if (pkt_dev->running) {
 		printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
@@ -3548,9 +3543,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 
 	_rem_dev_from_if_list(t, pkt_dev);
 
-	/* Clean up proc file system */
-
-	remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
+	if (pkt_dev->entry)
+		remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
 
 	if (pkt_dev->flows)
 		vfree(pkt_dev->flows);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6055074c4b81..cec111109155 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,11 +50,13 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/fib_rules.h>
-#include <net/netlink.h>
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
+#include <net/rtnetlink.h>
+
+struct rtnl_link
+{
+	rtnl_doit_func		doit;
+	rtnl_dumpit_func	dumpit;
+};
 
 static DEFINE_MUTEX(rtnl_mutex);
 static struct sock *rtnl;
@@ -95,7 +97,151 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
-struct rtnetlink_link * rtnetlink_links[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+static inline int rtm_msgindex(int msgtype)
+{
+	int msgindex = msgtype - RTM_BASE;
+
+	/*
+	 * msgindex < 0 implies someone tried to register a netlink
+	 * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+	 * the message type has not been added to linux/rtnetlink.h
+	 */
+	BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+	return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL || tab[msgindex].doit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL || tab[msgindex].dumpit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab[msgindex].dumpit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+	struct rtnl_link *tab;
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	msgindex = rtm_msgindex(msgtype);
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL) {
+		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+		if (tab == NULL)
+			return -ENOBUFS;
+
+		rtnl_msg_handlers[protocol] = tab;
+	}
+
+	if (doit)
+		tab[msgindex].doit = doit;
+
+	if (dumpit)
+		tab[msgindex].dumpit = dumpit;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely; it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continuing.
+ */
+void rtnl_register(int protocol, int msgtype,
+		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+		panic("Unable to register rtnetlink message handler, "
+		      "protocol = %d, message type = %d\n",
+		      protocol, msgtype);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	msgindex = rtm_msgindex(msgtype);
+
+	if (rtnl_msg_handlers[protocol] == NULL)
+		return -ENOENT;
+
+	rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+	rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message types of a protocol
+ * @protocol: Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregister() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+
+	kfree(rtnl_msg_handlers[protocol]);
+	rtnl_msg_handlers[protocol] = NULL;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
 static const int rtm_min[RTM_NR_FAMILIES] =
 {
@@ -249,7 +395,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 		    operstate == IF_OPER_UNKNOWN)
 			operstate = IF_OPER_DORMANT;
 		break;
-	};
+	}
 
 	if (dev->operstate != operstate) {
 		write_lock_bh(&dev_base_lock);
@@ -393,7 +539,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx = cb->args[0];
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
@@ -402,7 +547,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
 			break;
 	}
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 
 	return skb->len;
@@ -536,17 +680,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		modified = 1;
 	}
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (tb[IFLA_WIRELESS]) {
-		/* Call Wireless Extensions.
-		 * Various stuff checked in there... */
-		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
-					     nla_len(tb[IFLA_WIRELESS]));
-		if (err < 0)
-			goto errout_dev;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	if (tb[IFLA_BROADCAST]) {
 		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
 		send_addr_notify = 1;
@@ -610,21 +743,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	} else
 		return -EINVAL;
 
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (tb[IFLA_WIRELESS]) {
-		/* Call Wireless Extensions. We need to know the size before
-		 * we can alloc. Various stuff checked in there... */
-		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
-					     nla_len(tb[IFLA_WIRELESS]),
-					     &iw_buf, &iw_buf_len);
-		if (err < 0)
-			goto errout;
-
-		iw += IW_EV_POINT_OFF;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
 	if (nskb == NULL) {
 		err = -ENOBUFS;
@@ -658,12 +776,12 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
-		if (rtnetlink_links[idx] == NULL ||
-		    rtnetlink_links[idx][type].dumpit == NULL)
+		if (rtnl_msg_handlers[idx] == NULL ||
+		    rtnl_msg_handlers[idx][type].dumpit == NULL)
 			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
-		if (rtnetlink_links[idx][type].dumpit(skb, cb))
+		if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
 			break;
 	}
 	cb->family = idx;
@@ -699,30 +817,18 @@ static int rtattr_max;
 
 /* Process one rtnetlink message. */
 
-static __inline__ int
-rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	struct rtnetlink_link *link;
-	struct rtnetlink_link *link_tab;
+	rtnl_doit_func doit;
 	int sz_idx, kind;
 	int min_len;
 	int family;
 	int type;
 	int err;
 
-	/* Only requests are handled by kernel now */
-	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-		return 0;
-
 	type = nlh->nlmsg_type;
-
-	/* A control message: ignore them */
-	if (type < RTM_BASE)
-		return 0;
-
-	/* Unknown message: reply with EINVAL */
 	if (type > RTM_MAX)
-		goto err_inval;
+		return -EOPNOTSUPP;
 
 	type -= RTM_BASE;
 
@@ -731,45 +837,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
-		*errp = -EAFNOSUPPORT;
-		return -1;
-	}
-
-	link_tab = rtnetlink_links[family];
-	if (link_tab == NULL)
-		link_tab = rtnetlink_links[PF_UNSPEC];
-	link = &link_tab[type];
+	if (family >= NPROTO)
+		return -EAFNOSUPPORT;
 
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		*errp = -EPERM;
-		return -1;
-	}
+	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
-		if (link->dumpit == NULL)
-			link = &(rtnetlink_links[PF_UNSPEC][type]);
-
-		if (link->dumpit == NULL)
-			goto err_inval;
+		rtnl_dumpit_func dumpit;
 
-		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-						link->dumpit, NULL)) != 0) {
-			return -1;
-		}
+		dumpit = rtnl_get_dumpit(family, type);
+		if (dumpit == NULL)
+			return -EOPNOTSUPP;
 
-		netlink_queue_skip(nlh, skb);
-		return -1;
+		__rtnl_unlock();
+		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		rtnl_lock();
+		return err;
 	}
 
 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
 
 	min_len = rtm_min[sz_idx];
 	if (nlh->nlmsg_len < min_len)
-		goto err_inval;
+		return -EINVAL;
 
 	if (nlh->nlmsg_len > min_len) {
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -779,25 +873,18 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 			unsigned flavor = attr->rta_type;
 			if (flavor) {
 				if (flavor > rta_max[sz_idx])
-					goto err_inval;
+					return -EINVAL;
 				rta_buf[flavor-1] = attr;
 			}
 			attr = RTA_NEXT(attr, attrlen);
 		}
 	}
 
-	if (link->doit == NULL)
-		link = &(rtnetlink_links[PF_UNSPEC][type]);
-	if (link->doit == NULL)
-		goto err_inval;
-	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+	doit = rtnl_get_doit(family, type);
+	if (doit == NULL)
+		return -EOPNOTSUPP;
 
-	*errp = err;
-	return err;
-
-err_inval:
-	*errp = -EINVAL;
-	return -1;
+	return doit(skb, nlh, (void *)&rta_buf[0]);
 }
 
 static void rtnetlink_rcv(struct sock *sk, int len)
@@ -813,25 +900,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 	} while (qlen);
 }
 
-static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
-					 .dumpit = rtnl_dump_ifinfo	 },
-	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
-	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
-	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
-	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
-#ifdef CONFIG_FIB_RULES
-	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
-	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
-#endif
-	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
-	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
-};
-
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
@@ -873,19 +941,22 @@ void __init rtnetlink_init(void)
 		panic("rtnetlink_init: cannot allocate rta_buf\n");
 
 	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
-				     THIS_MODULE);
+				     &rtnl_mutex, THIS_MODULE);
 	if (rtnl == NULL)
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
-	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
-	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+
+	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
 }
 
 EXPORT_SYMBOL(__rta_fill);
 EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 702fa8f08747..142257307fa2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
 #include <linux/cache.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/scatterlist.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -87,8 +88,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%p end:%p dev:%s\n",
-	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -105,8 +107,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%p end:%p dev:%s\n",
-	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -155,20 +158,22 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	if (!skb)
 		goto out;
 
-	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
 
-	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	/*
+	 * See comment in sk_buff definition, just before the 'tail' member
+	 */
+	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = size + sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
-	skb->tail = data;
-	skb->end  = data + size;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -197,61 +202,6 @@ nodata:
 }
 
 /**
- *	alloc_skb_from_cache	-	allocate a network buffer
- *	@cp: kmem_cache from which to allocate the data area
- *           (object size must be big enough for @size bytes + skb overheads)
- *	@size: size to allocate
- *	@gfp_mask: allocation mask
- *
- *	Allocate a new &sk_buff. The returned buffer has no headroom and
- *	tail room of size bytes. The object has a reference count of one.
- *	The return is the buffer. On a failure the return is %NULL.
- *
- *	Buffers may only be allocated from interrupts using a @gfp_mask of
- *	%GFP_ATOMIC.
- */
-struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
-				     unsigned int size,
-				     gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
-	u8 *data;
-
-	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
-	if (!skb)
-		goto out;
-
-	/* Get the DATA. */
-	size = SKB_DATA_ALIGN(size);
-	data = kmem_cache_alloc(cp, gfp_mask);
-	if (!data)
-		goto nodata;
-
-	memset(skb, 0, offsetof(struct sk_buff, truesize));
-	skb->truesize = size + sizeof(struct sk_buff);
-	atomic_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb->tail = data;
-	skb->end  = data + size;
-
-	atomic_set(&(skb_shinfo(skb)->dataref), 1);
-	skb_shinfo(skb)->nr_frags  = 0;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_segs = 0;
-	skb_shinfo(skb)->gso_type = 0;
-	skb_shinfo(skb)->frag_list = NULL;
-out:
-	return skb;
-nodata:
-	kmem_cache_free(skbuff_head_cache, skb);
-	skb = NULL;
-	goto out;
-}
-
-/**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
  *	@length: length to allocate
@@ -354,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
 		if (atomic_dec_and_test(fclone_ref))
 			kmem_cache_free(skbuff_fclone_cache, other);
 		break;
-	};
+	}
 }
 
 /**
@@ -376,15 +326,13 @@ void __kfree_skb(struct sk_buff *skb)
 		WARN_ON(in_irq());
 		skb->destructor(skb);
 	}
-#ifdef CONFIG_NETFILTER
-	nf_conntrack_put(skb->nfct);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_conntrack_put(skb->nfct);
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 #endif
-#endif
 /* XXX: IS this still necessary? - JHS */
 #ifdef CONFIG_NET_SCHED
 	skb->tc_index = 0;
@@ -451,9 +399,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->sk = NULL;
 	C(tstamp);
 	C(dev);
-	C(h);
-	C(nh);
-	C(mac);
+	C(transport_header);
+	C(network_header);
+	C(mac_header);
 	C(dst);
 	dst_clone(skb->dst);
 	C(sp);
@@ -477,26 +425,14 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(protocol);
 	n->destructor = NULL;
 	C(mark);
-#ifdef CONFIG_NETFILTER
-	C(nfct);
-	nf_conntrack_get(skb->nfct);
-	C(nfctinfo);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	C(nfct_reasm);
-	nf_conntrack_get_reasm(skb->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	C(nf_bridge);
-	nf_bridge_get(skb->nf_bridge);
-#endif
-#endif /*CONFIG_NETFILTER*/
+	__nf_copy(n, skb);
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
 	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
 	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
-	C(input_dev);
+	C(iif);
 #endif
 	skb_copy_secmark(n, skb);
 #endif
@@ -515,11 +451,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 
 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 {
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
 	/*
 	 *	Shift between the two data areas in bytes
 	 */
 	unsigned long offset = new->data - old->data;
-
+#endif
 	new->sk		= NULL;
 	new->dev	= old->dev;
 	new->priority	= old->priority;
@@ -528,9 +465,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #ifdef CONFIG_INET
 	new->sp		= secpath_get(old->sp);
 #endif
-	new->h.raw	= old->h.raw + offset;
-	new->nh.raw	= old->nh.raw + offset;
-	new->mac.raw	= old->mac.raw + offset;
+	new->transport_header = old->transport_header;
+	new->network_header   = old->network_header;
+	new->mac_header	      = old->mac_header;
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+	/* {transport,network,mac}_header are relative to skb->head */
+	new->transport_header += offset;
+	new->network_header   += offset;
+	new->mac_header	      += offset;
+#endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
 	new->fclone	= SKB_FCLONE_UNAVAILABLE;
@@ -538,22 +481,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
 	new->mark	= old->mark;
-#ifdef CONFIG_NETFILTER
-	new->nfct	= old->nfct;
-	nf_conntrack_get(old->nfct);
-	new->nfctinfo	= old->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	new->nfct_reasm = old->nfct_reasm;
-	nf_conntrack_get_reasm(old->nfct_reasm);
-#endif
+	__nf_copy(new, old);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property = old->ipvs_property;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	new->nf_bridge	= old->nf_bridge;
-	nf_bridge_get(old->nf_bridge);
-#endif
-#endif
 #ifdef CONFIG_NET_SCHED
 #ifdef CONFIG_NET_CLS_ACT
 	new->tc_verd = old->tc_verd;
@@ -590,8 +521,12 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
-				      gfp_mask);
+	struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	n = alloc_skb(skb->end + skb->data_len, gfp_mask);
+#else
+	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+#endif
 	if (!n)
 		return NULL;
 
@@ -628,8 +563,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
-
+	struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	n = alloc_skb(skb->end, gfp_mask);
+#else
+	n = alloc_skb(skb->end - skb->head, gfp_mask);
+#endif
 	if (!n)
 		goto out;
 
@@ -638,7 +577,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
-	memcpy(n->data, skb->data, n->len);
+	skb_copy_from_linear_data(skb, n->data, n->len);
 	n->csum	     = skb->csum;
 	n->ip_summed = skb->ip_summed;
 
@@ -687,7 +626,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
 	int i;
 	u8 *data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	int size = nhead + skb->end + ntail;
+#else
 	int size = nhead + (skb->end - skb->head) + ntail;
+#endif
 	long off;
 
 	if (skb_shared(skb))
@@ -701,8 +644,14 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	/* Copy only real data... and, alas, header. This should be
 	 * optimized for the cases when header is void. */
-	memcpy(data + nhead, skb->head, skb->tail - skb->head);
-	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+	memcpy(data + nhead, skb->head,
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+		skb->tail);
+#else
+		skb->tail - skb->head);
+#endif
+	memcpy(data + size, skb_end_pointer(skb),
+	       sizeof(struct skb_shared_info));
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		get_page(skb_shinfo(skb)->frags[i].page);
@@ -715,12 +664,18 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	off = (data + nhead) - skb->head;
 
 	skb->head     = data;
-	skb->end      = data + size;
 	skb->data    += off;
-	skb->tail    += off;
-	skb->mac.raw += off;
-	skb->h.raw   += off;
-	skb->nh.raw  += off;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->end      = size;
+	off           = nhead;
+#else
+	skb->end      = skb->head + size;
+#endif
+	/* {transport,network,mac}_header and tail are relative to skb->head */
+	skb->tail	      += off;
+	skb->transport_header += off;
+	skb->network_header   += off;
+	skb->mac_header	      += off;
 	skb->cloned   = 0;
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
@@ -781,7 +736,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	 */
 	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
 				      gfp_mask);
+	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
+	int off = 0;
 
 	if (!n)
 		return NULL;
@@ -791,7 +748,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/* Set the tail pointer and length */
 	skb_put(n, skb->len);
 
-	head_copy_len = skb_headroom(skb);
+	head_copy_len = oldheadroom;
 	head_copy_off = 0;
 	if (newheadroom <= head_copy_len)
 		head_copy_len = newheadroom;
@@ -805,6 +762,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 
 	copy_skb_header(n, skb);
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	off                  = newheadroom - oldheadroom;
+#endif
+	n->transport_header += off;
+	n->network_header   += off;
+	n->mac_header	    += off;
+
 	return n;
 }
 
@@ -932,7 +896,7 @@ done:
 	} else {
 		skb->len       = len;
 		skb->data_len  = 0;
-		skb->tail      = skb->data + len;
+		skb_set_tail_pointer(skb, len);
 	}
 
 	return 0;
@@ -977,7 +941,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 			return NULL;
 	}
 
-	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
 		BUG();
 
 	/* Optimization: no fragments, no reasons to preestimate
@@ -1073,7 +1037,7 @@ pull_pages:
 	skb->tail     += delta;
 	skb->data_len -= delta;
 
-	return skb->tail;
+	return skb_tail_pointer(skb);
 }
 
 /* Copy some data bits from skb to kernel buffer. */
@@ -1090,7 +1054,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 	if ((copy = start - offset) > 0) {
 		if (copy > len)
 			copy = len;
-		memcpy(to, skb->data + offset, copy);
+		skb_copy_from_linear_data_offset(skb, offset, to, copy);
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
@@ -1165,7 +1129,7 @@ fault:
  *	traversing fragment lists and such.
  */
 
-int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 {
 	int i, copy;
 	int start = skb_headlen(skb);
@@ -1176,7 +1140,7 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
 	if ((copy = start - offset) > 0) {
 		if (copy > len)
 			copy = len;
-		memcpy(skb->data + offset, from, copy);
+		skb_copy_to_linear_data_offset(skb, offset, from, copy);
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
@@ -1403,13 +1367,13 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb->h.raw - skb->data;
+		csstart = skb->csum_start - skb_headroom(skb);
 	else
 		csstart = skb_headlen(skb);
 
 	BUG_ON(csstart > skb_headlen(skb));
 
-	memcpy(to, skb->data, csstart);
+	skb_copy_from_linear_data(skb, to, csstart);
 
 	csum = 0;
 	if (csstart != skb->len)
@@ -1577,27 +1541,14 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
 	spin_unlock_irqrestore(&list->lock, flags);
 }
 
-#if 0
-/*
- * 	Tune the memory allocator for a new MTU size.
- */
-void skb_add_mtu(int mtu)
-{
-	/* Must match allocation in alloc_skb */
-	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
-
-	kmem_add_cache_size(mtu);
-}
-#endif
-
 static inline void skb_split_inside_header(struct sk_buff *skb,
 					   struct sk_buff* skb1,
 					   const u32 len, const int pos)
 {
 	int i;
 
-	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
-
+	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
+					 pos - len);
 	/* And move data appendix as is. */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -1608,7 +1559,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
 	skb1->len		   += skb1->data_len;
 	skb->data_len		   = 0;
 	skb->len		   = len;
-	skb->tail		   = skb->data + len;
+	skb_set_tail_pointer(skb, len);
 }
 
 static inline void skb_split_no_header(struct sk_buff *skb,
@@ -1933,7 +1884,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	unsigned int doffset = skb->data - skb->mac.raw;
+	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
@@ -1983,11 +1934,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		nskb->mac_len = skb->mac_len;
 
 		skb_reserve(nskb, headroom);
-		nskb->mac.raw = nskb->data;
-		nskb->nh.raw = nskb->data + skb->mac_len;
-		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
-		memcpy(skb_put(nskb, doffset), skb->data, doffset);
-
+		skb_reset_mac_header(nskb);
+		skb_set_network_header(nskb, skb->mac_len);
+		nskb->transport_header = (nskb->network_header +
+					  skb_network_header_len(skb));
+		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
+					  doffset);
 		if (!sg) {
 			nskb->csum = skb_copy_and_csum_bits(skb, offset,
 							    skb_put(nskb, len),
@@ -2000,7 +1952,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 
 		nskb->ip_summed = CHECKSUM_PARTIAL;
 		nskb->csum = skb->csum;
-		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+		skb_copy_from_linear_data_offset(skb, offset,
+						 skb_put(nskb, hsize), hsize);
 
 		while (pos < offset + len) {
 			BUG_ON(i >= nfrags);
@@ -2060,6 +2013,190 @@ void __init skb_init(void)
 						NULL, NULL);
 }
 
+/**
+ *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ *	@skb: Socket buffer containing the buffers to be mapped
+ *	@sg: The scatter-gather list to map into; its size is not checked here
+ *	@offset: The offset into the buffer's contents to start mapping
+ *	@len: Length of buffer space to be mapped; BUG if not fully mapped
+ *
+ *	Point successive @sg entries at the linear data, the page frags and
+ *	(recursively) the frag_list skbs.  Returns the number of entries used.
+ */
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb_headlen(skb);	/* offset where the next segment starts */
+	int i, copy = start - offset;
+	int elt = 0;			/* sg entries filled so far */
+
+	if (copy > 0) {			/* range begins in the linear area */
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;		/* offset just past this page frag */
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {	/* frag overlaps range */
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;	/* offset just past this chained skb */
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	BUG_ON(len);			/* range ran past the end of the skb */
+	return elt;
+}
+
+/**
+ *	skb_cow_data - Check that a socket buffer's data buffers are writable
+ *	@skb: The socket buffer to check.
+ *	@tailbits: Amount of trailing space to be added
+ *	@trailer: Returned pointer to the skb where the @tailbits space begins
+ *
+ *	Make sure that the data buffers attached to a socket buffer are
+ *	writable. If they are not, private copies are made of the data buffers
+ *	and the socket buffer is set to use these instead.
+ *
+ *	If @tailbits is given, make sure that there is space to write @tailbits
+ *	bytes of data beyond current end of socket buffer.  @trailer will be
+ *	set to point to the skb in which this space begins.
+ *
+ *	Returns the number of scatterlist elements required to completely map
+ *	the COW'd and extended socket buffer, or -ENOMEM if a copy fails.
+ */
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* A little trouble: not enough space for the trailer.
+		 * This should not happen when the stack is tuned to generate
+		 * good frames. On a miss we reallocate and reserve even more
+		 * space; 128 extra bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery: there is a frag_list, so check every fragment in turn. */
+
+	elt = 1;		/* the head skb itself counts as one element */
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;		/* sticky: set once a shared fragment is seen */
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is shared, e.g. partially pulled by someone;
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If this is the last skb, make sure it can hold the trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* No way around it: replace skb1 with a private copy. */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb2, skb1->sk);
+
+			/* The copy succeeded: link the new skb into the
+			 * list in place of the old one, then drop the old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(kfree_skb);
@@ -2094,3 +2231,6 @@ EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
 EXPORT_SYMBOL(skb_append_datato_frags);
+
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62382bd..22183c2ef284 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
-  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
 };
 static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -167,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
   "slock-27"       , "slock-28"          , "slock-29"          ,
-  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
+  "slock-AF_RXRPC" , "slock-AF_MAX"
 };
 #endif
 
@@ -361,8 +363,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	}
 #endif
 
-	if(optlen<sizeof(int))
-		return(-EINVAL);
+	if (optlen < sizeof(int))
+		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
@@ -371,265 +373,270 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 
 	lock_sock(sk);
 
-	switch(optname)
-	{
-		case SO_DEBUG:
-			if(val && !capable(CAP_NET_ADMIN))
-			{
-				ret = -EACCES;
-			}
-			else if (valbool)
-				sock_set_flag(sk, SOCK_DBG);
-			else
-				sock_reset_flag(sk, SOCK_DBG);
-			break;
-		case SO_REUSEADDR:
-			sk->sk_reuse = valbool;
-			break;
-		case SO_TYPE:
-		case SO_ERROR:
-			ret = -ENOPROTOOPT;
-			break;
-		case SO_DONTROUTE:
-			if (valbool)
-				sock_set_flag(sk, SOCK_LOCALROUTE);
-			else
-				sock_reset_flag(sk, SOCK_LOCALROUTE);
-			break;
-		case SO_BROADCAST:
-			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
-			break;
-		case SO_SNDBUF:
-			/* Don't error on this BSD doesn't and if you think
-			   about it this is right. Otherwise apps have to
-			   play 'guess the biggest size' games. RCVBUF/SNDBUF
-			   are treated in BSD as hints */
-
-			if (val > sysctl_wmem_max)
-				val = sysctl_wmem_max;
+	switch(optname) {
+	case SO_DEBUG:
+		if (val && !capable(CAP_NET_ADMIN)) {
+			ret = -EACCES;
+		}
+		else if (valbool)
+			sock_set_flag(sk, SOCK_DBG);
+		else
+			sock_reset_flag(sk, SOCK_DBG);
+		break;
+	case SO_REUSEADDR:
+		sk->sk_reuse = valbool;
+		break;
+	case SO_TYPE:
+	case SO_ERROR:
+		ret = -ENOPROTOOPT;
+		break;
+	case SO_DONTROUTE:
+		if (valbool)
+			sock_set_flag(sk, SOCK_LOCALROUTE);
+		else
+			sock_reset_flag(sk, SOCK_LOCALROUTE);
+		break;
+	case SO_BROADCAST:
+		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+		break;
+	case SO_SNDBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
+
+		if (val > sysctl_wmem_max)
+			val = sysctl_wmem_max;
 set_sndbuf:
-			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-			if ((val * 2) < SOCK_MIN_SNDBUF)
-				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
-			else
-				sk->sk_sndbuf = val * 2;
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+		if ((val * 2) < SOCK_MIN_SNDBUF)
+			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+		else
+			sk->sk_sndbuf = val * 2;
 
-			/*
-			 *	Wake up sending tasks if we
-			 *	upped the value.
-			 */
-			sk->sk_write_space(sk);
-			break;
+		/*
+		 *	Wake up sending tasks if we
+		 *	upped the value.
+		 */
+		sk->sk_write_space(sk);
+		break;
 
-		case SO_SNDBUFFORCE:
-			if (!capable(CAP_NET_ADMIN)) {
-				ret = -EPERM;
-				break;
-			}
-			goto set_sndbuf;
+	case SO_SNDBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_sndbuf;
 
-		case SO_RCVBUF:
-			/* Don't error on this BSD doesn't and if you think
-			   about it this is right. Otherwise apps have to
-			   play 'guess the biggest size' games. RCVBUF/SNDBUF
-			   are treated in BSD as hints */
+	case SO_RCVBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
 
-			if (val > sysctl_rmem_max)
-				val = sysctl_rmem_max;
+		if (val > sysctl_rmem_max)
+			val = sysctl_rmem_max;
 set_rcvbuf:
-			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-			/*
-			 * We double it on the way in to account for
-			 * "struct sk_buff" etc. overhead.   Applications
-			 * assume that the SO_RCVBUF setting they make will
-			 * allow that much actual data to be received on that
-			 * socket.
-			 *
-			 * Applications are unaware that "struct sk_buff" and
-			 * other overheads allocate from the receive buffer
-			 * during socket buffer allocation.
-			 *
-			 * And after considering the possible alternatives,
-			 * returning the value we actually used in getsockopt
-			 * is the most desirable behavior.
-			 */
-			if ((val * 2) < SOCK_MIN_RCVBUF)
-				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
-			else
-				sk->sk_rcvbuf = val * 2;
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+		/*
+		 * We double it on the way in to account for
+		 * "struct sk_buff" etc. overhead.   Applications
+		 * assume that the SO_RCVBUF setting they make will
+		 * allow that much actual data to be received on that
+		 * socket.
+		 *
+		 * Applications are unaware that "struct sk_buff" and
+		 * other overheads allocate from the receive buffer
+		 * during socket buffer allocation.
+		 *
+		 * And after considering the possible alternatives,
+		 * returning the value we actually used in getsockopt
+		 * is the most desirable behavior.
+		 */
+		if ((val * 2) < SOCK_MIN_RCVBUF)
+			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+		else
+			sk->sk_rcvbuf = val * 2;
+		break;
+
+	case SO_RCVBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
 			break;
+		}
+		goto set_rcvbuf;
 
-		case SO_RCVBUFFORCE:
-			if (!capable(CAP_NET_ADMIN)) {
-				ret = -EPERM;
-				break;
-			}
-			goto set_rcvbuf;
-
-		case SO_KEEPALIVE:
+	case SO_KEEPALIVE:
 #ifdef CONFIG_INET
-			if (sk->sk_protocol == IPPROTO_TCP)
-				tcp_set_keepalive(sk, valbool);
+		if (sk->sk_protocol == IPPROTO_TCP)
+			tcp_set_keepalive(sk, valbool);
 #endif
-			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
-			break;
-
-		case SO_OOBINLINE:
-			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+		break;
+
+	case SO_OOBINLINE:
+		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+		break;
+
+	case SO_NO_CHECK:
+		sk->sk_no_check = valbool;
+		break;
+
+	case SO_PRIORITY:
+		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+			sk->sk_priority = val;
+		else
+			ret = -EPERM;
+		break;
+
+	case SO_LINGER:
+		if (optlen < sizeof(ling)) {
+			ret = -EINVAL;	/* 1003.1g */
 			break;
-
-		case SO_NO_CHECK:
-			sk->sk_no_check = valbool;
-			break;
-
-		case SO_PRIORITY:
-			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
-				sk->sk_priority = val;
-			else
-				ret = -EPERM;
+		}
+		if (copy_from_user(&ling,optval,sizeof(ling))) {
+			ret = -EFAULT;
 			break;
-
-		case SO_LINGER:
-			if(optlen<sizeof(ling)) {
-				ret = -EINVAL;	/* 1003.1g */
-				break;
-			}
-			if (copy_from_user(&ling,optval,sizeof(ling))) {
-				ret = -EFAULT;
-				break;
-			}
-			if (!ling.l_onoff)
-				sock_reset_flag(sk, SOCK_LINGER);
-			else {
+		}
+		if (!ling.l_onoff)
+			sock_reset_flag(sk, SOCK_LINGER);
+		else {
 #if (BITS_PER_LONG == 32)
-				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
-					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
-				else
+			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
+			else
 #endif
-					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
-				sock_set_flag(sk, SOCK_LINGER);
-			}
-			break;
-
-		case SO_BSDCOMPAT:
-			sock_warn_obsolete_bsdism("setsockopt");
-			break;
-
-		case SO_PASSCRED:
-			if (valbool)
-				set_bit(SOCK_PASSCRED, &sock->flags);
+				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+			sock_set_flag(sk, SOCK_LINGER);
+		}
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("setsockopt");
+		break;
+
+	case SO_PASSCRED:
+		if (valbool)
+			set_bit(SOCK_PASSCRED, &sock->flags);
+		else
+			clear_bit(SOCK_PASSCRED, &sock->flags);
+		break;
+
+	case SO_TIMESTAMP:
+	case SO_TIMESTAMPNS:
+		if (valbool)  {
+			if (optname == SO_TIMESTAMP)
+				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 			else
-				clear_bit(SOCK_PASSCRED, &sock->flags);
-			break;
+				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
+			sock_set_flag(sk, SOCK_RCVTSTAMP);
+			sock_enable_timestamp(sk);
+		} else {
+			sock_reset_flag(sk, SOCK_RCVTSTAMP);
+			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+		}
+		break;
 
-		case SO_TIMESTAMP:
-			if (valbool)  {
-				sock_set_flag(sk, SOCK_RCVTSTAMP);
-				sock_enable_timestamp(sk);
-			} else
-				sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			break;
+	case SO_RCVLOWAT:
+		if (val < 0)
+			val = INT_MAX;
+		sk->sk_rcvlowat = val ? : 1;
+		break;
 
-		case SO_RCVLOWAT:
-			if (val < 0)
-				val = INT_MAX;
-			sk->sk_rcvlowat = val ? : 1;
-			break;
+	case SO_RCVTIMEO:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+		break;
 
-		case SO_RCVTIMEO:
-			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
-			break;
+	case SO_SNDTIMEO:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+		break;
 
-		case SO_SNDTIMEO:
-			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+#ifdef CONFIG_NETDEVICES
+	case SO_BINDTODEVICE:
+	{
+		char devname[IFNAMSIZ];
+
+		/* Sorry... */
+		if (!capable(CAP_NET_RAW)) {
+			ret = -EPERM;
 			break;
+		}
 
-#ifdef CONFIG_NETDEVICES
-		case SO_BINDTODEVICE:
-		{
-			char devname[IFNAMSIZ];
+		/* Bind this socket to a particular device like "eth0",
+		 * as specified in the passed interface name. If the
+		 * name is "" or the option length is zero the socket
+		 * is not bound.
+		 */
 
-			/* Sorry... */
-			if (!capable(CAP_NET_RAW)) {
-				ret = -EPERM;
+		if (!valbool) {
+			sk->sk_bound_dev_if = 0;
+		} else {
+			if (optlen > IFNAMSIZ - 1)
+				optlen = IFNAMSIZ - 1;
+			memset(devname, 0, sizeof(devname));
+			if (copy_from_user(devname, optval, optlen)) {
+				ret = -EFAULT;
 				break;
 			}
 
-			/* Bind this socket to a particular device like "eth0",
-			 * as specified in the passed interface name. If the
-			 * name is "" or the option length is zero the socket
-			 * is not bound.
-			 */
+			/* Remove any cached route for this socket. */
+			sk_dst_reset(sk);
 
-			if (!valbool) {
+			if (devname[0] == '\0') {
 				sk->sk_bound_dev_if = 0;
 			} else {
-				if (optlen > IFNAMSIZ - 1)
-					optlen = IFNAMSIZ - 1;
-				memset(devname, 0, sizeof(devname));
-				if (copy_from_user(devname, optval, optlen)) {
-					ret = -EFAULT;
+				struct net_device *dev = dev_get_by_name(devname);
+				if (!dev) {
+					ret = -ENODEV;
 					break;
 				}
-
-				/* Remove any cached route for this socket. */
-				sk_dst_reset(sk);
-
-				if (devname[0] == '\0') {
-					sk->sk_bound_dev_if = 0;
-				} else {
-					struct net_device *dev = dev_get_by_name(devname);
-					if (!dev) {
-						ret = -ENODEV;
-						break;
-					}
-					sk->sk_bound_dev_if = dev->ifindex;
-					dev_put(dev);
-				}
+				sk->sk_bound_dev_if = dev->ifindex;
+				dev_put(dev);
 			}
-			break;
 		}
+		break;
+	}
 #endif
 
 
-		case SO_ATTACH_FILTER:
-			ret = -EINVAL;
-			if (optlen == sizeof(struct sock_fprog)) {
-				struct sock_fprog fprog;
+	case SO_ATTACH_FILTER:
+		ret = -EINVAL;
+		if (optlen == sizeof(struct sock_fprog)) {
+			struct sock_fprog fprog;
 
-				ret = -EFAULT;
-				if (copy_from_user(&fprog, optval, sizeof(fprog)))
-					break;
-
-				ret = sk_attach_filter(&fprog, sk);
-			}
-			break;
-
-		case SO_DETACH_FILTER:
-			rcu_read_lock_bh();
-			filter = rcu_dereference(sk->sk_filter);
-			if (filter) {
-				rcu_assign_pointer(sk->sk_filter, NULL);
-				sk_filter_release(sk, filter);
-				rcu_read_unlock_bh();
+			ret = -EFAULT;
+			if (copy_from_user(&fprog, optval, sizeof(fprog)))
 				break;
-			}
+
+			ret = sk_attach_filter(&fprog, sk);
+		}
+		break;
+
+	case SO_DETACH_FILTER:
+		rcu_read_lock_bh();
+		filter = rcu_dereference(sk->sk_filter);
+		if (filter) {
+			rcu_assign_pointer(sk->sk_filter, NULL);
+			sk_filter_release(sk, filter);
 			rcu_read_unlock_bh();
-			ret = -ENONET;
 			break;
+		}
+		rcu_read_unlock_bh();
+		ret = -ENONET;
+		break;
 
-		case SO_PASSSEC:
-			if (valbool)
-				set_bit(SOCK_PASSSEC, &sock->flags);
-			else
-				clear_bit(SOCK_PASSSEC, &sock->flags);
-			break;
+	case SO_PASSSEC:
+		if (valbool)
+			set_bit(SOCK_PASSSEC, &sock->flags);
+		else
+			clear_bit(SOCK_PASSSEC, &sock->flags);
+		break;
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
-		default:
-			ret = -ENOPROTOOPT;
-			break;
+	default:
+		ret = -ENOPROTOOPT;
+		break;
 	}
 	release_sock(sk);
 	return ret;
@@ -641,8 +648,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 {
 	struct sock *sk = sock->sk;
 
-	union
-	{
+	union {
 		int val;
 		struct linger ling;
 		struct timeval tm;
@@ -651,148 +657,153 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	unsigned int lv = sizeof(int);
 	int len;
 
-	if(get_user(len,optlen))
+	if (get_user(len, optlen))
 		return -EFAULT;
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
-	switch(optname)
-	{
-		case SO_DEBUG:
-			v.val = sock_flag(sk, SOCK_DBG);
-			break;
-
-		case SO_DONTROUTE:
-			v.val = sock_flag(sk, SOCK_LOCALROUTE);
-			break;
-
-		case SO_BROADCAST:
-			v.val = !!sock_flag(sk, SOCK_BROADCAST);
-			break;
-
-		case SO_SNDBUF:
-			v.val = sk->sk_sndbuf;
-			break;
-
-		case SO_RCVBUF:
-			v.val = sk->sk_rcvbuf;
-			break;
-
-		case SO_REUSEADDR:
-			v.val = sk->sk_reuse;
-			break;
-
-		case SO_KEEPALIVE:
-			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
-			break;
-
-		case SO_TYPE:
-			v.val = sk->sk_type;
-			break;
-
-		case SO_ERROR:
-			v.val = -sock_error(sk);
-			if(v.val==0)
-				v.val = xchg(&sk->sk_err_soft, 0);
-			break;
-
-		case SO_OOBINLINE:
-			v.val = !!sock_flag(sk, SOCK_URGINLINE);
-			break;
-
-		case SO_NO_CHECK:
-			v.val = sk->sk_no_check;
-			break;
-
-		case SO_PRIORITY:
-			v.val = sk->sk_priority;
-			break;
-
-		case SO_LINGER:
-			lv		= sizeof(v.ling);
-			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
-			v.ling.l_linger	= sk->sk_lingertime / HZ;
-			break;
-
-		case SO_BSDCOMPAT:
-			sock_warn_obsolete_bsdism("getsockopt");
-			break;
-
-		case SO_TIMESTAMP:
-			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
-			break;
+	switch(optname) {
+	case SO_DEBUG:
+		v.val = sock_flag(sk, SOCK_DBG);
+		break;
+
+	case SO_DONTROUTE:
+		v.val = sock_flag(sk, SOCK_LOCALROUTE);
+		break;
+
+	case SO_BROADCAST:
+		v.val = !!sock_flag(sk, SOCK_BROADCAST);
+		break;
+
+	case SO_SNDBUF:
+		v.val = sk->sk_sndbuf;
+		break;
+
+	case SO_RCVBUF:
+		v.val = sk->sk_rcvbuf;
+		break;
+
+	case SO_REUSEADDR:
+		v.val = sk->sk_reuse;
+		break;
+
+	case SO_KEEPALIVE:
+		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+		break;
+
+	case SO_TYPE:
+		v.val = sk->sk_type;
+		break;
+
+	case SO_ERROR:
+		v.val = -sock_error(sk);
+		if (v.val==0)
+			v.val = xchg(&sk->sk_err_soft, 0);
+		break;
+
+	case SO_OOBINLINE:
+		v.val = !!sock_flag(sk, SOCK_URGINLINE);
+		break;
+
+	case SO_NO_CHECK:
+		v.val = sk->sk_no_check;
+		break;
+
+	case SO_PRIORITY:
+		v.val = sk->sk_priority;
+		break;
+
+	case SO_LINGER:
+		lv		= sizeof(v.ling);
+		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+		v.ling.l_linger	= sk->sk_lingertime / HZ;
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("getsockopt");
+		v.val = 0;	/* don't copy stack garbage to userspace */
+		break;
+	case SO_TIMESTAMP:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_RCVTSTAMPNS);
+		break;
+
+	case SO_TIMESTAMPNS:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+		break;
+
+	case SO_RCVTIMEO:
+		lv=sizeof(struct timeval);
+		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
+
+	case SO_SNDTIMEO:
+		lv=sizeof(struct timeval);
+		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
 
-		case SO_RCVTIMEO:
-			lv=sizeof(struct timeval);
-			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-				v.tm.tv_sec = 0;
-				v.tm.tv_usec = 0;
-			} else {
-				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
-			}
-			break;
+	case SO_RCVLOWAT:
+		v.val = sk->sk_rcvlowat;
+		break;
 
-		case SO_SNDTIMEO:
-			lv=sizeof(struct timeval);
-			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-				v.tm.tv_sec = 0;
-				v.tm.tv_usec = 0;
-			} else {
-				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
-			}
-			break;
+	case SO_SNDLOWAT:
+		v.val=1;
+		break;
 
-		case SO_RCVLOWAT:
-			v.val = sk->sk_rcvlowat;
-			break;
+	case SO_PASSCRED:
+		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+		break;
 
-		case SO_SNDLOWAT:
-			v.val=1;
-			break;
+	case SO_PEERCRED:
+		if (len > sizeof(sk->sk_peercred))
+			len = sizeof(sk->sk_peercred);
+		if (copy_to_user(optval, &sk->sk_peercred, len))
+			return -EFAULT;
+		goto lenout;
 
-		case SO_PASSCRED:
-			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
-			break;
-
-		case SO_PEERCRED:
-			if (len > sizeof(sk->sk_peercred))
-				len = sizeof(sk->sk_peercred);
-			if (copy_to_user(optval, &sk->sk_peercred, len))
-				return -EFAULT;
-			goto lenout;
-
-		case SO_PEERNAME:
-		{
-			char address[128];
-
-			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
-				return -ENOTCONN;
-			if (lv < len)
-				return -EINVAL;
-			if (copy_to_user(optval, address, len))
-				return -EFAULT;
-			goto lenout;
-		}
+	case SO_PEERNAME:
+	{
+		char address[128];
+
+		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+			return -ENOTCONN;
+		if (lv < len)
+			return -EINVAL;
+		if (copy_to_user(optval, address, len))
+			return -EFAULT;
+		goto lenout;
+	}
 
-		/* Dubious BSD thing... Probably nobody even uses it, but
-		 * the UNIX standard wants it for whatever reason... -DaveM
-		 */
-		case SO_ACCEPTCONN:
-			v.val = sk->sk_state == TCP_LISTEN;
-			break;
+	/* Dubious BSD thing... Probably nobody even uses it, but
+	 * the UNIX standard wants it for whatever reason... -DaveM
+	 */
+	case SO_ACCEPTCONN:
+		v.val = sk->sk_state == TCP_LISTEN;
+		break;
 
-		case SO_PASSSEC:
-			v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
-			break;
+	case SO_PASSSEC:
+		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+		break;
 
-		case SO_PEERSEC:
-			return security_socket_getpeersec_stream(sock, optval, optlen, len);
+	case SO_PEERSEC:
+		return security_socket_getpeersec_stream(sock, optval, optlen, len);
 
-		default:
-			return(-ENOPROTOOPT);
+	default:
+		return -ENOPROTOOPT;
 	}
+
 	if (len > lv)
 		len = lv;
 	if (copy_to_user(optval, &v, len))
@@ -904,6 +915,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sk_node_init(&newsk->sk_node);
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +935,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
 		newsk->sk_send_head	= NULL;
-		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 
 		sock_reset_flag(newsk, SOCK_DONE);
@@ -970,6 +981,21 @@ out:
 
 EXPORT_SYMBOL_GPL(sk_clone);
 
+void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+	__sk_dst_set(sk, dst);
+	sk->sk_route_caps = dst->dev->features;
+	if (sk->sk_route_caps & NETIF_F_GSO)
+		sk->sk_route_caps |= NETIF_F_GSO_MASK;
+	if (sk_can_gso(sk)) {
+		if (dst->header_len)
+			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		else
+			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+	}
+}
+EXPORT_SYMBOL_GPL(sk_setup_caps);
+
 void __init sk_init(void)
 {
 	if (num_physpages <= 4096) {
@@ -1220,13 +1246,13 @@ static void __lock_sock(struct sock *sk)
 {
 	DEFINE_WAIT(wait);
 
-	for(;;) {
+	for (;;) {
 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
 					TASK_UNINTERRUPTIBLE);
 		spin_unlock_bh(&sk->sk_lock.slock);
 		schedule();
 		spin_lock_bh(&sk->sk_lock.slock);
-		if(!sock_owned_by_user(sk))
+		if (!sock_owned_by_user(sk))
 			break;
 	}
 	finish_wait(&sk->sk_lock.wq, &wait);
@@ -1258,7 +1284,7 @@ static void __release_sock(struct sock *sk)
 		} while (skb != NULL);
 
 		bh_lock_sock(sk);
-	} while((skb = sk->sk_backlog.head) != NULL);
+	} while ((skb = sk->sk_backlog.head) != NULL);
 }
 
 /**
@@ -1420,7 +1446,7 @@ static void sock_def_write_space(struct sock *sk)
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible(sk->sk_sleep);
 
@@ -1482,8 +1508,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sock_set_flag(sk, SOCK_ZAPPED);
 
-	if(sock)
-	{
+	if (sock) {
 		sk->sk_type	=	sock->type;
 		sk->sk_sleep	=	&sock->wait;
 		sock->sk	=	sk;
@@ -1512,8 +1537,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
-	sk->sk_stamp.tv_sec     = -1L;
-	sk->sk_stamp.tv_usec    = -1L;
+	sk->sk_stamp = ktime_set(-1L, 0);	/* tv_sec == -1: no stamp yet */
 
 	atomic_set(&sk->sk_refcnt, 1);
 }
@@ -1554,17 +1578,36 @@ EXPORT_SYMBOL(release_sock);
 
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
+	struct timeval tv;
 	if (!sock_flag(sk, SOCK_TIMESTAMP))
 		sock_enable_timestamp(sk);
-	if (sk->sk_stamp.tv_sec == -1)
+	tv = ktime_to_timeval(sk->sk_stamp);
+	if (tv.tv_sec == -1)
 		return -ENOENT;
-	if (sk->sk_stamp.tv_sec == 0)
-		do_gettimeofday(&sk->sk_stamp);
-	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
-		-EFAULT : 0;
+	if (tv.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		tv = ktime_to_timeval(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
 }
 EXPORT_SYMBOL(sock_get_timestamp);
 
+int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct timespec ts;
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+	ts = ktime_to_timespec(sk->sk_stamp);
+	if (ts.tv_sec == -1)
+		return -ENOENT;
+	if (ts.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		ts = ktime_to_timespec(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestampns);
+
 void sock_enable_timestamp(struct sock *sk)
 {
 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
@@ -1899,7 +1942,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations proto_seq_ops = {
+static const struct seq_operations proto_seq_ops = {
 	.start  = proto_seq_start,
 	.next   = proto_seq_next,
 	.stop   = proto_seq_stop,
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1e75b1585460..b29712033dd4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -136,6 +136,14 @@ ctl_table core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_CORE_WARNINGS,
+		.procname	= "warnings",
+		.data		= &net_msg_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 07236c17fab9..adecfd281ae9 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -30,8 +30,10 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-int net_msg_cost = 5*HZ;
-int net_msg_burst = 10;
+int net_msg_cost __read_mostly = 5*HZ;
+int net_msg_burst __read_mostly = 10;
+int net_msg_warn __read_mostly = 1;
+EXPORT_SYMBOL(net_msg_warn);
 
 /*
  * All net warning printk()s should be guarded by this function.
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index a086c6312d3b..01030f346177 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 
 	if (av != NULL) {
 		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
-		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
+		av->dccpav_buf_ackno	= UINT48_MAX + 1;
 		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
 		av->dccpav_time.tv_sec	= 0;
 		av->dccpav_time.tv_usec	= 0;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 746f79d104b3..d7d9ce737244 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -33,7 +33,6 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
-
 #include "../ccid.h"
 #include "../dccp.h"
 #include "lib/packet_history.h"
@@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
 static struct dccp_li_hist *ccid3_li_hist;
 
+/*
+ *	Transmitter Half-Connection Routines
+ */
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
 {
@@ -80,23 +82,37 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 }
 
 /*
- * Recalculate scheduled nominal send time t_nom, inter-packet interval
- * t_ipi, and delta value. Should be called after each change to X.
+ * Compute the initial sending rate X_init according to RFC 3390:
+ *	w_init   =    min(4 * MSS, max(2 * MSS, 4380 bytes))
+ *	X_init   =    w_init / RTT
+ * For consistency with other parts of the code, X_init is scaled by 2^6.
  */
-static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
+static inline u64 rfc3390_initial_rate(struct sock *sk)
 {
-	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+	const struct dccp_sock *dp = dccp_sk(sk);
+	const __u32 w_init = min(4 * dp->dccps_mss_cache,
+				 max(2 * dp->dccps_mss_cache, 4380U));
 
-	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
-	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
-					   hctx->ccid3hctx_x >> 6);
+	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
+}
 
-	/* Update nominal send time with regard to the new t_ipi */
-	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+/*
+ * Recalculate t_ipi and delta (should be called whenever X or s changes)
+ */
+static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
+{
+	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
+					     hctx->ccid3hctx_x);
 
 	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
 	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
 					   TFRC_OPSYS_HALF_TIME_GRAN);
+
+	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
+		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
+		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
+
 }
 /*
  * Update X by
@@ -112,19 +128,28 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
  *       fine-grained resolution of sending rates. This requires scaling by 2^6
  *       throughout the code. Only X_calc is unscaled (in bytes/second).
  *
- * If X has changed, we also update the scheduled send time t_now,
- * the inter-packet interval t_ipi, and the delta value.
  */
 static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
 	const  __u64 old_x = hctx->ccid3hctx_x;
 
+	/*
+	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
+	 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
+	 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
+	 */
+	if (unlikely(hctx->ccid3hctx_idle)) {
+		min_rate = rfc3390_initial_rate(sk);
+		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
+	}
+
 	if (hctx->ccid3hctx_p > 0) {
 
 		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
-					hctx->ccid3hctx_x_recv * 2);
+					min_rate);
 		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
 					(((__u64)hctx->ccid3hctx_s) << 6) /
 								TFRC_T_MBI);
@@ -133,14 +158,21 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 			(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
 
 		hctx->ccid3hctx_x =
-			max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+			max(min(2 * hctx->ccid3hctx_x, min_rate),
 			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
 				       hctx->ccid3hctx_rtt));
 		hctx->ccid3hctx_t_ld = *now;
 	}
 
-	if (hctx->ccid3hctx_x != old_x)
-		ccid3_update_send_time(hctx);
+	if (hctx->ccid3hctx_x != old_x) {
+		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
+			       "X_recv=%u\n", (unsigned)(old_x >> 6),
+			       (unsigned)(hctx->ccid3hctx_x >> 6),
+			       hctx->ccid3hctx_x_calc,
+			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
+
+		ccid3_update_send_interval(hctx);
+	}
 }
 
 /*
@@ -149,17 +181,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
  */
 static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
-	if (unlikely(len == 0))
-		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
-	else
-		hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
-				    (9 * hctx->ccid3hctx_s + len) / 10;
-	/*
-	 * Note: We could do a potential optimisation here - when `s' changes,
-	 *	 recalculate sending rate and consequently t_ipi, t_delta, and
-	 *	 t_now. This is however non-standard, and the benefits are not
-	 *	 clear, so it is currently left out.
-	 */
+	const u16 old_s = hctx->ccid3hctx_s;
+
+	hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
+
+	if (hctx->ccid3hctx_s != old_s)
+		ccid3_update_send_interval(hctx);
 }
 
 /*
@@ -193,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	struct timeval now;
 	unsigned long t_nfb = USEC_PER_SEC / 5;
 
 	bh_lock_sock(sk);
@@ -205,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hctx->ccid3hctx_state));
 
+	hctx->ccid3hctx_idle = 1;
+
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 		/* RFC 3448, 4.4: Halve send rate directly */
@@ -219,53 +249,37 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		/* The value of R is still undefined and so we can not recompute
 		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
-		ccid3_update_send_time(hctx);
+		ccid3_update_send_interval(hctx);
 		break;
 	case TFRC_SSTATE_FBACK:
 		/*
-		 * Check if IDLE since last timeout and recv rate is less than
-		 * 4 packets (in units of 64*bytes/sec) per RTT
+		 *  Modify the cached value of X_recv [RFC 3448, 4.4]
+		 *
+		 *  If (p == 0 || X_calc > 2 * X_recv)
+		 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
+		 *  Else
+		 *    X_recv = X_calc / 4;
+		 *
+		 *  Note that X_recv is scaled by 2^6 while X_calc is not
 		 */
-		if (!hctx->ccid3hctx_idle ||
-		    (hctx->ccid3hctx_x_recv >= 4 *
-		     scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
-				hctx->ccid3hctx_rtt))) {
-			struct timeval now;
+		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
-			ccid3_pr_debug("%s(%p, state=%s), not idle\n",
-				       dccp_role(sk), sk,
-				   ccid3_tx_state_name(hctx->ccid3hctx_state));
+		if (hctx->ccid3hctx_p == 0 ||
+		    (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
 
-			/*
-			 *  Modify the cached value of X_recv [RFC 3448, 4.4]
-			 *
-			 *  If (p == 0 || X_calc > 2 * X_recv)
-			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
-			 *  Else
-			 *    X_recv = X_calc / 4;
-			 *
-			 *  Note that X_recv is scaled by 2^6 while X_calc is not
-			 */
-			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
-
-			if (hctx->ccid3hctx_p  == 0 ||
-			    (hctx->ccid3hctx_x_calc >
-			     (hctx->ccid3hctx_x_recv >> 5))) {
-
-				hctx->ccid3hctx_x_recv =
-					max(hctx->ccid3hctx_x_recv / 2,
-					    (((__u64)hctx->ccid3hctx_s) << 6) /
-							  (2 * TFRC_T_MBI));
-
-				if (hctx->ccid3hctx_p == 0)
-					dccp_timestamp(sk, &now);
-			} else {
-				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
-				hctx->ccid3hctx_x_recv <<= 4;
-			}
-			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
-			ccid3_hc_tx_update_x(sk, &now);
+			hctx->ccid3hctx_x_recv =
+				max(hctx->ccid3hctx_x_recv / 2,
+				    (((__u64)hctx->ccid3hctx_s) << 6) /
+							      (2 * TFRC_T_MBI));
+
+			if (hctx->ccid3hctx_p == 0)
+				dccp_timestamp(sk, &now);
+		} else {
+			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+			hctx->ccid3hctx_x_recv <<= 4;
 		}
+		/* Now recalculate X [RFC 3448, 4.3, step (4)] */
+		ccid3_hc_tx_update_x(sk, &now);
 		/*
 		 * Schedule no feedback timer to expire in
 		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
@@ -280,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		goto out;
 	}
 
-	hctx->ccid3hctx_idle = 1;
-
 restart_timer:
 	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
@@ -322,24 +334,35 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
 		hctx->ccid3hctx_last_win_count	 = 0;
 		hctx->ccid3hctx_t_last_win_count = now;
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
-		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
-		ccid3_hc_tx_update_s(hctx, skb->len);
-		hctx->ccid3hctx_x = hctx->ccid3hctx_s;
-		hctx->ccid3hctx_x <<= 6;
-
-		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
-		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
-		/* Initial delta: minimum of 0.5 sec and t_gran/2 */
-		hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
 
 		/* Set t_0 for initial packet */
 		hctx->ccid3hctx_t_nom = now;
+
+		hctx->ccid3hctx_s = skb->len;
+
+		/*
+		 * Use initial RTT sample when available: recommended by erratum
+		 * to RFC 4342. This implements the initialisation procedure of
+		 * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
+		 */
+		if (dp->dccps_syn_rtt) {
+			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+			hctx->ccid3hctx_t_ld = now;
+		} else {
+			/* Sender does not have RTT sample: X = MSS/second */
+			hctx->ccid3hctx_x = dp->dccps_mss_cache;
+			hctx->ccid3hctx_x <<= 6;
+		}
+		ccid3_update_send_interval(hctx);
+
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
 		break;
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
 		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
 		 *
@@ -361,6 +384,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	/* prepare to send now (add options etc.) */
 	dp->dccps_hc_tx_insert_options = 1;
 	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+	hctx->ccid3hctx_idle = 0;
 
 	/* set the nominal send time for the next following packet */
 	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -391,7 +415,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
 	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
 	packet->dccphtx_sent   = 1;
-	hctx->ccid3hctx_idle   = 0;
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -402,8 +425,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_tx_hist_entry *packet;
 	struct timeval now;
 	unsigned long t_nfb;
-	u32 pinv;
-	suseconds_t r_sample, t_elapsed;
+	u32 pinv, r_sample;
 
 	BUG_ON(hctx == NULL);
 
@@ -445,18 +467,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
 		 *	R_sample  =  (now - t_recvdata) - t_elapsed
 		 */
-		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
-		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
-
-		DCCP_BUG_ON(r_sample < 0);
-		if (unlikely(r_sample <= t_elapsed))
-			DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
-				  (int)r_sample, (int)t_elapsed);
-		else
-			r_sample -= t_elapsed;
-		CCID3_RTT_SANITY_CHECK(r_sample);
+		r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
 
-		/* Update RTT estimate by
+		/*
+		 * Update RTT estimate by
 		 * If (No feedback recv)
 		 *    R = R_sample;
 		 * Else
@@ -467,27 +481,23 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
 			/*
 			 * Larger Initial Windows [RFC 4342, sec. 5]
-			 * We deviate in that we use `s' instead of `MSS'.
 			 */
-			__u64 w_init = min(4 * hctx->ccid3hctx_s,
-					   max(2 * hctx->ccid3hctx_s, 4380));
 			hctx->ccid3hctx_rtt  = r_sample;
-			hctx->ccid3hctx_x    = scaled_div(w_init << 6, r_sample);
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 
-			ccid3_update_send_time(hctx);
+			ccid3_update_send_interval(hctx);
 
-			ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
-				       "R_sample=%dus, X=%u\n", dccp_role(sk),
+			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
+				       "R_sample=%uus, X=%u\n", dccp_role(sk),
 				       sk, hctx->ccid3hctx_s,
-				       (unsigned long long)w_init,
-				       (int)r_sample,
+				       dp->dccps_mss_cache, r_sample,
 				       (unsigned)(hctx->ccid3hctx_x >> 6));
 
 			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 		} else {
 			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
-						   (u32)r_sample) / 10;
+						   r_sample) / 10;
 
 			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
 			if (hctx->ccid3hctx_p > 0)
@@ -497,10 +507,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 						    hctx->ccid3hctx_p);
 			ccid3_hc_tx_update_x(sk, &now);
 
-			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
 				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
 				       dccp_role(sk),
-				       sk, hctx->ccid3hctx_rtt, (int)r_sample,
+				       sk, hctx->ccid3hctx_rtt, r_sample,
 				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
 				       hctx->ccid3hctx_x_calc,
 				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
@@ -644,10 +654,50 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
 }
 
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return;
+
+	BUG_ON(hctx == NULL);
+
+	info->tcpi_rto = hctx->ccid3hctx_t_rto;
+	info->tcpi_rtt = hctx->ccid3hctx_rtt;
+}
+
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const void *val;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_TX_INFO:
+		if (len < sizeof(hctx->ccid3hctx_tfrc))
+			return -EINVAL;
+		len = sizeof(hctx->ccid3hctx_tfrc);
+		val = &hctx->ccid3hctx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
- * RX Half Connection methods
+ *	Receiver Half-Connection Routines
  */
-
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 {
@@ -977,8 +1027,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	const struct dccp_options_received *opt_recv;
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
-	u32 p_prev, rtt_prev;
-	suseconds_t r_sample, t_elapsed;
+	u32 p_prev, r_sample, rtt_prev;
 	int loss, payload_size;
 
 	BUG_ON(hcrx == NULL);
@@ -994,17 +1043,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			break;
 		rtt_prev = hcrx->ccid3hcrx_rtt;
 		dccp_timestamp(sk, &now);
-		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
-		r_sample = timeval_usecs(&now);
-		t_elapsed = opt_recv->dccpor_elapsed_time * 10;
-
-		DCCP_BUG_ON(r_sample < 0);
-		if (unlikely(r_sample <= t_elapsed))
-			DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
-				  (long)r_sample, (long)t_elapsed);
-		else
-			r_sample -= t_elapsed;
-		CCID3_RTT_SANITY_CHECK(r_sample);
+		r_sample = dccp_sample_rtt(sk, &now, NULL);
 
 		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
 			hcrx->ccid3hcrx_rtt = r_sample;
@@ -1132,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
 }
 
-static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
-{
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return;
-
-	BUG_ON(hctx == NULL);
-
-	info->tcpi_rto = hctx->ccid3hctx_t_rto;
-	info->tcpi_rtt = hctx->ccid3hctx_rtt;
-}
-
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
@@ -1173,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 	return 0;
 }
 
-static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
-				  u32 __user *optval, int __user *optlen)
-{
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	const void *val;
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return -EINVAL;
-
-	switch (optname) {
-	case DCCP_SOCKOPT_CCID_TX_INFO:
-		if (len < sizeof(hctx->ccid3hctx_tfrc))
-			return -EINVAL;
-		len = sizeof(hctx->ccid3hctx_tfrc);
-		val = &hctx->ccid3hctx_tfrc;
-		break;
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	if (put_user(len, optlen) || copy_to_user(optval, val, len))
-		return -EFAULT;
-
-	return 0;
-}
-
 static struct ccid_operations ccid3 = {
 	.ccid_id		   = DCCPC_CCID3,
 	.ccid_name		   = "ccid3",
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 15776a88c090..8d31b389c19c 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,16 +51,6 @@
 /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
 #define TFRC_T_MBI		   64
 
-/* What we think is a reasonable upper limit on RTT values */
-#define CCID3_SANE_RTT_MAX	   ((suseconds_t)(4 * USEC_PER_SEC))
-
-#define CCID3_RTT_SANITY_CHECK(rtt) 			do {		   \
-		if (rtt > CCID3_SANE_RTT_MAX) {				   \
-			DCCP_CRIT("RTT (%d) too large, substituting %d",   \
-				  (int)rtt, (int)CCID3_SANE_RTT_MAX);	   \
-			rtt = CCID3_SANE_RTT_MAX;			   \
-		} 					} while (0)
-
 enum ccid3_options {
 	TFRC_OPT_LOSS_EVENT_RATE = 192,
 	TFRC_OPT_LOSS_INTERVALS	 = 193,
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 0a0baef16b3e..372d7e75cdd8 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 	u32 w_tot  = 0;
 
 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (li_entry->dccplih_interval != ~0) {
+		if (li_entry->dccplih_interval != ~0U) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
 			if (i != 0)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a0e7cd183a5d..d8ad27bfe01a 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -31,13 +31,9 @@
 					      __stringify(cond));          \
 			     } while (0)
 
-#ifdef MODULE
 #define DCCP_PRINTK(enable, fmt, args...)	do { if (enable)	     \
 							printk(fmt, ##args); \
 						} while(0)
-#else
-#define DCCP_PRINTK(enable, fmt, args...)	printk(fmt, ##args)
-#endif
 #define DCCP_PR_DEBUG(enable, fmt, a...)	DCCP_PRINTK(enable, KERN_DEBUG \
 						  "%s: " fmt, __FUNCTION__, ##a)
 
@@ -75,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 /* RFC 1122, 4.2.3.1 initial RTO value */
 #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
 
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+/* bounds for sampled RTT values from packet exchanges (in usec) */
+#define DCCP_SANE_RTT_MIN	100
+#define DCCP_SANE_RTT_MAX	(4 * USEC_PER_SEC)
+
 /* Maximal interval between probes for local resources.  */
 #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
 
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
-
 /* sysctl variables for DCCP */
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
@@ -92,17 +92,43 @@ extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
 
+/*
+ *	48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN	  0x800000000000LL		/* 2^47	    */
+#define UINT48_MAX	  0xFFFFFFFFFFFFLL		/* 2^48 - 1 */
+#define COMPLEMENT48(x)	 (0x1000000000000LL - (x))	/* 2^48 - x */
+#define TO_SIGNED48(x)	 (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
+#define TO_UNSIGNED48(x) (((x) >= 0)?	     (x) :  COMPLEMENT48(-(x)))
+#define ADD48(a, b)	 (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b)	 ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+	*seqno = value & UINT48_MAX;	/* keep only the low 48 bits */
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+	*seqno = ADD48(*seqno, 1);	/* increment, wrapping modulo 2^48 */
+}
+
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno2_unused_doc_guard, const u64 seqno2)
+{
+	u64 delta = SUB48(seqno2, seqno1);
+
+	return TO_SIGNED48(delta);
+}
+
 /* is seq1 < seq2 ? */
 static inline int before48(const u64 seq1, const u64 seq2)
 {
-	return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+	return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
 }
 
 /* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
-	return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2)	before48(seq2, seq1)
 
 /* is seq2 <= seq1 <= seq3 ? */
 static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
@@ -118,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 /* is seq1 next seqno after seq2 */
 static inline int follows48(const u64 seq1, const u64 seq2)
 {
-	int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
-
-	return diff==1;
+	return dccp_delta_seqno(seq2, seq1) == 1;
 }
 
 enum {
@@ -191,7 +215,6 @@ extern void dccp_send_sync(struct sock *sk, const u64 seq,
 			   const enum dccp_pkt_type pkt_type);
 
 extern void dccp_write_xmit(struct sock *sk, int block);
-extern void dccp_write_xmit_timer(unsigned long data);
 extern void dccp_write_space(struct sock *sk);
 
 extern void dccp_init_xmit_timers(struct sock *sk);
@@ -273,6 +296,8 @@ extern int	   dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int	   dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
 extern void	   dccp_send_close(struct sock *sk, const int active);
 extern int	   dccp_invalid_packet(struct sk_buff *skb);
+extern u32	   dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+						    struct timeval *t_history);
 
 static inline int dccp_bad_service_code(const struct sock *sk,
 					const __be32 service)
@@ -314,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb)
 	return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
 }
 
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
-	if (value > DCCP_MAX_SEQNO)
-		value -= DCCP_MAX_SEQNO + 1;
-	*seqno = value;
-}
-
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
-	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
-}
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
-	if (++*seqno > DCCP_MAX_SEQNO)
-		*seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
 
 static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
 {
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 78b043c458bf..da6ec185ed5b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 	    dh->dccph_type == DCCP_PKT_SYNCACK) {
 		if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
 			      dp->dccps_awl, dp->dccps_awh) &&
-		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+		    dccp_delta_seqno(dp->dccps_swl,
+				     DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
 			dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
 		else
 			return -1;
@@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		if (dp->dccps_role != DCCP_ROLE_CLIENT)
 			goto send_sync;
 check_seq:
-		if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+		if (dccp_delta_seqno(dp->dccps_osr,
+				     DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
 send_sync:
 			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
 				       DCCP_PKT_SYNC);
@@ -298,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		if (dccp_parse_options(sk, skb))
 			goto out_invalid_packet;
 
+		/* Obtain RTT sample from SYN exchange (used by CCID 3) */
+		if (dp->dccps_options_received.dccpor_timestamp_echo) {
+			struct timeval now;
+
+			dccp_timestamp(sk, &now);
+			dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
+		}
+
 		if (dccp_msk(sk)->dccpms_send_ack_vector &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 				    DCCP_SKB_CB(skb)->dccpd_seq,
@@ -575,3 +585,43 @@ discard:
 }
 
 EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
+
+/**
+ * dccp_sample_rtt  -  Sample RTT from packet exchange
+ *
+ * @sk:     connected dccp_sock
+ * @t_recv: receive timestamp of packet with timestamp echo
+ * @t_hist: packet history timestamp, or NULL to use the timestamp echo option
+ */
+u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+				     struct timeval *t_hist)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_options_received *or = &dp->dccps_options_received;
+	suseconds_t delta;
+
+	if (t_hist == NULL) {
+		if (!or->dccpor_timestamp_echo) {
+			DCCP_WARN("packet without timestamp echo\n");
+			return DCCP_SANE_RTT_MAX;	/* no echo: use upper bound */
+		}
+		timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
+		delta = timeval_usecs(t_recv);
+	} else
+		delta = timeval_delta(t_recv, t_hist);
+
+	delta -= or->dccpor_elapsed_time * 10;		/* either set or 0 */
+
+	if (unlikely(delta <= 0)) {
+		DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
+		return DCCP_SANE_RTT_MIN;
+	}
+	if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
+		DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
+		return DCCP_SANE_RTT_MAX;
+	}
+
+	return delta;	/* here: 0 < delta <= DCCP_SANE_RTT_MAX */
+}
+
+EXPORT_SYMBOL_GPL(dccp_sample_rtt);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4a83978aa660..718f2fa923a1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 							(iph->ihl << 2));
 	struct dccp_sock *dp;
 	struct inet_sock *inet;
-	const int type = skb->h.icmph->type;
-	const int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	__u64 seq;
 	int err;
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check);
 
 static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
 {
-	return secure_dccp_sequence_number(skb->nh.iph->daddr,
-					   skb->nh.iph->saddr,
+	return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
+					   ip_hdr(skb)->saddr,
 					   dccp_hdr(skb)->dccph_dport,
 					   dccp_hdr(skb)->dccph_sport);
 }
@@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->opt	   = ireq->opt;
 	ireq->opt	   = NULL;
 	newinet->mc_index  = inet_iif(skb);
-	newinet->mc_ttl	   = skb->nh.iph->ttl;
+	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
 	newinet->id	   = jiffies;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
@@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
 	struct rtable *rt;
 	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
 			    .nl_u = { .ip4_u =
-				      { .daddr = skb->nh.iph->saddr,
-					.saddr = skb->nh.iph->daddr,
+				      { .daddr = ip_hdr(skb)->saddr,
+					.saddr = ip_hdr(skb)->daddr,
 					.tos = RT_CONN_FLAGS(sk) } },
 			    .proto = sk->sk_protocol,
 			    .uli_u = { .ports =
@@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	int err;
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const struct iphdr *rxiph;
 	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
 				       sizeof(struct dccp_hdr_ext) +
 				       sizeof(struct dccp_hdr_reset);
@@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
-						      rxskb->nh.iph->daddr);
+	rxiph = ip_hdr(rxskb);
+	dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
+						 rxiph->daddr);
 
 	bh_lock_sock(dccp_v4_ctl_socket->sk);
 	err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
-				    rxskb->nh.iph->daddr,
-				    rxskb->nh.iph->saddr, NULL);
+				    rxiph->daddr, rxiph->saddr, NULL);
 	bh_unlock_sock(dccp_v4_ctl_socket->sk);
 
 	if (net_xmit_eval(err) == 0) {
@@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = skb->nh.iph->daddr;
-	ireq->rmt_addr = skb->nh.iph->saddr;
+	ireq->loc_addr = ip_hdr(skb)->daddr;
+	ireq->rmt_addr = ip_hdr(skb)->saddr;
 	ireq->opt	= NULL;
 
 	/*
@@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
 static int dccp_v4_rcv(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
+	const struct iphdr *iph;
 	struct sock *sk;
 	int min_cov;
 
@@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 	if (dccp_invalid_packet(skb))
 		goto discard_it;
 
+	iph = ip_hdr(skb);
 	/* Step 1: If header checksum is incorrect, drop packet and return */
-	if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+	if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
 		DCCP_WARN("dropped packet with invalid checksum\n");
 		goto discard_it;
 	}
@@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 		      "src=%u.%u.%u.%u@%-5d "
 		      "dst=%u.%u.%u.%u@%-5d seq=%llu",
 		      dccp_packet_name(dh->dccph_type),
-		      NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
-		      NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+		      NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
+		      NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
 		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
 
 	if (dccp_packet_without_ack(skb)) {
@@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
 	sk = __inet_lookup(&dccp_hashinfo,
-			   skb->nh.iph->saddr, dh->dccph_sport,
-			   skb->nh.iph->daddr, dh->dccph_dport,
-			   inet_iif(skb));
-
+			   iph->saddr, dh->dccph_sport,
+			   iph->daddr, dh->dccph_dport, inet_iif(skb));
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db3967..64eac2515aa2 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 
 static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
 {
-	return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
-					     skb->nh.ipv6h->saddr.s6_addr32,
+	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+					     ipv6_hdr(skb)->saddr.s6_addr32,
 					     dccp_hdr(skb)->dccph_dport,
 					     dccp_hdr(skb)->dccph_sport     );
 
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk,
-					(struct ipv6_rt_hdr *)(pktopts->nh.raw +
-							       rxopt->srcrt));
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt != NULL && opt->srcrt != NULL) {
@@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
 static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	struct ipv6hdr *rxip6h;
 	const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
 				       sizeof(struct dccp_hdr_ext) +
 				       sizeof(struct dccp_hdr_reset);
@@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
-						      &rxskb->nh.ipv6h->daddr);
+	rxip6h = ipv6_hdr(rxskb);
+	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+						      &rxip6h->daddr);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
 
 	fl.proto = IPPROTO_DCCP;
 	fl.oif = inet6_iif(rxskb);
@@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
 static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct ipv6hdr *iph = skb->nh.ipv6h;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq6 = inet6_rsk(req);
-	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	ireq6->pktopts	= NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
@@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -573,8 +575,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		if (rxopt->srcrt)
 			opt = ipv6_invert_rthdr(sk,
-				(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
-						       rxopt->srcrt));
+		   (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 	/*
 	 * Clone native IPv6 options from listening socket (if any)
@@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 		goto discard_it;
 
 	/* Step 1: If header checksum is incorrect, drop packet and return. */
-	if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
-				     &skb->nh.ipv6h->daddr)) {
+	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr)) {
 		DCCP_WARN("dropped packet with invalid checksum\n");
 		goto discard_it;
 	}
@@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+	sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
 			    dh->dccph_sport,
-			    &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
 			    inet6_iif(skb));
 	/*
 	 * Step 2:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6d235b3013dd..e18e249ac49b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,7 +27,7 @@
 struct inet_timewait_death_row dccp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
 	.period		= DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
-	.death_lock	= SPIN_LOCK_UNLOCKED,
+	.death_lock	= __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
 	.hashinfo	= &dccp_hashinfo,
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&dccp_death_row),
diff --git a/net/dccp/options.c b/net/dccp/options.c
index ca13f7731994..34d536d5f1a1 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
-EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
-
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
@@ -174,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
 
 			dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
-				      "ackno=%llu, ",  dccp_role(sk),
+				      "ackno=%llu", dccp_role(sk),
 				      opt_recv->dccpor_timestamp_echo,
 				      len + 2,
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 
 
-			if (len == 4)
+			if (len == 4) {
+				dccp_pr_debug_cat("\n");
 				break;
+			}
 
 			if (len == 6)
 				elapsed_time = ntohs(*(__be16 *)(value + 4));
 			else
 				elapsed_time = ntohl(*(__be32 *)(value + 4));
 
+			dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+
 			/* Give precedence to the biggest ELAPSED_TIME */
 			if (elapsed_time > opt_recv->dccpor_elapsed_time)
 				opt_recv->dccpor_elapsed_time = elapsed_time;
@@ -565,6 +567,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_options_feat(sk, skb))
 		return -1;
 
+	/*
+	 * Obtain RTT sample from Request/Response exchange.
+	 * This is currently used in CCID 3 initialisation.
+	 */
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+	    dccp_insert_option_timestamp(sk, skb))
+		return -1;
+
 	/* XXX: insert other options when appropriate */
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aa21cc4de37f..c8d843e983fc 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 		if (rc <= 0)
 			break;
+		dccp_pr_debug("delayed send by %d msec\n", rc);
 		delay = msecs_to_jiffies(rc);
 		sk->sk_write_pending++;
 		release_sock(sk);
@@ -255,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block)
 				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
 					 err);
 		} else {
-			dccp_pr_debug("packet discarded\n");
+			dccp_pr_debug("packet discarded due to err=%d\n", err);
 			kfree_skb(skb);
 		}
 	}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 3b1f509f51dd..1f5e3ba62065 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (port == 0 || ntohs(inet->dport) == port ||
 	    ntohs(inet->sport) == port) {
 		if (hctx)
-			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
-			   NIPQUAD(inet->saddr), ntohs(inet->sport),
-			   NIPQUAD(inet->daddr), ntohs(inet->dport), size,
-			   hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
-			   hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+			       "%llu %llu %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+			       hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+			       hctx->ccid3hctx_x_recv >> 6,
+			       hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
 		else
 			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
-			   NIPQUAD(inet->saddr), ntohs(inet->sport),
-			   NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
 	}
 
 	jprobe_return();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index cf28c53a389a..6607b7b14f34 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -575,7 +575,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
-	if (len < sizeof(int))
+	if (len < (int)sizeof(int))
 		return -EINVAL;
 
 	dp = dccp_sk(sk);
@@ -589,9 +589,11 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
 					       (__be32 __user *)optval, optlen);
 	case DCCP_SOCKOPT_SEND_CSCOV:
 		val = dp->dccps_pcslen;
+		len = sizeof(val);
 		break;
 	case DCCP_SOCKOPT_RECV_CSCOV:
 		val = dp->dccps_pcrlen;
+		len = sizeof(val);
 		break;
 	case 128 ... 191:
 		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index b038a0a3ad40..0197a41c256a 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -262,7 +262,7 @@ out:
 }
 
 /* Transmit-delay timer: used by the CCIDs to delay actual send time */
-void dccp_write_xmit_timer(unsigned long data)
+static void dccp_write_xmit_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct dccp_sock *dp = dccp_sk(sk);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c6568d637e1a..a205eaa87f52 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2413,6 +2413,7 @@ module_init(decnet_init);
 static void __exit decnet_exit(void)
 {
 	sock_unregister(AF_DECnet);
+	rtnl_unregister_all(PF_DECnet);
 	dev_remove_pack(&dn_dix_packet_type);
 
 	dn_unregister_sysctl();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 060d725e2942..5c2a9951b638 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,7 +799,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	skip_ndevs = cb->args[0];
 	skip_naddr = cb->args[1];
 
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < skip_ndevs)
 			continue;
@@ -824,8 +823,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 done:
-	read_unlock(&dev_base_lock);
-
 	cb->args[0] = idx;
 	cb->args[1] = dn_idx;
 
@@ -913,7 +910,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb,2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
 }
@@ -1005,7 +1002,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb, 2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (dn_am_i_a_router(dn, dn_db, ifa)) {
 		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
@@ -1447,24 +1444,6 @@ static const struct file_operations dn_dev_seq_fops = {
 
 #endif /* CONFIG_PROC_FS */
 
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-	[RTM_NEWADDR  - RTM_BASE] = { .doit	= dn_nl_newaddr,	},
-	[RTM_DELADDR  - RTM_BASE] = { .doit	= dn_nl_deladdr,	},
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= dn_nl_dump_ifaddr,	},
-#ifdef CONFIG_DECNET_ROUTER
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_newroute,	},
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_fib_dump,		},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
-#else
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_cache_dump,	},
-#endif
-
-};
-
 static int __initdata addr[2];
 module_param_array(addr, int, NULL, 0444);
 MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
@@ -1485,7 +1464,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
 
 	proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
 
@@ -1500,8 +1481,6 @@ void __init dn_dev_init(void)
 
 void __exit dn_dev_cleanup(void)
 {
-	rtnetlink_links[PF_DECnet] = NULL;
-
 #ifdef CONFIG_SYSCTL
 	{
 		int i;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3cbfddc98430..310a86268d2b 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -63,7 +63,7 @@ static struct
 {
 	int error;
 	u8 scope;
-} dn_fib_props[RTA_MAX+1] = {
+} dn_fib_props[RTN_MAX+1] = {
 	[RTN_UNSPEC] =      { .error = 0,       .scope = RT_SCOPE_NOWHERE },
 	[RTN_UNICAST] =     { .error = 0,       .scope = RT_SCOPE_UNIVERSE },
 	[RTN_LOCAL] =       { .error = 0,       .scope = RT_SCOPE_HOST },
@@ -276,6 +276,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
 	struct dn_fib_info *ofi;
 	int nhs = 1;
 
+	if (r->rtm_type > RTN_MAX)
+		goto err_inval;
+
 	if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
 		goto err_inval;
 
@@ -501,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 	return 0;
 }
 
-int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -517,7 +520,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return -ESRCH;
 }
 
-int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -745,11 +748,13 @@ void __exit dn_fib_cleanup(void)
 
 void __init dn_fib_init(void)
 {
-
 	dn_fib_table_init();
 	dn_fib_rules_init();
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
 }
 
 
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index bf701cf5a386..4bf066c416e2 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -261,7 +261,7 @@ static int dn_long_output(struct sk_buff *skb)
 	lp->s_class  = 0;
 	lp->pt       = 0;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -300,7 +300,7 @@ static int dn_short_output(struct sk_buff *skb)
 	sp->srcnode    = cb->src;
 	sp->forward    = cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -342,7 +342,7 @@ static int dn_phase3_output(struct sk_buff *skb)
 	sp->srcnode  = cb->src & dn_htons(0x03ff);
 	sp->forward  = cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 9d20904f6f52..4074a6e5d0de 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -362,7 +362,8 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
 			u16 dlen = *skb->data;
 			if ((dlen <= 16) && (dlen <= skb->len)) {
 				scp->conndata_in.opt_optl = dn_htons(dlen);
-				memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+				skb_copy_from_linear_data_offset(skb, 1,
+					      scp->conndata_in.opt_data, dlen);
 			}
 		}
 		dn_nsp_send_link(sk, DN_NOCHANGE, 0);
@@ -406,7 +407,7 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
 		u16 dlen = *skb->data;
 		if ((dlen <= 16) && (dlen <= skb->len)) {
 			scp->discdata_in.opt_optl = dn_htons(dlen);
-			memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+			skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
 		}
 	}
 
@@ -725,7 +726,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, 2))
 		goto free_out;
 
-	skb->h.raw    = skb->data;
+	skb_reset_transport_header(skb);
 	cb->nsp_flags = *ptr++;
 
 	if (decnet_debug_level & 2)
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2d2cda82c7db..7404653880b0 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -79,7 +79,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct flowi fl;
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	scp->stamp = jiffies;
 
 	dst = sk_dst_check(sk, 0);
@@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (scp->peer.sdn_objnum)
 		type = 0;
 
-	skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
-	skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+	skb_put(skb, dn_sockaddr2username(&scp->peer,
+					  skb_tail_pointer(skb), type));
+	skb_put(skb, dn_sockaddr2username(&scp->addr,
+					  skb_tail_pointer(skb), 2));
 
 	menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
 	if (scp->peer.sdn_flags & SDF_PROXY)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c1b5502f195b..5d7337bcf0fe 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -77,6 +77,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <asm/errno.h>
+#include <net/netlink.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -386,7 +387,7 @@ static int dn_return_short(struct sk_buff *skb)
 	__le16 tmp;
 
 	/* Add back headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -425,7 +426,7 @@ static int dn_return_long(struct sk_buff *skb)
 	unsigned char tmp[ETH_ALEN];
 
 	/* Add back all headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -504,7 +505,7 @@ static int dn_route_rx_long(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 20);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	/* Destination info */
 	ptr += 2;
@@ -542,7 +543,7 @@ static int dn_route_rx_short(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 5);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	cb->dst = *(__le16 *)ptr;
 	ptr += 2;
@@ -615,7 +616,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 		flags = *skb->data;
 	}
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/*
 	 * Weed out future version DECnet
@@ -1468,7 +1469,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	long expires;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
@@ -1509,19 +1510,19 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 /*
  * This is called by both endnodes and routers now.
  */
-int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct rtattr **rta = arg;
 	struct rtmsg *rtm = NLMSG_DATA(nlh);
@@ -1537,7 +1538,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return -ENOBUFS;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	cb = DN_SKB_CB(skb);
 
 	if (rta[RTA_SRC-1])
@@ -1812,6 +1813,13 @@ void __init dn_route_init(void)
 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
 
 	proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+#ifdef CONFIG_DECNET_ROUTER
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+#else
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+		      dn_cache_dump);
+#endif
 }
 
 void __exit dn_route_cleanup(void)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index b6c98ac93dc8..17a1932216d6 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -31,6 +31,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
+#include <net/dn_route.h>
 
 static struct fib_rules_ops dn_fib_rules_ops;
 
@@ -109,8 +110,6 @@ errout:
 
 static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = {
 	FRA_GENERIC_POLICY,
-	[FRA_SRC]	= { .type = NLA_U16 },
-	[FRA_DST]	= { .type = NLA_U16 },
 };
 
 static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
@@ -133,7 +132,7 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	int err = -EINVAL;
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos)
+	if (frh->tos)
 		goto  errout;
 
 	if (rule->table == RT_TABLE_UNSPEC) {
@@ -150,10 +149,10 @@ static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		}
 	}
 
-	if (tb[FRA_SRC])
+	if (frh->src_len)
 		r->src = nla_get_le16(tb[FRA_SRC]);
 
-	if (tb[FRA_DST])
+	if (frh->dst_len)
 		r->dst = nla_get_le16(tb[FRA_DST]);
 
 	r->src_len = frh->src_len;
@@ -176,10 +175,10 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->dst_len && (r->dst_len != frh->dst_len))
 		return 0;
 
-	if (tb[FRA_SRC] && (r->src != nla_get_le16(tb[FRA_SRC])))
+	if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
 		return 0;
 
-	if (tb[FRA_DST] && (r->dst != nla_get_le16(tb[FRA_DST])))
+	if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
 		return 0;
 
 	return 1;
@@ -241,20 +240,22 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
-int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static void dn_fib_rule_flush_cache(void)
 {
-	return fib_rules_dump(skb, cb, AF_DECnet);
+	dn_rt_cache_flush(-1);
 }
 
 static struct fib_rules_ops dn_fib_rules_ops = {
 	.family		= AF_DECnet,
 	.rule_size	= sizeof(struct dn_fib_rule),
+	.addr_size	= sizeof(u16),
 	.action		= dn_fib_rule_action,
 	.match		= dn_fib_rule_match,
 	.configure	= dn_fib_rule_configure,
 	.compare	= dn_fib_rule_compare,
 	.fill		= dn_fib_rule_fill,
 	.default_pref	= dn_fib_rule_default_pref,
+	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
 	.rules_list	= &dn_fib_rules,
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 780a141f8342..d6615c9361e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <linux/route.h> /* RTF_xxx */
 #include <net/neighbour.h>
+#include <net/netlink.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/fib_rules.h>
@@ -295,7 +296,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
@@ -337,19 +338,19 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			nhp->rtnh_ifindex = nh->nh_oif;
 			if (nh->nh_gw)
 				RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
-			nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+			nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
 		} endfor_nexthops(fi);
 		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+		mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 0e62def05a58..696234688cf6 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 {
 	struct sk_buff *skb = NULL;
 	size_t size;
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	struct nlmsghdr *nlh;
 	unsigned char *ptr;
 	struct nf_dn_rtmsg *rtm;
@@ -48,7 +48,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 	rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
 	rtm->nfdn_ifindex = rt_skb->dev->ifindex;
 	ptr = NFDN_RTMSG(rtm);
-	memcpy(ptr, rt_skb->data, rt_skb->len);
+	skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
 	nlh->nlmsg_len = skb->tail - old_tail;
 	return skb;
 
@@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook,
 
 static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
 	int rv = 0;
 
 	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
-				      dnrmg_receive_user_sk, THIS_MODULE);
+				      dnrmg_receive_user_sk, NULL, THIS_MODULE);
 	if (dnrmg == NULL) {
 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
 		return -ENOMEM;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index bc12e36263f0..b5524f32ac2d 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
 	if (err)
 		goto out_free;
-	skb_get_timestamp(skb, &sk->sk_stamp);
+	sk->sk_stamp = skb->tstamp;
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
@@ -345,7 +345,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			goto out_unlock;
 
 		skb_reserve(skb, LL_RESERVED_SPACE(dev));
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		eb = (struct ec_cb *)&skb->cb;
 
@@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			fh->cb = cb;
 			fh->port = port;
 			if (sock->type != SOCK_DGRAM) {
-				skb->tail = skb->data;
+				skb_reset_tail_pointer(skb);
 				skb->len = 0;
 			} else if (res < 0)
 				goto out_free;
@@ -727,6 +727,9 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
 		case SIOCGSTAMP:
 			return sock_get_timestamp(sk, argp);
 
+		case SIOCGSTAMPNS:
+			return sock_get_timestampns(sk, argp);
+
 		case SIOCSIFADDR:
 		case SIOCGIFADDR:
 			return ec_dev_ioctl(sock, cmd, argp);
@@ -845,7 +848,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
 
 static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
-	struct iphdr *ip = skb->nh.iph;
+	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
 	struct sock *sk;
 	struct sk_buff *newskb;
@@ -940,10 +943,10 @@ static void aun_data_available(struct sock *sk, int slen)
 		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
 	}
 
-	data = skb->h.raw + sizeof(struct udphdr);
+	data = skb_transport_header(skb) + sizeof(struct udphdr);
 	ah = (struct aunhdr *)data;
 	len = skb->len - sizeof(struct udphdr);
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 
 	switch (ah->code)
 	{
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7391f55904d1..0ac2524f3b68 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -156,7 +156,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
@@ -228,7 +229,7 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	eth = (struct ethhdr *)
 	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
-	if (type == __constant_htons(ETH_P_802_3))
+	if (type == htons(ETH_P_802_3))
 		return -1;
 
 	eth->h_proto = type;
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 6ef766ef9618..1438adedbc83 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -56,7 +56,8 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211 && NET_RADIO
+	depends on IEEE80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	select CRYPTO_ECB
diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c
index 5ed0a98b2d76..df5592c9339f 100644
--- a/net/ieee80211/ieee80211_crypt.c
+++ b/net/ieee80211/ieee80211_crypt.c
@@ -1,7 +1,7 @@
 /*
  * Host AP crypto routines
  *
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
  * Portions Copyright (C) 2004, Intel Corporation <jketreno@linux.intel.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c
index 35aa3426c3fa..b016b4104de6 100644
--- a/net/ieee80211/ieee80211_crypt_ccmp.c
+++ b/net/ieee80211/ieee80211_crypt_ccmp.c
@@ -1,7 +1,7 @@
 /*
  * Host AP crypt: host-based CCMP encryption implementation for Host AP driver
  *
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -338,7 +338,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 	if (ccmp_replay_check(pn, key->rx_pn)) {
 		if (net_ratelimit()) {
-			printk(KERN_DEBUG "CCMP: replay detected: STA=" MAC_FMT
+			IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=" MAC_FMT
 			       " previous PN %02x%02x%02x%02x%02x%02x "
 			       "received PN %02x%02x%02x%02x%02x%02x\n",
 			       MAC_ARG(hdr->addr2), MAC_ARG(key->rx_pn),
diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c
index fc1f99a59732..5a48d8e0aec1 100644
--- a/net/ieee80211/ieee80211_crypt_tkip.c
+++ b/net/ieee80211/ieee80211_crypt_tkip.c
@@ -1,7 +1,7 @@
 /*
  * Host AP crypt: host-based TKIP encryption implementation for Host AP driver
  *
- * Copyright (c) 2003-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2003-2004, Jouni Malinen <j@w1.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -465,7 +465,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 	if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
 		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TKIP: replay detected: STA=" MAC_FMT
+			IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=" MAC_FMT
 			       " previous TSC %08x%04x received TSC "
 			       "%08x%04x\n", MAC_ARG(hdr->addr2),
 			       tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
@@ -507,7 +507,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 			tkey->rx_phase1_done = 0;
 		}
 		if (net_ratelimit()) {
-			printk(KERN_DEBUG "TKIP: ICV error detected: STA="
+			IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA="
 			       MAC_FMT "\n", MAC_ARG(hdr->addr2));
 		}
 		tkey->dot11RSNAStatsTKIPICVErrors++;
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index ec6d8851a061..8d182459344e 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -1,7 +1,7 @@
 /*
  * Host AP crypt: host-based WEP encryption implementation for Host AP driver
  *
- * Copyright (c) 2002-2004, Jouni Malinen <jkmaline@cc.hut.fi>
+ * Copyright (c) 2002-2004, Jouni Malinen <j@w1.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -152,7 +152,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		return -1;
 
 	/* Copy the IV into the first 3 bytes of the key */
-	memcpy(key, skb->data + hdr_len, 3);
+	skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
 
 	/* Copy rest of the WEP key (the secret part) */
 	memcpy(key + 3, wep->key, wep->key_len);
diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c
index b1c6d1f717d9..7ec6610841ba 100644
--- a/net/ieee80211/ieee80211_module.c
+++ b/net/ieee80211/ieee80211_module.c
@@ -5,8 +5,8 @@
   Portions of this file are based on the WEP enablement code provided by the
   Host AP project hostap-drivers v0.1.3
   Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
-  <jkmaline@cc.hut.fi>
-  Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+  <j@w1.fi>
+  Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
 
   This program is free software; you can redistribute it and/or modify it
   under the terms of version 2 of the GNU General Public License as
@@ -229,6 +229,7 @@ void free_ieee80211(struct net_device *dev)
 
 static int debug = 0;
 u32 ieee80211_debug_level = 0;
+EXPORT_SYMBOL_GPL(ieee80211_debug_level);
 static struct proc_dir_entry *ieee80211_proc = NULL;
 
 static int show_debug_level(char *page, char **start, off_t offset,
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 4084909f6f92..f2de2e48b021 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -3,8 +3,8 @@
  * for Intersil Prism2/2.5/3 - hostap.o module, common routines
  *
  * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
- * <jkmaline@cc.hut.fi>
- * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+ * <j@w1.fi>
+ * Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2004-2005, Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 	u16 fc = le16_to_cpu(hdr->frame_ctl);
 
 	skb->dev = ieee->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ieee80211_get_hdrlen(fc));
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->protocol = __constant_htons(ETH_P_80211_RAW);
@@ -606,12 +606,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_copy_from_linear_data_offset(skb, hdrlen,
+				      skb_put(frag_skb, flen), flen);
 		}
 		dev_kfree_skb_any(skb);
 		skb = NULL;
@@ -759,8 +759,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		    IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) {
 		/* Non-standard frame: get addr4 from its bogus location after
 		 * the payload */
-		memcpy(skb->data + ETH_ALEN,
-		       skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+		skb_copy_to_linear_data_offset(skb, ETH_ALEN,
+					       skb->data + skb->len - ETH_ALEN,
+					       ETH_ALEN);
 		skb_trim(skb, skb->len - ETH_ALEN);
 	}
 #endif
@@ -789,10 +790,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 
 	if (skb2 != NULL) {
 		/* send to wireless media */
-		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb2->mac.raw = skb2->nh.raw = skb2->data;
-		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
 		skb2->dev = dev;
+		skb2->protocol = __constant_htons(ETH_P_802_3);
+		skb_reset_mac_header(skb2);
+		skb_reset_network_header(skb2);
+		/* skb2->network_header += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 #endif
@@ -800,7 +802,6 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		skb->dev = dev;
 		skb->ip_summed = CHECKSUM_NONE;	/* 802.11 crc not sufficient */
 		if (netif_rx(skb) == NET_RX_DROP) {
 			/* netif_rx always succeeds, but it might drop
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 0292d6348e12..a4c3c51140a3 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -225,10 +225,10 @@ static int ieee80211_classify(struct sk_buff *skb)
 	struct iphdr *ip;
 
 	eth = (struct ethhdr *)skb->data;
-	if (eth->h_proto != __constant_htons(ETH_P_IP))
+	if (eth->h_proto != htons(ETH_P_IP))
 		return 0;
 
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 	switch (ip->tos & 0xfc) {
 	case 0x20:
 		return 2;
@@ -309,8 +309,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	/* Save source and destination addresses */
-	memcpy(dest, skb->data, ETH_ALEN);
-	memcpy(src, skb->data + ETH_ALEN, ETH_ALEN);
+	skb_copy_from_linear_data(skb, dest, ETH_ALEN);
+	skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
 
 	if (host_encrypt || host_build_iv)
 		fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
@@ -363,7 +363,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		snapped = 1;
 		ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
 				    ether_type);
-		memcpy(skb_put(skb_new, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len);
 		res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv);
 		if (res < 0) {
 			IEEE80211_ERROR("msdu encryption failed\n");
@@ -492,7 +492,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 			bytes -= SNAP_SIZE + sizeof(u16);
 		}
 
-		memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+		skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes);
 
 		/* Advance the SKB... */
 		skb_pull(skb, bytes);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 40d7a55fe03e..cee5e13bc427 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -5,8 +5,8 @@
   Portions of this file are based on the WEP enablement code provided by the
   Host AP project hostap-drivers v0.1.3
   Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen
-  <jkmaline@cc.hut.fi>
-  Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi>
+  <j@w1.fi>
+  Copyright (c) 2002-2003, Jouni Malinen <j@w1.fi>
 
   This program is free software; you can redistribute it and/or modify it
   under the terms of version 2 of the GNU General Public License as
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9e8ef509c51d..e62aee0ec4c5 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -574,6 +574,33 @@ config TCP_CONG_VENO
 	loss packets.
 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 
+config TCP_CONG_YEAH
+	tristate "YeAH TCP"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	YeAH-TCP is a sender-side high-speed enabled TCP congestion control
+	algorithm, which uses a mixed loss/delay approach to compute the
+	congestion window. Its design goals target high efficiency,
+	internal, RTT and Reno fairness, resilience to link loss while
+	keeping network elements load as low as possible.
+
+	For further details look here:
+	  http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+
+config TCP_CONG_ILLINOIS
+	tristate "TCP Illinois"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	TCP-Illinois is a sender-side modification of TCP Reno for
+	high speed long delay links. It uses round-trip-time to
+	adjust the alpha and beta parameters to achieve a higher average
+	throughput and maintain fairness.
+
+	For further details see:
+	  http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7a068626feea..4ff6c151d7f3 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf358c84c440..16aae8ef5555 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -87,6 +87,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -217,6 +218,26 @@ out:
 	return err;
 }
 
+u32 inet_ehash_secret __read_mostly;
+EXPORT_SYMBOL(inet_ehash_secret);
+
+/*
+ * inet_ehash_secret must be set exactly once
+ * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
+ */
+void build_ehash_secret(void)
+{
+	u32 rnd;
+	do {
+		get_random_bytes(&rnd, sizeof(rnd));
+	} while (rnd == 0);
+	spin_lock_bh(&inetsw_lock);
+	if (!inet_ehash_secret)
+		inet_ehash_secret = rnd;
+	spin_unlock_bh(&inetsw_lock);
+}
+EXPORT_SYMBOL(build_ehash_secret);
+
 /*
  *	Create an inet socket.
  */
@@ -233,6 +254,11 @@ static int inet_create(struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
+	if (sock->type != SOCK_RAW &&
+	    sock->type != SOCK_DGRAM &&
+	    !inet_ehash_secret)
+		build_ehash_secret();
+
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
@@ -755,6 +781,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP:
 			err = sock_get_timestamp(sk, (struct timeval __user *)arg);
 			break;
+		case SIOCGSTAMPNS:
+			err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+			break;
 		case SIOCADDRT:
 		case SIOCDELRT:
 		case SIOCRTMSG:
@@ -1109,7 +1138,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1117,8 +1146,9 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
+	iph = ip_hdr(skb);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	err = -EPROTONOSUPPORT;
 
@@ -1152,7 +1182,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1160,8 +1190,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
+	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -1177,17 +1208,57 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 
 	skb = segs;
 	do {
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 		iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
-		iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	} while ((skb = skb->next));
 
 out:
 	return segs;
 }
 
+unsigned long snmp_fold_field(void *mib[], int offt)
+{
+	unsigned long res = 0;
+	int i;
+
+	for_each_possible_cpu(i) {
+		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field);
+
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	BUG_ON(ptr == NULL);
+	ptr[0] = __alloc_percpu(mibsize);
+	if (!ptr[0])
+		goto err0;
+	ptr[1] = __alloc_percpu(mibsize);
+	if (!ptr[1])
+		goto err1;
+	return 0;
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_init);
+
+void snmp_mib_free(void *ptr[2])
+{
+	BUG_ON(ptr == NULL);
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_free);
+
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
@@ -1214,28 +1285,47 @@ static struct net_protocol icmp_protocol = {
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
-	udplite_statistics[0] = alloc_percpu(struct udp_mib);
-	udplite_statistics[1] = alloc_percpu(struct udp_mib);
-	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]
-	     && udplite_statistics[0] && udplite_statistics[1]             ) )
-		return -ENOMEM;
-
-	(void) tcp_mib_init();
+	if (snmp_mib_init((void **)net_statistics,
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
+		goto err_net_mib;
+	if (snmp_mib_init((void **)ip_statistics,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp_mib_init((void **)icmp_statistics,
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp_mib_init((void **)tcp_statistics,
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
+		goto err_tcp_mib;
+	if (snmp_mib_init((void **)udp_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udp_mib;
+	if (snmp_mib_init((void **)udplite_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udplite_mib;
+
+	tcp_mib_init();
 
 	return 0;
+
+err_udplite_mib:
+	snmp_mib_free((void **)udp_statistics);
+err_udp_mib:
+	snmp_mib_free((void **)tcp_statistics);
+err_tcp_mib:
+	snmp_mib_free((void **)icmp_statistics);
+err_icmp_mib:
+	snmp_mib_free((void **)ip_statistics);
+err_ip_mib:
+	snmp_mib_free((void **)net_statistics);
+err_net_mib:
+	return -ENOMEM;
 }
 
 static int ipv4_proc_init(void);
@@ -1336,7 +1426,7 @@ static int __init inet_init(void)
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
-	if(init_ipv4_mibs())
+	if (init_ipv4_mibs())
 		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
 
 	ipv4_proc_init();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d0..6da8ff597ad3 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		char 		buf[60];
 	} tmp_iph;
 
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 	iph = &tmp_iph.iph;
 
 	iph->tos = top_iph->tos;
@@ -152,9 +152,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	ah = (struct ip_auth_hdr*)skb->data;
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
-	ihl = skb->data - skb->nh.raw;
+	ihl = skb->data - skb_network_header(skb);
 	memcpy(work_buf, iph, ihl);
 
 	iph->ttl = 0;
@@ -181,7 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), work_buf, ihl);
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
@@ -196,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a3488a83f49..7110779a0244 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
-		if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
-			saddr = skb->nh.iph->saddr;
+		if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+			saddr = ip_hdr(skb)->saddr;
 		break;
 	case 1:		/* Restrict announcements of saddr in same subnet */
 		if (!skb)
 			break;
-		saddr = skb->nh.iph->saddr;
+		saddr = ip_hdr(skb)->saddr;
 		if (inet_addr_type(saddr) == RTN_LOCAL) {
 			/* saddr should be known to target */
 			if (inet_addr_onlink(in_dev, target, saddr))
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 		return NULL;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_ARP);
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb)
 	if (in_dev == NULL)
 		goto out;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 
 	switch (dev_type) {
 	default:
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 				 (2 * sizeof(u32)))))
 		goto freeskb;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 	if (arp->ar_hln != dev->addr_len ||
 	    dev->flags & IFF_NOARP ||
 	    skb->pkt_type == PACKET_OTHERHOST ||
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
 		goto out;
 	}
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCDARP:
 		err = arp_req_delete(&r, dev);
 		break;
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 
 /* ------------------------------------------------------------------------ */
 
-static struct seq_operations arp_seq_ops = {
+static const struct seq_operations arp_seq_ops = {
 	.start  = arp_seq_start,
 	.next   = neigh_seq_next,
 	.stop   = neigh_seq_stop,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ce5b693a8bd..e1f18489db1d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -92,6 +92,33 @@ int cipso_v4_rbm_optfmt = 0;
 int cipso_v4_rbm_strictvalid = 1;
 
 /*
+ * Protocol Constants
+ */
+
+/* Maximum size of the CIPSO IP option, derived from the fact that the maximum
+ * IPv4 header size is 60 bytes and the base IPv4 header is 20 bytes long. */
+#define CIPSO_V4_OPT_LEN_MAX          40
+
+/* Length of the base CIPSO option, this includes the option type (1 byte), the
+ * option length (1 byte), and the DOI (4 bytes). */
+#define CIPSO_V4_HDR_LEN              6
+
+/* Base length of the restrictive category bitmap tag (tag #1). */
+#define CIPSO_V4_TAG_RBM_BLEN         4
+
+/* Base length of the enumerated category tag (tag #2). */
+#define CIPSO_V4_TAG_ENUM_BLEN        4
+
+/* Base length of the ranged categories bitmap tag (tag #5). */
+#define CIPSO_V4_TAG_RNG_BLEN         4
+/* The maximum number of category ranges permitted in the ranged category tag
+ * (tag #5).  You may note that the IETF draft states that the maximum number
+ * of category ranges is 7, but if the low end of the last category range is
+ * zero then it is possible to fit 8 category ranges because the zero should
+ * be omitted. */
+#define CIPSO_V4_TAG_RNG_CAT_MAX      8
+
+/*
  * Helper Functions
  */
 
@@ -1109,16 +1136,15 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def,
 				     unsigned char *net_cat,
 				     u32 net_cat_len)
 {
-	/* The constant '16' is not random, it is the maximum number of
-	 * high/low category range pairs as permitted by the CIPSO draft based
-	 * on a maximum IPv4 header length of 60 bytes - the BUG_ON() assertion
-	 * does a sanity check to make sure we don't overflow the array. */
 	int iter = -1;
-	u16 array[16];
+	u16 array[CIPSO_V4_TAG_RNG_CAT_MAX * 2];
 	u32 array_cnt = 0;
 	u32 cat_size = 0;
 
-	BUG_ON(net_cat_len > 30);
+	/* make sure we don't overflow the 'array[]' variable */
+	if (net_cat_len >
+	    (CIPSO_V4_OPT_LEN_MAX - CIPSO_V4_HDR_LEN - CIPSO_V4_TAG_RNG_BLEN))
+		return -ENOSPC;
 
 	for (;;) {
 		iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1);
@@ -1174,7 +1200,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
 	u16 cat_low;
 	u16 cat_high;
 
-	for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
+	for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
 		cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
 		if ((net_iter + 4) <= net_cat_len)
 			cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
@@ -1196,9 +1222,6 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
  * Protocol Handling Functions
  */
 
-#define CIPSO_V4_OPT_LEN_MAX          40
-#define CIPSO_V4_HDR_LEN              6
-
 /**
  * cipso_v4_gentag_hdr - Generate a CIPSO option header
  * @doi_def: the DOI definition
@@ -1676,7 +1699,7 @@ validate_return:
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
-	if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
 	if (gateway)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 98a00d0edc76..088888db8b3d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -48,7 +48,6 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/notifier.h>
 #include <linux/inetdevice.h>
@@ -62,7 +61,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.accept_redirects = 1,
@@ -633,7 +632,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 	dev_load(ifr.ifr_name);
 #endif
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGIFADDR:	/* Get interface address */
 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
 	case SIOCGIFDSTADDR:	/* Get the destination address */
@@ -708,7 +707,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 		goto done;
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGIFADDR:	/* Get interface address */
 		sin->sin_addr.s_addr = ifa->ifa_local;
 		goto rarok;
@@ -1183,17 +1182,13 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_ip_idx, s_idx = cb->args[0];
 
 	s_ip_idx = ip_idx = cb->args[1];
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
 		if (idx > s_idx)
 			s_ip_idx = 0;
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
-			rcu_read_unlock();
+		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 			continue;
-		}
 
 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
 		     ifa = ifa->ifa_next, ip_idx++) {
@@ -1201,16 +1196,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
-				rcu_read_unlock();
+					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
 				goto done;
-			}
 		}
-		rcu_read_unlock();
 	}
 
 done:
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 	cb->args[1] = ip_idx;
 
@@ -1241,19 +1232,6 @@ errout:
 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
 }
 
-static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
-	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
-				      .dumpit	= inet_dump_fib,	},
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
-#endif
-};
-
 #ifdef CONFIG_SYSCTL
 
 void inet_forward_change(void)
@@ -1636,7 +1614,10 @@ void __init devinet_init(void)
 {
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
-	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
+
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb8..47c95e8ef045 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -21,13 +21,14 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
+	u8 *tail;
 	int blksize;
 	int clen;
 	int alen;
 	int nfrags;
 
 	/* Strip IP+ESP header. */
-	__skb_pull(skb, skb->h.raw - skb->data);
+	__skb_pull(skb, skb_transport_offset(skb));
 	/* Now skb is pure payload to encrypt */
 
 	err = -ENOMEM;
@@ -49,19 +50,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto error;
 
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
 	do {
 		int i;
 		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	tail[clen - skb->len - 2] = (clen - skb->len) - 2;
 	pskb_put(skb, trailer, clen - skb->len);
 
-	__skb_push(skb, skb->data - skb->nh.raw);
-	top_iph = skb->nh.iph;
-	esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+	__skb_push(skb, skb->data - skb_network_header(skb));
+	top_iph = ip_hdr(skb);
+	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
+				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
-	*(u8*)(trailer->tail - 1) = top_iph->protocol;
+	*(skb_tail_pointer(trailer) - 1) = top_iph->protocol;
 
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
@@ -217,12 +220,12 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* ... check padding bits here. Silly. :-) */
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
-		struct udphdr *uh = (void *)(skb->nh.raw + ihl);
+		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
 
 		/*
 		 * 1) if the NAT-T peer's IP or port changed then
@@ -260,7 +263,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	iph->protocol = nexthdr[1];
 	pskb_trim(skb, skb->len - alen - padlen - 2);
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -ihl);
 
 	return 0;
 
@@ -268,32 +272,33 @@ out:
 	return -EINVAL;
 }
 
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-	int enclen = 0;
+	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 rem;
+
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	rem = mtu & (align - 1);
+	mtu &= ~(align - 1);
 
 	switch (x->props.mode) {
 	case XFRM_MODE_TUNNEL:
-		mtu = ALIGN(mtu +2, blksize);
 		break;
 	default:
 	case XFRM_MODE_TRANSPORT:
 		/* The worst case */
-		mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+		mtu -= blksize - 4;
+		mtu += min_t(u32, blksize - 4, rem);
 		break;
 	case XFRM_MODE_BEET:
 		/* The worst case. */
-		enclen = IPV4_BEET_PHMAXLEN;
-		mtu = ALIGN(mtu + enclen + 2, blksize);
+		mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
 		break;
 	}
 
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
-
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
+	return mtu - 2;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
@@ -302,8 +307,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
@@ -336,6 +341,7 @@ static int esp_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
 	struct crypto_blkcipher *tfm;
+	u32 align;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -402,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x)
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
+	else if (x->props.mode == XFRM_MODE_BEET)
+		x->props.header_len += IPV4_BEET_PHMAXLEN;
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
 
@@ -417,7 +425,10 @@ static int esp_init_state(struct xfrm_state *x)
 		}
 	}
 	x->data = esp;
-	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+	align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+	if (esp->conf.padlen)
+		align = max_t(u32, align, esp->conf.padlen);
+	x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
 	return 0;
 
 error:
@@ -434,7 +445,7 @@ static struct xfrm_type esp_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp_init_state,
 	.destructor	= esp_destroy,
-	.get_max_size	= esp4_get_max_size,
+	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
 	.output		= esp_output
 };
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 1fba6439fc57..837f2957fa83 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,7 +34,6 @@
 #include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
@@ -46,6 +45,7 @@
 #include <net/icmp.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
+#include <net/rtnetlink.h>
 
 #define FFprint(a...) printk(KERN_DEBUG a)
 
@@ -493,6 +493,11 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 	cfg->fc_nlinfo.nlh = nlh;
 
+	if (cfg->fc_type > RTN_MAX) {
+		err = -EINVAL;
+		goto errout;
+	}
+
 	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
 		switch (attr->nla_type) {
 		case RTA_DST:
@@ -535,7 +540,7 @@ errout:
 	return err;
 }
 
-int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_config cfg;
 	struct fib_table *tb;
@@ -556,7 +561,7 @@ errout:
 	return err;
 }
 
-int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_config cfg;
 	struct fib_table *tb;
@@ -577,7 +582,7 @@ errout:
 	return err;
 }
 
-int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -771,6 +776,12 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
 							    .tos = frn->fl_tos,
 							    .scope = frn->fl_scope } } };
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
+
+	frn->err = -ENOENT;
 	if (tb) {
 		local_bh_disable();
 
@@ -782,6 +793,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 			frn->nh_sel = res.nh_sel;
 			frn->type = res.type;
 			frn->scope = res.scope;
+			fib_res_put(&res);
 		}
 		local_bh_enable();
 	}
@@ -796,7 +808,10 @@ static void nl_fib_input(struct sock *sk, int len)
 	struct fib_table *tb;
 
 	skb = skb_dequeue(&sk->sk_receive_queue);
-	nlh = (struct nlmsghdr *)skb->data;
+	if (skb == NULL)
+		return;
+
+	nlh = nlmsg_hdr(skb);
 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
 		kfree_skb(skb);
@@ -808,7 +823,7 @@ static void nl_fib_input(struct sock *sk, int len)
 
 	nl_fib_lookup(frn, tb);
 
-	pid = nlh->nlmsg_pid;           /*pid of sending process */
+	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
 	NETLINK_CB(skb).pid = 0;         /* from kernel */
 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
@@ -816,7 +831,8 @@ static void nl_fib_input(struct sock *sk, int len)
 
 static void nl_fib_lookup_init(void)
 {
-      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
+      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+      			    THIS_MODULE);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
@@ -914,6 +930,10 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 	nl_fib_lookup_init();
+
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
 }
 
 EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a4949f957ab5..9cfecf1215c9 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1027,7 +1027,7 @@ out:
 	return 0;
 }
 
-static struct seq_operations fib_seq_ops = {
+static const struct seq_operations fib_seq_ops = {
 	.start  = fib_seq_start,
 	.next   = fib_seq_next,
 	.stop   = fib_seq_stop,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b837c33e0404..33083ad52e9f 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -171,8 +171,6 @@ static struct fib_table *fib_empty_table(void)
 
 static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = {
 	FRA_GENERIC_POLICY,
-	[FRA_SRC]	= { .type = NLA_U32 },
-	[FRA_DST]	= { .type = NLA_U32 },
 	[FRA_FLOW]	= { .type = NLA_U32 },
 };
 
@@ -183,8 +181,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	int err = -EINVAL;
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (frh->src_len > 32 || frh->dst_len > 32 ||
-	    (frh->tos & ~IPTOS_TOS_MASK))
+	if (frh->tos & ~IPTOS_TOS_MASK)
 		goto errout;
 
 	if (rule->table == RT_TABLE_UNSPEC) {
@@ -201,10 +198,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		}
 	}
 
-	if (tb[FRA_SRC])
+	if (frh->src_len)
 		rule4->src = nla_get_be32(tb[FRA_SRC]);
 
-	if (tb[FRA_DST])
+	if (frh->dst_len)
 		rule4->dst = nla_get_be32(tb[FRA_DST]);
 
 #ifdef CONFIG_NET_CLS_ROUTE
@@ -242,10 +239,10 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 		return 0;
 #endif
 
-	if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC])))
+	if (frh->src_len && (rule4->src != nla_get_be32(tb[FRA_SRC])))
 		return 0;
 
-	if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST])))
+	if (frh->dst_len && (rule4->dst != nla_get_be32(tb[FRA_DST])))
 		return 0;
 
 	return 1;
@@ -277,11 +274,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_INET);
-}
-
 static u32 fib4_rule_default_pref(void)
 {
 	struct list_head *pos;
@@ -306,9 +298,15 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 	       + nla_total_size(4); /* flow */
 }
 
+static void fib4_rule_flush_cache(void)
+{
+	rt_cache_flush(-1);
+}
+
 static struct fib_rules_ops fib4_rules_ops = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
+	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
 	.match		= fib4_rule_match,
 	.configure	= fib4_rule_configure,
@@ -316,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = {
 	.fill		= fib4_rule_fill,
 	.default_pref	= fib4_rule_default_pref,
 	.nlmsg_payload	= fib4_rule_nlmsg_payload,
+	.flush_cache	= fib4_rule_flush_cache,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= fib4_rule_policy,
 	.rules_list	= &fib4_rules,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2f1fdae6efa6..406ea7050aed 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -89,7 +89,7 @@ static const struct
 {
 	int	error;
 	u8	scope;
-} fib_props[RTA_MAX + 1] = {
+} fib_props[RTN_MAX + 1] = {
 	{
 		.error	= 0,
 		.scope	= RT_SCOPE_NOWHERE,
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
 			default:
 				printk(KERN_DEBUG "impossible 102\n");
 				return -EINVAL;
-			};
+			}
 		}
 		return err;
 	}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ada9b3db507d..9be7da7c3a8f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
  *		Patrick McHardy <kaber@trash.net>
  */
 
-#define VERSION "0.407"
+#define VERSION "0.408"
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn)
 
 static int halve_threshold = 25;
 static int inflate_threshold = 50;
-static int halve_threshold_root = 15;
-static int inflate_threshold_root = 25;
+static int halve_threshold_root = 8;
+static int inflate_threshold_root = 15;
 
 
 static void __alias_free_mem(struct rcu_head *head)
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head)
 
 static inline void tnode_free(struct tnode *tn)
 {
-	if(IS_LEAF(tn)) {
+	if (IS_LEAF(tn)) {
 		struct leaf *l = (struct leaf *) tn;
 		call_rcu_bh(&l->rcu, __leaf_free_rcu);
-	}
-	else
+	} else
 		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
@@ -459,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 	struct tnode *old_tn;
 	int inflate_threshold_use;
 	int halve_threshold_use;
+	int max_resize;
 
 	if (!tn)
 		return NULL;
@@ -553,13 +553,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger  */
 
-	if(!tn->parent)
+	if (!tn->parent)
 		inflate_threshold_use = inflate_threshold_root;
 	else
 		inflate_threshold_use = inflate_threshold;
 
 	err = 0;
-	while ((tn->full_children > 0 &&
+	max_resize = 10;
+	while ((tn->full_children > 0 &&  max_resize-- &&
 	       50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
 				inflate_threshold_use * tnode_child_length(tn))) {
 
@@ -574,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
+	if (max_resize < 0) {
+		if (!tn->parent)
+			printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
+			       inflate_threshold_root, tn->bits);
+		else
+			printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
+			       inflate_threshold, tn->bits);
+	}
+
 	check_tnode(tn);
 
 	/*
@@ -584,13 +594,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger  */
 
-	if(!tn->parent)
+	if (!tn->parent)
 		halve_threshold_use = halve_threshold_root;
 	else
 		halve_threshold_use = halve_threshold;
 
 	err = 0;
-	while (tn->bits > 1 &&
+	max_resize = 10;
+	while (tn->bits > 1 &&  max_resize-- &&
 	       100 * (tnode_child_length(tn) - tn->empty_children) <
 	       halve_threshold_use * tnode_child_length(tn)) {
 
@@ -605,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
+	if (max_resize < 0) {
+		if (!tn->parent)
+			printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
+			       halve_threshold_root, tn->bits);
+		else
+			printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
+			       halve_threshold, tn->bits);
+	}
 
 	/* Only one child remains */
 	if (tn->empty_children == tnode_child_length(tn) - 1)
@@ -1123,6 +1142,9 @@ err:
 	return fa_head;
 }
 
+/*
+ * Caller must hold RTNL.
+ */
 static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
@@ -1540,6 +1562,9 @@ static int trie_leaf_remove(struct trie *t, t_key key)
 	return 1;
 }
 
+/*
+ * Caller must hold RTNL.
+ */
 static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
@@ -1718,6 +1743,9 @@ up:
 	return NULL; /* Ready. Root of trie */
 }
 
+/*
+ * Caller must hold RTNL.
+ */
 static int fn_trie_flush(struct fib_table *tb)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
@@ -2030,12 +2058,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 {
 	struct node *n ;
 
-	if(!t)
+	if (!t)
 		return NULL;
 
 	n = rcu_dereference(t->trie);
 
-	if(!iter)
+	if (!iter)
 		return NULL;
 
 	if (n) {
@@ -2075,7 +2103,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 			int i;
 
 			s->tnodes++;
-			if(tn->bits < MAX_STAT_DEPTH)
+			if (tn->bits < MAX_STAT_DEPTH)
 				s->nodesizes[tn->bits]++;
 
 			for (i = 0; i < (1<<tn->bits); i++)
@@ -2241,7 +2269,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
 {
 	static char buf[32];
 
-	switch(s) {
+	switch (s) {
 	case RT_SCOPE_UNIVERSE: return "universe";
 	case RT_SCOPE_SITE:	return "site";
 	case RT_SCOPE_LINK:	return "link";
@@ -2331,7 +2359,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations fib_trie_seq_ops = {
+static const struct seq_operations fib_trie_seq_ops = {
 	.start  = fib_trie_seq_start,
 	.next   = fib_trie_seq_next,
 	.stop   = fib_trie_seq_stop,
@@ -2452,7 +2480,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations fib_route_seq_ops = {
+static const struct seq_operations fib_route_seq_ops = {
 	.start  = fib_trie_seq_start,
 	.next   = fib_trie_seq_next,
 	.stop   = fib_trie_seq_stop,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0d..d38cbba92a4d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 			   ipc, rt, MSG_DONTWAIT) < 0)
 		ip_flush_pending_frames(icmp_socket->sk);
 	else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
-		struct icmphdr *icmph = skb->h.icmph;
+		struct icmphdr *icmph = icmp_hdr(skb);
 		__wsum csum = 0;
 		struct sk_buff *skb1;
 
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 
-	inet->tos = skb->nh.iph->tos;
+	inet->tos = ip_hdr(skb)->tos;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 	if (icmp_param->replyopts.optlen) {
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
 						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(skb->nh.iph->tos) } },
+						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
@@ -448,9 +448,10 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	 *	Check this, icmp_send is called from the most obscure devices
 	 *	sometimes.
 	 */
-	iph = skb_in->nh.iph;
+	iph = ip_hdr(skb_in);
 
-	if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
+	if ((u8 *)iph < skb_in->head ||
+	    (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
 		goto out;
 
 	/*
@@ -484,7 +485,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			u8 _inner_type, *itp;
 
 			itp = skb_header_pointer(skb_in,
-						 skb_in->nh.raw +
+						 skb_network_header(skb_in) +
 						 (iph->ihl << 2) +
 						 offsetof(struct icmphdr,
 							  type) -
@@ -536,7 +537,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.data.icmph.un.gateway = info;
 	icmp_param.data.icmph.checksum	 = 0;
 	icmp_param.skb	  = skb_in;
-	icmp_param.offset = skb_in->nh.raw - skb_in->data;
+	icmp_param.offset = skb_network_offset(skb_in);
 	icmp_out_count(icmp_param.data.icmph.type);
 	inet_sk(icmp_socket->sk)->tos = tos;
 	ipc.addr = iph->saddr;
@@ -613,7 +614,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out_err;
 
-	icmph = skb->h.icmph;
+	icmph = icmp_hdr(skb);
 	iph   = (struct iphdr *)skb->data;
 
 	if (iph->ihl < 5) /* Mangled header, drop. */
@@ -676,7 +677,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
 					    "type %u, code %u "
 					    "error to a broadcast: %u.%u.%u.%u on %s\n",
-			       NIPQUAD(skb->nh.iph->saddr),
+			       NIPQUAD(ip_hdr(skb)->saddr),
 			       icmph->type, icmph->code,
 			       NIPQUAD(iph->daddr),
 			       skb->dev->name);
@@ -743,7 +744,7 @@ static void icmp_redirect(struct sk_buff *skb)
 
 	iph = (struct iphdr *)skb->data;
 
-	switch (skb->h.icmph->code & 7) {
+	switch (icmp_hdr(skb)->code & 7) {
 	case ICMP_REDIR_NET:
 	case ICMP_REDIR_NETTOS:
 		/*
@@ -751,8 +752,8 @@ static void icmp_redirect(struct sk_buff *skb)
 		 */
 	case ICMP_REDIR_HOST:
 	case ICMP_REDIR_HOSTTOS:
-		ip_rt_redirect(skb->nh.iph->saddr, iph->daddr,
-			       skb->h.icmph->un.gateway,
+		ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
+			       icmp_hdr(skb)->un.gateway,
 			       iph->saddr, skb->dev);
 		break;
 	}
@@ -780,7 +781,7 @@ static void icmp_echo(struct sk_buff *skb)
 	if (!sysctl_icmp_echo_ignore_all) {
 		struct icmp_bxm icmp_param;
 
-		icmp_param.data.icmph	   = *skb->h.icmph;
+		icmp_param.data.icmph	   = *icmp_hdr(skb);
 		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
 		icmp_param.skb		   = skb;
 		icmp_param.offset	   = 0;
@@ -816,7 +817,7 @@ static void icmp_timestamp(struct sk_buff *skb)
 	icmp_param.data.times[2] = icmp_param.data.times[1];
 	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
 		BUG();
-	icmp_param.data.icmph	   = *skb->h.icmph;
+	icmp_param.data.icmph	   = *icmp_hdr(skb);
 	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
 	icmp_param.data.icmph.code = 0;
 	icmp_param.skb		   = skb;
@@ -943,7 +944,7 @@ int icmp_rcv(struct sk_buff *skb)
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
 		goto error;
 
-	icmph = skb->h.icmph;
+	icmph = icmp_hdr(skb);
 
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8cedb2a2c9df..2506021c2935 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+	skb_reset_network_header(skb);
+	pip = ip_hdr(skb);
+	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	pip->version  = 4;
 	pip->ihl      = (sizeof(struct iphdr)+4)>>2;
@@ -331,8 +333,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	((u8*)&pip[1])[2] = 0;
 	((u8*)&pip[1])[3] = 0;
 
-	pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig));
-	skb->h.igmph = (struct igmphdr *)pig;
+	skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
+	skb_put(skb, sizeof(*pig));
+	pig = igmpv3_report_hdr(skb);
 	pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
 	pig->resv1 = 0;
 	pig->csum = 0;
@@ -343,16 +346,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 static int igmpv3_sendpack(struct sk_buff *skb)
 {
-	struct iphdr *pip = skb->nh.iph;
-	struct igmphdr *pig = skb->h.igmph;
-	int iplen, igmplen;
+	struct iphdr *pip = ip_hdr(skb);
+	struct igmphdr *pig = igmp_hdr(skb);
+	const int iplen = skb->tail - skb->network_header;
+	const int igmplen = skb->tail - skb->transport_header;
 
-	iplen = skb->tail - (unsigned char *)skb->nh.iph;
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
-
-	igmplen = skb->tail - (unsigned char *)skb->h.igmph;
-	pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+	pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
 
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
 		       dst_output);
@@ -379,7 +380,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->multiaddr;
-	pih = (struct igmpv3_report *)skb->h.igmph;
+	pih = igmpv3_report_hdr(skb);
 	pih->ngrec = htons(ntohs(pih->ngrec)+1);
 	*ppgr = pgr;
 	return skb;
@@ -412,7 +413,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	if (!*psf_list)
 		goto empty_source;
 
-	pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
+	pih = skb ? igmpv3_report_hdr(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
@@ -664,7 +665,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	iph->version  = 4;
 	iph->ihl      = (sizeof(struct iphdr)+4)>>2;
@@ -827,8 +830,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
 static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	int len)
 {
-	struct igmphdr 		*ih = skb->h.igmph;
-	struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
+	struct igmphdr 		*ih = igmp_hdr(skb);
+	struct igmpv3_query *ih3 = igmpv3_query_hdr(skb);
 	struct ip_mc_list	*im;
 	__be32			group = ih->group;
 	int			max_delay;
@@ -861,12 +864,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
 			return;
 
-		ih3 = (struct igmpv3_query *) skb->h.raw;
+		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs) {
 			if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
 					   + ntohs(ih3->nsrcs)*sizeof(__be32)))
 				return;
-			ih3 = (struct igmpv3_query *) skb->h.raw;
+			ih3 = igmpv3_query_hdr(skb);
 		}
 
 		max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
@@ -943,7 +946,7 @@ int igmp_rcv(struct sk_buff *skb)
 			goto drop;
 	}
 
-	ih = skb->h.igmph;
+	ih = igmp_hdr(skb);
 	switch (ih->type) {
 	case IGMP_HOST_MEMBERSHIP_QUERY:
 		igmp_heard_query(in_dev, skb, len);
@@ -2397,7 +2400,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp_mc_seq_ops = {
+static const struct seq_operations igmp_mc_seq_ops = {
 	.start	=	igmp_mc_seq_start,
 	.next	=	igmp_mc_seq_next,
 	.stop	=	igmp_mc_seq_stop,
@@ -2571,7 +2574,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp_mcf_seq_ops = {
+static const struct seq_operations igmp_mcf_seq_ops = {
 	.start	=	igmp_mcf_seq_start,
 	.next	=	igmp_mcf_seq_next,
 	.stop	=	igmp_mcf_seq_stop,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5df71cd08da8..dbeacd8b0f90 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -27,6 +27,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/inet6_hashtables.h>
+#include <net/netlink.h>
 
 #include <linux/inet.h>
 #include <linux/stddef.h>
@@ -60,7 +61,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 	struct nlmsghdr  *nlh;
 	void *info = NULL;
 	struct inet_diag_meminfo  *minfo = NULL;
-	unsigned char	 *b = skb->tail;
+	unsigned char	 *b = skb_tail_pointer(skb);
 	const struct inet_diag_handler *handler;
 
 	handler = inet_diag_table[unlh->nlmsg_type];
@@ -147,12 +148,12 @@ static int inet_csk_diag_fill(struct sock *sk,
 	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
 		icsk->icsk_ca_ops->get_info(sk, ext, skb);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
@@ -163,7 +164,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 {
 	long tmo;
 	struct inet_diag_msg *r;
-	const unsigned char *previous_tail = skb->tail;
+	const unsigned char *previous_tail = skb_tail_pointer(skb);
 	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
 					 unlh->nlmsg_type, sizeof(*r));
 
@@ -205,10 +206,10 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       &tw6->tw_v6_daddr);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - previous_tail;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
 	return skb->len;
 nlmsg_failure:
-	skb_trim(skb, previous_tail - skb->data);
+	nlmsg_trim(skb, previous_tail);
 	return -EMSGSIZE;
 }
 
@@ -535,7 +536,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	long tmo;
@@ -574,12 +575,12 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 			       &inet6_rsk(req)->rmt_addr);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -805,68 +806,43 @@ done:
 	return skb->len;
 }
 
-static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-		return 0;
+	int hdrlen = sizeof(struct inet_diag_req);
 
-	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
-		goto err_inval;
+	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
+	    nlmsg_len(nlh) < hdrlen)
+		return -EINVAL;
 
 	if (inet_diag_table[nlh->nlmsg_type] == NULL)
 		return -ENOENT;
 
-	if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
-		goto err_inval;
-
-	if (nlh->nlmsg_flags&NLM_F_DUMP) {
-		if (nlh->nlmsg_len >
-		    (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
-			struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
-						 sizeof(struct inet_diag_req));
-			if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
-			    rta->rta_len < 8 ||
-			    rta->rta_len >
-			    (nlh->nlmsg_len -
-			     NLMSG_SPACE(sizeof(struct inet_diag_req))))
-				goto err_inval;
-			if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
-				goto err_inval;
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		if (nlmsg_attrlen(nlh, hdrlen)) {
+			struct nlattr *attr;
+
+			attr = nlmsg_find_attr(nlh, hdrlen,
+					       INET_DIAG_REQ_BYTECODE);
+			if (attr == NULL ||
+			    nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+				return -EINVAL;
 		}
+
 		return netlink_dump_start(idiagnl, skb, nlh,
 					  inet_diag_dump, NULL);
-	} else
-		return inet_diag_get_exact(skb, nlh);
-
-err_inval:
-	return -EINVAL;
-}
-
-
-static inline void inet_diag_rcv_skb(struct sk_buff *skb)
-{
-	if (skb->len >= NLMSG_SPACE(0)) {
-		int err;
-		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
-
-		if (nlh->nlmsg_len < sizeof(*nlh) ||
-		    skb->len < nlh->nlmsg_len)
-			return;
-		err = inet_diag_rcv_msg(skb, nlh);
-		if (err || nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, err);
 	}
+
+	return inet_diag_get_exact(skb, nlh);
 }
 
 static void inet_diag_rcv(struct sock *sk, int len)
 {
-	struct sk_buff *skb;
-	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+	unsigned int qlen = 0;
 
-	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
-		inet_diag_rcv_skb(skb);
-		kfree_skb(skb);
-	}
+	do {
+		netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg);
+	} while (qlen);
 }
 
 static DEFINE_SPINLOCK(inet_diag_register_lock);
@@ -917,7 +893,7 @@ static int __init inet_diag_init(void)
 		goto out;
 
 	idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
-					THIS_MODULE);
+					NULL, THIS_MODULE);
 	if (idiagnl == NULL)
 		goto out_free_table;
 	err = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index db3ef96bdfd9..2f44e6128068 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock);
 
 static int peer_total;
 /* Exported for sysctl_net_ipv4.  */
-int inet_peer_threshold = 65536 + 128;	/* start to throw entries more
+int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
 					 * aggressively at this stage */
-int inet_peer_minttl = 120 * HZ;	/* TTL under high load: 120 sec */
-int inet_peer_maxttl = 10 * 60 * HZ;	/* usual time to live: 10 min */
+int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
+int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
+int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
 
 static struct inet_peer *inet_peer_unused_head;
 static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock);
 static void peer_check_expire(unsigned long dummy);
 static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
 
-/* Exported for sysctl_net_ipv4.  */
-int inet_peer_gc_mintime = 10 * HZ,
-    inet_peer_gc_maxtime = 120 * HZ;
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p)
 	spin_unlock_bh(&inet_peer_unused_lock);
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup(daddr) 						\
+/*
+ * Called with local BH disabled and the pool lock held.
+ * Whether _stack is NULL is known at compile time,
+ * so the compiler will optimize away the if (_stack) tests.
+ */
+#define lookup(_daddr,_stack) 					\
 ({								\
 	struct inet_peer *u, **v;				\
-	stackptr = stack;					\
-	*stackptr++ = &peer_root;				\
+	if (_stack) {						\
+		stackptr = _stack;				\
+		*stackptr++ = &peer_root;			\
+	}							\
 	for (u = peer_root; u != peer_avl_empty; ) {		\
-		if (daddr == u->v4daddr)			\
+		if (_daddr == u->v4daddr)			\
 			break;					\
-		if ((__force __u32)daddr < (__force __u32)u->v4daddr)	\
+		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		*stackptr++ = v;				\
+		if (_stack)					\
+			*stackptr++ = v;			\
 		u = *v;						\
 	}							\
 	u;							\
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p)
 	if (atomic_read(&p->refcnt) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
-		if (lookup(p->v4daddr) != p)
+		if (lookup(p->v4daddr, stack) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty) {
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 	/* Look up for the address quickly. */
 	read_lock_bh(&peer_pool_lock);
-	p = lookup(daddr);
+	p = lookup(daddr, NULL);
 	if (p != peer_avl_empty)
 		atomic_inc(&p->refcnt);
 	read_unlock_bh(&peer_pool_lock);
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 	write_lock_bh(&peer_pool_lock);
 	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty)
 		goto out_free;
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 369e721c4bab..9cb04df0054b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,14 +67,14 @@ int ip_forward(struct sk_buff *skb)
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	/*
 	 *	According to the RFC, we must first decrease the TTL field. If
 	 *	that reaches zero, we must reply an ICMP control message telling
 	 *	that the packet's lifetime expired.
 	 */
-	if (skb->nh.iph->ttl <= 1)
+	if (ip_hdr(skb)->ttl <= 1)
 		goto too_many_hops;
 
 	if (!xfrm4_route_forward(skb))
@@ -85,10 +85,18 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
+	             (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+			  htonl(dst_mtu(&rt->u.dst)));
+		goto drop;
+	}
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6f055380373..0231bdcb2ab7 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -92,7 +92,7 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	struct timeval	stamp;
+	ktime_t		stamp;
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 {
 	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 
-	if(!qp)
+	if (!qp)
 		return NULL;
 	atomic_add(sizeof(struct ipq), &ip_frag_mem);
 	return qp;
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 	 * promoted read lock to write lock.
 	 */
 	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-		if(qp->id == qp_in->id		&&
-		   qp->saddr == qp_in->saddr	&&
-		   qp->daddr == qp_in->daddr	&&
-		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		if (qp->id == qp_in->id		&&
+		    qp->saddr == qp_in->saddr	&&
+		    qp->daddr == qp_in->daddr	&&
+		    qp->protocol == qp_in->protocol &&
+		    qp->user == qp_in->user) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	read_lock(&ipfrag_lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
 	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-		if(qp->id == id		&&
-		   qp->saddr == saddr	&&
-		   qp->daddr == daddr	&&
-		   qp->protocol == protocol &&
-		   qp->user == user) {
+		if (qp->id == id		&&
+		    qp->saddr == saddr	&&
+		    qp->daddr == daddr	&&
+		    qp->protocol == protocol &&
+		    qp->user == user) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -479,11 +479,11 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		goto err;
 	}
 
-	offset = ntohs(skb->nh.iph->frag_off);
+	offset = ntohs(ip_hdr(skb)->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
 	offset <<= 3;		/* offset is in 8-byte chunks */
-	ihl = skb->nh.iph->ihl * 4;
+	ihl = ip_hdrlen(skb);
 
 	/* Determine the position of this fragment. */
 	end = offset + skb->len - ihl;
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for(next = qp->fragments; next != NULL; next = next->next) {
+	for (next = qp->fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (skb->dev)
 		qp->iif = skb->dev->ifindex;
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &qp->stamp);
+	qp->stamp = skb->tstamp;
 	qp->meat += skb->len;
 	atomic_add(skb->truesize, &ip_frag_mem);
 	if (offset == 0)
@@ -624,10 +624,10 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	BUG_TRAP(FRAG_CB(head)->offset == 0);
 
 	/* Allocate a new buffer for the datagram. */
-	ihlen = head->nh.iph->ihl*4;
+	ihlen = ip_hdrlen(head);
 	len = ihlen + qp->len;
 
-	if(len > 65535)
+	if (len > 65535)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -674,9 +674,9 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &qp->stamp);
+	head->tstamp = qp->stamp;
 
-	iph = head->nh.iph;
+	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
@@ -700,7 +700,6 @@ out_fail:
 /* Process an incoming IP datagram fragment. */
 struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 {
-	struct iphdr *iph = skb->nh.iph;
 	struct ipq *qp;
 	struct net_device *dev;
 
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 	dev = skb->dev;
 
 	/* Lookup (or create) queue header */
-	if ((qp = ip_find(iph, user)) != NULL) {
+	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
 		struct sk_buff *ret = NULL;
 
 		spin_lock(&qp->lock);
@@ -734,7 +733,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 	return NULL;
 }
 
-void ipfrag_init(void)
+void __init ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				 (jiffies ^ (jiffies >> 6)));
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9151da642318..63282934725e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
 	return NULL;
 }
 
-static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
-	__be32 key = t->parms.i_key;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	__be32 key = parms->i_key;
 	unsigned h = HASH(key);
 	int prio = 0;
 
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+{
+	return __ipgre_bucket(&t->parms);
+}
+
 static void ipgre_tunnel_link(struct ip_tunnel *t)
 {
 	struct ip_tunnel **tp = ipgre_bucket(t);
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 	__be32 key = parms->i_key;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = HASH(key);
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (local)
-		prio |= 1;
-	if (remote && !MULTICAST(remote)) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 			if (key == t->parms.i_key)
 				return t;
@@ -320,8 +317,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	struct iphdr *iph = (struct iphdr*)skb->data;
 	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
 	int grehlen = (iph->ihl<<2) + 4;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	__be16 flags;
 
@@ -388,8 +385,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	struct iphdr *eiph;
 	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	__be32 rel_info = 0;
@@ -422,7 +419,7 @@ out:
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 		if (n < (iph->ihl<<2))
 			return;
 
@@ -442,7 +439,7 @@ out:
 			return;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			n = ntohs(skb->h.icmph->un.frag.mtu);
+			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 			if (n < grehlen+68)
 				return;
 			n -= grehlen;
@@ -474,7 +471,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
@@ -533,9 +530,9 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos)) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			IP_ECN_set_ce(skb->nh.iph);
+			IP_ECN_set_ce(ip_hdr(skb));
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			IP6_ECN_set_ce(skb->nh.ipv6h);
+			IP6_ECN_set_ce(ipv6_hdr(skb));
 		}
 	}
 }
@@ -565,7 +562,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	h = skb->data;
 	flags = *(__be16*)h;
 
@@ -616,9 +613,10 @@ static int ipgre_rcv(struct sk_buff *skb)
 				offset += 4;
 		}
 
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = __pskb_pull(skb, offset);
-		skb_postpull_rcsum(skb, skb->h.raw, offset);
+		skb_reset_mac_header(skb);
+		__pskb_pull(skb, offset);
+		skb_reset_network_header(skb);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
@@ -669,7 +667,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	struct iphdr  *tiph;
 	u8     tos;
 	__be16 df;
@@ -720,7 +718,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			addr_type = ipv6_addr_type(addr6);
 
 			if (addr_type == IPV6_ADDR_ANY) {
-				addr6 = &skb->nh.ipv6h->daddr;
+				addr6 = &ipv6_hdr(skb)->daddr;
 				addr_type = ipv6_addr_type(addr6);
 			}
 
@@ -824,11 +822,12 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, gre_hlen);
+	skb->transport_header = skb->network_header;
+	skb_push(skb, gre_hlen);
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -839,7 +838,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr) >> 2;
 	iph->frag_off		=	df;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f38e97647ac0..97069399d864 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
 int ip_call_ra_chain(struct sk_buff *skb)
 {
 	struct ip_ra_chain *ra;
-	u8 protocol = skb->nh.iph->protocol;
+	u8 protocol = ip_hdr(skb)->protocol;
 	struct sock *last = NULL;
 
 	read_lock(&ip_ra_lock);
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		if (sk && inet_sk(sk)->num == protocol &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
-			if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
 				if (skb == NULL) {
 					read_unlock(&ip_ra_lock);
@@ -198,17 +198,15 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 static inline int ip_local_deliver_finish(struct sk_buff *skb)
 {
-	int ihl = skb->nh.iph->ihl*4;
-
-	__skb_pull(skb, ihl);
+	__skb_pull(skb, ip_hdrlen(skb));
 
 	/* Point into the IP datagram, just past the header. */
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	rcu_read_lock();
 	{
 		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
-		int protocol = skb->nh.iph->protocol;
+		int protocol = ip_hdr(skb)->protocol;
 		int hash;
 		struct sock *raw_sk;
 		struct net_protocol *ipprot;
@@ -220,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 		/* If there maybe a raw socket we must check - if not we
 		 * don't care less
 		 */
-		if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
+		if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
 			raw_sk = NULL;
 
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
@@ -266,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
 	 *	Reassemble IP fragments.
 	 */
 
-	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
 		if (!skb)
 			return 0;
@@ -294,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 		goto drop;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (ip_options_compile(NULL, skb)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
@@ -330,7 +328,8 @@ drop:
 
 static inline int ip_rcv_finish(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
 
 	/*
 	 *	Initialise the virtual path cache for the packet. It describes
@@ -342,6 +341,8 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
 				IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+			else if (err == -ENETUNREACH)
+				IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
 			goto drop;
 		}
 	}
@@ -360,6 +361,12 @@ static inline int ip_rcv_finish(struct sk_buff *skb)
 	if (iph->ihl > 5 && ip_rcv_options(skb))
 		goto drop;
 
+	rt = (struct rtable*)skb->dst;
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+
 	return dst_input(skb);
 
 drop:
@@ -391,7 +398,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	/*
 	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
@@ -410,13 +417,16 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (!pskb_may_pull(skb, iph->ihl*4))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 		goto inhdr_error;
 
 	len = ntohs(iph->tot_len);
-	if (skb->len < len || len < (iph->ihl*4))
+	if (skb->len < len) {
+		IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
 		goto inhdr_error;
 
 	/* Our transport medium may have padded the buffer out. Now we know it
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a87..251346828cb4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
 void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
 			    __be32 daddr, struct rtable *rt, int is_frag)
 {
-	unsigned char * iph = skb->nh.raw;
+	unsigned char *iph = skb_network_header(skb);
 
 	memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
 	memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,13 +104,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 		return 0;
 	}
 
-	sptr = skb->nh.raw;
+	sptr = skb_network_header(skb);
 	dptr = dopt->__data;
 
 	if (skb->dst)
 		daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
 	else
-		daddr = skb->nh.iph->daddr;
+		daddr = ip_hdr(skb)->daddr;
 
 	if (sopt->rr) {
 		optlen  = sptr[sopt->rr+1];
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 			/*
 			 * RFC1812 requires to fix illegal source routes.
 			 */
-			if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
+			if (memcmp(&ip_hdr(skb)->saddr,
+				   &start[soffset + 3], 4) == 0)
 				doffset -= 4;
 		}
 		if (doffset > 3) {
@@ -217,7 +218,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 
 void ip_options_fragment(struct sk_buff * skb)
 {
-	unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
+	unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
 	struct ip_options * opt = &(IPCB(skb)->opt);
 	int  l = opt->optlen;
 	int  optlen;
@@ -264,12 +265,13 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 
 	if (!opt) {
 		opt = &(IPCB(skb)->opt);
-		iph = skb->nh.raw;
+		iph = skb_network_header(skb);
 		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 		optptr = iph + sizeof(struct iphdr);
 		opt->is_data = 0;
 	} else {
-		optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
+		optptr = opt->is_data ? opt->__data :
+					(unsigned char *)&(ip_hdr(skb)[1]);
 		iph = optptr - sizeof(struct iphdr);
 	}
 
@@ -563,7 +565,7 @@ void ip_forward_options(struct sk_buff *skb)
 	struct   ip_options * opt	= &(IPCB(skb)->opt);
 	unsigned char * optptr;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	unsigned char *raw = skb->nh.raw;
+	unsigned char *raw = skb_network_header(skb);
 
 	if (opt->rr_needaddr) {
 		optptr = (unsigned char *)raw + opt->rr;
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb)
 		if (srrptr + 3 <= srrspace) {
 			opt->is_changed = 1;
 			ip_rt_get_source(&optptr[srrptr-1], rt);
-			skb->nh.iph->daddr = rt->rt_dst;
+			ip_hdr(skb)->daddr = rt->rt_dst;
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
 			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb)
 	}
 	if (opt->is_changed) {
 		opt->is_changed = 0;
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 	}
 }
 
@@ -608,8 +610,8 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	int srrspace, srrptr;
 	__be32 nexthop;
-	struct iphdr *iph = skb->nh.iph;
-	unsigned char * optptr = skb->nh.raw + opt->srr;
+	struct iphdr *iph = ip_hdr(skb);
+	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtable *rt2;
 	int err;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6d..d6427d918512 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ __inline__ void ip_send_check(struct iphdr *iph)
 /* dev_loopback_xmit for use with netfilter. */
 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 {
-	newskb->mac.raw = newskb->data;
-	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	skb_reset_mac_header(newskb);
+	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	BUG_TRAP(newskb->dst);
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	struct iphdr *iph;
 
 	/* Build the IP header. */
-	if (opt)
-		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
-	else
-		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
-
+	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->protocol = sk->sk_protocol;
 	iph->tot_len  = htons(skb->len);
 	ip_select_ident(iph, &rt->u.dst, sk);
-	skb->nh.iph   = iph;
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -163,9 +160,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 static inline int ip_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
+	struct rtable *rt = (struct rtable *)dst;
 	struct net_device *dev = dst->dev;
 	int hh_len = LL_RESERVED_SPACE(dev);
 
+	if (rt->rt_type == RTN_MULTICAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	else if (rt->rt_type == RTN_BROADCAST)
+		IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+
 	/* Be paranoid, rather than too clever. */
 	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
 		struct sk_buff *skb2;
@@ -192,6 +195,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	return -EINVAL;
 }
 
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
+
+	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
+	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
 static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -201,7 +212,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
 		return dst_output(skb);
 	}
 #endif
-	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
 		return ip_fragment(skb, ip_finish_output2);
 	else
 		return ip_finish_output2(skb);
@@ -248,7 +259,7 @@ int ip_mc_output(struct sk_buff *skb)
 
 		/* Multicasts with ttl 0 must not go beyond the host */
 
-		if (skb->nh.iph->ttl == 0) {
+		if (ip_hdr(skb)->ttl == 0) {
 			kfree_skb(skb);
 			return 0;
 		}
@@ -333,7 +344,9 @@ packet_routed:
 		goto no_route;
 
 	/* OK, we know where to send it, allocate and build IP header. */
-	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
 	iph->tot_len = htons(skb->len);
 	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +357,6 @@ packet_routed:
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
-	skb->nh.iph   = iph;
 	/* Transport layer set skb->h.foo itself. */
 
 	if (opt && opt->optlen) {
@@ -386,21 +398,10 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-	/* Connection association is same as pre-frag packet */
-	nf_conntrack_put(to->nfct);
-	to->nfct = from->nfct;
-	nf_conntrack_get(to->nfct);
-	to->nfctinfo = from->nfctinfo;
+	nf_copy(to, from);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	to->ipvs_property = from->ipvs_property;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	nf_bridge_put(to->nf_bridge);
-	to->nf_bridge = from->nf_bridge;
-	nf_bridge_get(to->nf_bridge);
-#endif
-#endif
 	skb_copy_secmark(to, from);
 }
 
@@ -430,12 +431,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	 *	Point into the IP datagram header.
 	 */
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
 		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(ip_skb_dst_mtu(skb)));
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
@@ -502,10 +503,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 			 * before previous one went down. */
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
-				frag->h.raw = frag->data;
-				frag->nh.raw = __skb_push(frag, hlen);
-				memcpy(frag->nh.raw, iph, hlen);
-				iph = frag->nh.iph;
+				skb_reset_transport_header(frag);
+				__skb_push(frag, hlen);
+				skb_reset_network_header(frag);
+				memcpy(skb_network_header(frag), iph, hlen);
+				iph = ip_hdr(frag);
 				iph->tot_len = htons(frag->len);
 				ip_copy_metadata(frag, skb);
 				if (offset == 0)
@@ -566,7 +568,7 @@ slow_path:
 	 *	Keep copying data until we run out.
 	 */
 
-	while(left > 0)	{
+	while (left > 0) {
 		len = left;
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
@@ -593,8 +595,8 @@ slow_path:
 		ip_copy_metadata(skb2, skb);
 		skb_reserve(skb2, ll_rs);
 		skb_put(skb2, len + hlen);
-		skb2->nh.raw = skb2->data;
-		skb2->h.raw = skb2->data + hlen;
+		skb_reset_network_header(skb2);
+		skb2->transport_header = skb2->network_header + hlen;
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -608,19 +610,19 @@ slow_path:
 		 *	Copy the packet header into the new buffer.
 		 */
 
-		memcpy(skb2->nh.raw, skb->data, hlen);
+		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
 
 		/*
 		 *	Copy a block of the IP datagram.
 		 */
-		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
 			BUG();
 		left -= len;
 
 		/*
 		 *	Fill in the new header fields.
 		 */
-		iph = skb2->nh.iph;
+		iph = ip_hdr(skb2);
 		iph->frag_off = htons((offset >> 3));
 
 		/* ANK: dirty, but effective trick. Upgrade options only if
@@ -722,10 +724,10 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb_put(skb,fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->transport_header = skb->network_header + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -799,7 +801,9 @@ int ip_append_data(struct sock *sk,
 			inet->cork.addr = ipc->addr;
 		}
 		dst_hold(&rt->u.dst);
-		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+					    rt->u.dst.dev->mtu :
+					    dst_mtu(rt->u.dst.path);
 		inet->cork.rt = rt;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
@@ -929,9 +933,10 @@ alloc_new_skb:
 			 *	Find where to start putting bytes.
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
@@ -1100,8 +1105,6 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
-			char *data;
-			struct iphdr *iph;
 			int alloclen;
 
 			skb_prev = skb;
@@ -1124,15 +1127,15 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			/*
 			 *	Find where to start putting bytes.
 			 */
-			data = skb_put(skb, fragheaderlen + fraggap);
-			skb->nh.iph = iph = (struct iphdr *)data;
-			data += fragheaderlen;
-			skb->h.raw = data;
-
+			skb_put(skb, fragheaderlen + fraggap);
+			skb_reset_network_header(skb);
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			if (fraggap) {
-				skb->csum = skb_copy_and_csum_bits(
-					skb_prev, maxfraglen,
-					data, fraggap, 0);
+				skb->csum = skb_copy_and_csum_bits(skb_prev,
+								   maxfraglen,
+						    skb_transport_header(skb),
+								   fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				pskb_trim_unique(skb_prev, maxfraglen);
@@ -1198,10 +1201,10 @@ int ip_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
-		__skb_pull(skb, skb->nh.raw - skb->data);
+	if (skb->data < skb_network_header(skb))
+		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
@@ -1216,13 +1219,13 @@ int ip_push_pending_frames(struct sock *sk)
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	if (inet->pmtudisc != IP_PMTUDISC_DO)
+	if (inet->pmtudisc < IP_PMTUDISC_DO)
 		skb->local_df = 1;
 
 	/* DF bit is set when we want to see DF on outgoing frames.
 	 * If local_df is set too, we still allow to fragment this frame
 	 * locally. */
-	if (inet->pmtudisc == IP_PMTUDISC_DO ||
+	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
 	    (skb->len <= dst_mtu(&rt->u.dst) &&
 	     ip_dont_fragment(sk, &rt->u.dst)))
 		df = htons(IP_DF);
@@ -1352,11 +1355,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
 						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(skb->nh.iph->tos) } },
+						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } },
+					       { .sport = tcp_hdr(skb)->dest,
+						 .dport = tcp_hdr(skb)->source } },
 				    .proto = sk->sk_protocol };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
@@ -1370,14 +1373,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	   with locally disabled BH and that sk cannot be already spinlocked.
 	 */
 	bh_lock_sock(sk);
-	inet->tos = skb->nh.iph->tos;
+	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_priority = skb->priority;
-	sk->sk_protocol = skb->nh.iph->protocol;
+	sk->sk_protocol = ip_hdr(skb)->protocol;
 	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
 		       &ipc, rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		if (arg->csumoffset >= 0)
-			*((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+			*((__sum16 *)skb_transport_header(skb) +
+			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
+								arg->csum));
 		skb->ip_summed = CHECKSUM_NONE;
 		ip_push_pending_frames(sk);
 	}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23048d9f3584..4d544573f48a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 	struct in_pktinfo info;
 	struct rtable *rt = (struct rtable *)skb->dst;
 
-	info.ipi_addr.s_addr = skb->nh.iph->daddr;
+	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
 	if (rt) {
 		info.ipi_ifindex = rt->rt_iif;
 		info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 
 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
 {
-	int ttl = skb->nh.iph->ttl;
+	int ttl = ip_hdr(skb)->ttl;
 	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
 }
 
 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
 {
-	put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
+	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
 }
 
 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
 
-	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
+	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
+		 ip_hdr(skb) + 1);
 }
 
 
@@ -268,18 +269,21 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
 	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
-	serr->ee.ee_type = skb->h.icmph->type;
-	serr->ee.ee_code = skb->h.icmph->code;
+	serr->ee.ee_type = icmp_hdr(skb)->type;
+	serr->ee.ee_code = icmp_hdr(skb)->code;
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
+				   skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
-	if (!skb_pull(skb, payload - skb->data) ||
-	    sock_queue_err_skb(sk, skb))
-		kfree_skb(skb);
+	if (skb_pull(skb, payload - skb->data) != NULL) {
+		skb_reset_transport_header(skb);
+		if (sock_queue_err_skb(sk, skb) == 0)
+			return;
+	}
+	kfree_skb(skb);
 }
 
 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
@@ -296,8 +300,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	if (!skb)
 		return;
 
-	iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
-	skb->nh.iph = iph;
+	skb_put(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
 	iph->daddr = daddr;
 
 	serr = SKB_EXT_ERR(skb);
@@ -308,11 +313,11 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = skb->tail;
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -354,7 +359,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	sin = (struct sockaddr_in *)msg->msg_name;
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
+		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
+						   serr->addr_offset);
 		sin->sin_port = serr->port;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
@@ -366,7 +372,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		struct inet_sock *inet = inet_sk(sk);
 
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 		if (inet->cmsg_flags)
@@ -403,20 +409,20 @@ out:
  */
 
 static int do_ip_setsockopt(struct sock *sk, int level,
-		int optname, char __user *optval, int optlen)
+			    int optname, char __user *optval, int optlen)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val=0,err;
 
 	if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
-			    (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
-			    (1<<IP_RETOPTS) | (1<<IP_TOS) |
-			    (1<<IP_TTL) | (1<<IP_HDRINCL) |
-			    (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
-			    (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
-			    (1<<IP_PASSSEC))) ||
-				optname == IP_MULTICAST_TTL ||
-				optname == IP_MULTICAST_LOOP) {
+			     (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
+			     (1<<IP_RETOPTS) | (1<<IP_TOS) |
+			     (1<<IP_TTL) | (1<<IP_HDRINCL) |
+			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
+			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
+			     (1<<IP_PASSSEC))) ||
+	    optname == IP_MULTICAST_TTL ||
+	    optname == IP_MULTICAST_LOOP) {
 		if (optlen >= sizeof(int)) {
 			if (get_user(val, (int __user *) optval))
 				return -EFAULT;
@@ -440,444 +446,444 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	lock_sock(sk);
 
 	switch (optname) {
-		case IP_OPTIONS:
-		{
-			struct ip_options * opt = NULL;
-			if (optlen > 40 || optlen < 0)
-				goto e_inval;
-			err = ip_options_get_from_user(&opt, optval, optlen);
-			if (err)
-				break;
-			if (inet->is_icsk) {
-				struct inet_connection_sock *icsk = inet_csk(sk);
+	case IP_OPTIONS:
+	{
+		struct ip_options * opt = NULL;
+		if (optlen > 40 || optlen < 0)
+			goto e_inval;
+		err = ip_options_get_from_user(&opt, optval, optlen);
+		if (err)
+			break;
+		if (inet->is_icsk) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-				if (sk->sk_family == PF_INET ||
-				    (!((1 << sk->sk_state) &
-				       (TCPF_LISTEN | TCPF_CLOSE)) &&
-				     inet->daddr != LOOPBACK4_IPV6)) {
+			if (sk->sk_family == PF_INET ||
+			    (!((1 << sk->sk_state) &
+			       (TCPF_LISTEN | TCPF_CLOSE)) &&
+			     inet->daddr != LOOPBACK4_IPV6)) {
 #endif
-					if (inet->opt)
-						icsk->icsk_ext_hdr_len -= inet->opt->optlen;
-					if (opt)
-						icsk->icsk_ext_hdr_len += opt->optlen;
-					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+				if (inet->opt)
+					icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+				if (opt)
+					icsk->icsk_ext_hdr_len += opt->optlen;
+				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-				}
-#endif
 			}
-			opt = xchg(&inet->opt, opt);
-			kfree(opt);
-			break;
+#endif
 		}
-		case IP_PKTINFO:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_PKTINFO;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
-			break;
-		case IP_RECVTTL:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_TTL;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_TTL;
-			break;
-		case IP_RECVTOS:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_TOS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_TOS;
-			break;
-		case IP_RECVOPTS:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
-			break;
-		case IP_RETOPTS:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_RETOPTS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+		opt = xchg(&inet->opt, opt);
+		kfree(opt);
+		break;
+	}
+	case IP_PKTINFO:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_PKTINFO;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
+		break;
+	case IP_RECVTTL:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_TTL;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_TTL;
+		break;
+	case IP_RECVTOS:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_TOS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_TOS;
+		break;
+	case IP_RECVOPTS:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
+		break;
+	case IP_RETOPTS:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_RETOPTS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+		break;
+	case IP_PASSSEC:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_PASSSEC;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+		break;
+	case IP_TOS:	/* This sets both TOS and Precedence */
+		if (sk->sk_type == SOCK_STREAM) {
+			val &= ~3;
+			val |= inet->tos & 3;
+		}
+		if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
+		    !capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
 			break;
-		case IP_PASSSEC:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_PASSSEC;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+		}
+		if (inet->tos != val) {
+			inet->tos = val;
+			sk->sk_priority = rt_tos2priority(val);
+			sk_dst_reset(sk);
+		}
+		break;
+	case IP_TTL:
+		if (optlen<1)
+			goto e_inval;
+		if (val != -1 && (val < 1 || val>255))
+			goto e_inval;
+		inet->uc_ttl = val;
+		break;
+	case IP_HDRINCL:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
 			break;
-		case IP_TOS:	/* This sets both TOS and Precedence */
-			if (sk->sk_type == SOCK_STREAM) {
-				val &= ~3;
-				val |= inet->tos & 3;
-			}
-			if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
-			    !capable(CAP_NET_ADMIN)) {
-				err = -EPERM;
+		}
+		inet->hdrincl = val ? 1 : 0;
+		break;
+	case IP_MTU_DISCOVER:
+		if (val<0 || val>3)
+			goto e_inval;
+		inet->pmtudisc = val;
+		break;
+	case IP_RECVERR:
+		inet->recverr = !!val;
+		if (!val)
+			skb_queue_purge(&sk->sk_error_queue);
+		break;
+	case IP_MULTICAST_TTL:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (optlen<1)
+			goto e_inval;
+		if (val==-1)
+			val = 1;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		inet->mc_ttl = val;
+		break;
+	case IP_MULTICAST_LOOP:
+		if (optlen<1)
+			goto e_inval;
+		inet->mc_loop = !!val;
+		break;
+	case IP_MULTICAST_IF:
+	{
+		struct ip_mreqn mreq;
+		struct net_device *dev = NULL;
+
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		/*
+		 *	Check the arguments are allowable
+		 */
+
+		err = -EFAULT;
+		if (optlen >= sizeof(struct ip_mreqn)) {
+			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
-			}
-			if (inet->tos != val) {
-				inet->tos = val;
-				sk->sk_priority = rt_tos2priority(val);
-				sk_dst_reset(sk);
-			}
-			break;
-		case IP_TTL:
-			if (optlen<1)
-				goto e_inval;
-			if (val != -1 && (val < 1 || val>255))
-				goto e_inval;
-			inet->uc_ttl = val;
-			break;
-		case IP_HDRINCL:
-			if (sk->sk_type != SOCK_RAW) {
-				err = -ENOPROTOOPT;
+		} else {
+			memset(&mreq, 0, sizeof(mreq));
+			if (optlen >= sizeof(struct in_addr) &&
+			    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+				break;
+		}
+
+		if (!mreq.imr_ifindex) {
+			if (mreq.imr_address.s_addr == INADDR_ANY) {
+				inet->mc_index = 0;
+				inet->mc_addr  = 0;
+				err = 0;
 				break;
 			}
-			inet->hdrincl = val ? 1 : 0;
-			break;
-		case IP_MTU_DISCOVER:
-			if (val<0 || val>2)
-				goto e_inval;
-			inet->pmtudisc = val;
-			break;
-		case IP_RECVERR:
-			inet->recverr = !!val;
-			if (!val)
-				skb_queue_purge(&sk->sk_error_queue);
-			break;
-		case IP_MULTICAST_TTL:
-			if (sk->sk_type == SOCK_STREAM)
-				goto e_inval;
-			if (optlen<1)
-				goto e_inval;
-			if (val==-1)
-				val = 1;
-			if (val < 0 || val > 255)
-				goto e_inval;
-			inet->mc_ttl = val;
-			break;
-		case IP_MULTICAST_LOOP:
-			if (optlen<1)
-				goto e_inval;
-			inet->mc_loop = !!val;
-			break;
-		case IP_MULTICAST_IF:
-		{
-			struct ip_mreqn mreq;
-			struct net_device *dev = NULL;
+			dev = ip_dev_find(mreq.imr_address.s_addr);
+			if (dev) {
+				mreq.imr_ifindex = dev->ifindex;
+				dev_put(dev);
+			}
+		} else
+			dev = __dev_get_by_index(mreq.imr_ifindex);
 
-			if (sk->sk_type == SOCK_STREAM)
-				goto e_inval;
-			/*
-			 *	Check the arguments are allowable
-			 */
 
-			err = -EFAULT;
-			if (optlen >= sizeof(struct ip_mreqn)) {
-				if (copy_from_user(&mreq,optval,sizeof(mreq)))
-					break;
-			} else {
-				memset(&mreq, 0, sizeof(mreq));
-				if (optlen >= sizeof(struct in_addr) &&
-				    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
-					break;
-			}
+		err = -EADDRNOTAVAIL;
+		if (!dev)
+			break;
 
-			if (!mreq.imr_ifindex) {
-				if (mreq.imr_address.s_addr == INADDR_ANY) {
-					inet->mc_index = 0;
-					inet->mc_addr  = 0;
-					err = 0;
-					break;
-				}
-				dev = ip_dev_find(mreq.imr_address.s_addr);
-				if (dev) {
-					mreq.imr_ifindex = dev->ifindex;
-					dev_put(dev);
-				}
-			} else
-				dev = __dev_get_by_index(mreq.imr_ifindex);
+		err = -EINVAL;
+		if (sk->sk_bound_dev_if &&
+		    mreq.imr_ifindex != sk->sk_bound_dev_if)
+			break;
 
+		inet->mc_index = mreq.imr_ifindex;
+		inet->mc_addr  = mreq.imr_address.s_addr;
+		err = 0;
+		break;
+	}
 
-			err = -EADDRNOTAVAIL;
-			if (!dev)
-				break;
+	case IP_ADD_MEMBERSHIP:
+	case IP_DROP_MEMBERSHIP:
+	{
+		struct ip_mreqn mreq;
 
-			err = -EINVAL;
-			if (sk->sk_bound_dev_if &&
-			    mreq.imr_ifindex != sk->sk_bound_dev_if)
+		if (optlen < sizeof(struct ip_mreq))
+			goto e_inval;
+		err = -EFAULT;
+		if (optlen >= sizeof(struct ip_mreqn)) {
+			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
+		} else {
+			memset(&mreq, 0, sizeof(mreq));
+			if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
+				break;
+		}
 
-			inet->mc_index = mreq.imr_ifindex;
-			inet->mc_addr  = mreq.imr_address.s_addr;
-			err = 0;
+		if (optname == IP_ADD_MEMBERSHIP)
+			err = ip_mc_join_group(sk, &mreq);
+		else
+			err = ip_mc_leave_group(sk, &mreq);
+		break;
+	}
+	case IP_MSFILTER:
+	{
+		extern int sysctl_igmp_max_msf;
+		struct ip_msfilter *msf;
+
+		if (optlen < IP_MSFILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			err = -ENOBUFS;
 			break;
 		}
+		msf = kmalloc(optlen, GFP_KERNEL);
+		if (msf == 0) {
+			err = -ENOBUFS;
+			break;
+		}
+		err = -EFAULT;
+		if (copy_from_user(msf, optval, optlen)) {
+			kfree(msf);
+			break;
+		}
+		/* numsrc >= (1G-4) overflow in 32 bits */
+		if (msf->imsf_numsrc >= 0x3ffffffcU ||
+		    msf->imsf_numsrc > sysctl_igmp_max_msf) {
+			kfree(msf);
+			err = -ENOBUFS;
+			break;
+		}
+		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
+			kfree(msf);
+			err = -EINVAL;
+			break;
+		}
+		err = ip_mc_msfilter(sk, msf, 0);
+		kfree(msf);
+		break;
+	}
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE:
+	case IP_ADD_SOURCE_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	{
+		struct ip_mreq_source mreqs;
+		int omode, add;
 
-		case IP_ADD_MEMBERSHIP:
-		case IP_DROP_MEMBERSHIP:
-		{
-			struct ip_mreqn mreq;
-
-			if (optlen < sizeof(struct ip_mreq))
-				goto e_inval;
+		if (optlen != sizeof(struct ip_mreq_source))
+			goto e_inval;
+		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 			err = -EFAULT;
-			if (optlen >= sizeof(struct ip_mreqn)) {
-				if(copy_from_user(&mreq,optval,sizeof(mreq)))
-					break;
-			} else {
-				memset(&mreq, 0, sizeof(mreq));
-				if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
-					break;
-			}
-
-			if (optname == IP_ADD_MEMBERSHIP)
-				err = ip_mc_join_group(sk, &mreq);
-			else
-				err = ip_mc_leave_group(sk, &mreq);
 			break;
 		}
-		case IP_MSFILTER:
-		{
-			extern int sysctl_igmp_max_msf;
-			struct ip_msfilter *msf;
+		if (optname == IP_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == IP_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
+			struct ip_mreqn mreq;
 
-			if (optlen < IP_MSFILTER_SIZE(0))
-				goto e_inval;
-			if (optlen > sysctl_optmem_max) {
-				err = -ENOBUFS;
-				break;
-			}
-			msf = kmalloc(optlen, GFP_KERNEL);
-			if (msf == 0) {
-				err = -ENOBUFS;
+			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
+			mreq.imr_address.s_addr = mreqs.imr_interface;
+			mreq.imr_ifindex = 0;
+			err = ip_mc_join_group(sk, &mreq);
+			if (err && err != -EADDRINUSE)
 				break;
-			}
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		err = ip_mc_source(add, omode, sk, &mreqs, 0);
+		break;
+	}
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	{
+		struct group_req greq;
+		struct sockaddr_in *psin;
+		struct ip_mreqn mreq;
+
+		if (optlen < sizeof(struct group_req))
+			goto e_inval;
+		err = -EFAULT;
+		if (copy_from_user(&greq, optval, sizeof(greq)))
+			break;
+		psin = (struct sockaddr_in *)&greq.gr_group;
+		if (psin->sin_family != AF_INET)
+			goto e_inval;
+		memset(&mreq, 0, sizeof(mreq));
+		mreq.imr_multiaddr = psin->sin_addr;
+		mreq.imr_ifindex = greq.gr_interface;
+
+		if (optname == MCAST_JOIN_GROUP)
+			err = ip_mc_join_group(sk, &mreq);
+		else
+			err = ip_mc_leave_group(sk, &mreq);
+		break;
+	}
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	{
+		struct group_source_req greqs;
+		struct ip_mreq_source mreqs;
+		struct sockaddr_in *psin;
+		int omode, add;
+
+		if (optlen != sizeof(struct group_source_req))
+			goto e_inval;
+		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
 			err = -EFAULT;
-			if (copy_from_user(msf, optval, optlen)) {
-				kfree(msf);
-				break;
-			}
-			/* numsrc >= (1G-4) overflow in 32 bits */
-			if (msf->imsf_numsrc >= 0x3ffffffcU ||
-			    msf->imsf_numsrc > sysctl_igmp_max_msf) {
-				kfree(msf);
-				err = -ENOBUFS;
-				break;
-			}
-			if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
-				kfree(msf);
-				err = -EINVAL;
-				break;
-			}
-			err = ip_mc_msfilter(sk, msf, 0);
-			kfree(msf);
 			break;
 		}
-		case IP_BLOCK_SOURCE:
-		case IP_UNBLOCK_SOURCE:
-		case IP_ADD_SOURCE_MEMBERSHIP:
-		case IP_DROP_SOURCE_MEMBERSHIP:
-		{
-			struct ip_mreq_source mreqs;
-			int omode, add;
-
-			if (optlen != sizeof(struct ip_mreq_source))
-				goto e_inval;
-			if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
-				err = -EFAULT;
-				break;
-			}
-			if (optname == IP_BLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 1;
-			} else if (optname == IP_UNBLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 0;
-			} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
-				struct ip_mreqn mreq;
-
-				mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
-				mreq.imr_address.s_addr = mreqs.imr_interface;
-				mreq.imr_ifindex = 0;
-				err = ip_mc_join_group(sk, &mreq);
-				if (err && err != -EADDRINUSE)
-					break;
-				omode = MCAST_INCLUDE;
-				add = 1;
-			} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
-				omode = MCAST_INCLUDE;
-				add = 0;
-			}
-			err = ip_mc_source(add, omode, sk, &mreqs, 0);
+		if (greqs.gsr_group.ss_family != AF_INET ||
+		    greqs.gsr_source.ss_family != AF_INET) {
+			err = -EADDRNOTAVAIL;
 			break;
 		}
-		case MCAST_JOIN_GROUP:
-		case MCAST_LEAVE_GROUP:
-		{
-			struct group_req greq;
-			struct sockaddr_in *psin;
+		psin = (struct sockaddr_in *)&greqs.gsr_group;
+		mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+		psin = (struct sockaddr_in *)&greqs.gsr_source;
+		mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+		mreqs.imr_interface = 0; /* use index for mc_source */
+
+		if (optname == MCAST_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == MCAST_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
 			struct ip_mreqn mreq;
 
-			if (optlen < sizeof(struct group_req))
-				goto e_inval;
-			err = -EFAULT;
-			if(copy_from_user(&greq, optval, sizeof(greq)))
-				break;
-			psin = (struct sockaddr_in *)&greq.gr_group;
-			if (psin->sin_family != AF_INET)
-				goto e_inval;
-			memset(&mreq, 0, sizeof(mreq));
+			psin = (struct sockaddr_in *)&greqs.gsr_group;
 			mreq.imr_multiaddr = psin->sin_addr;
-			mreq.imr_ifindex = greq.gr_interface;
-
-			if (optname == MCAST_JOIN_GROUP)
-				err = ip_mc_join_group(sk, &mreq);
-			else
-				err = ip_mc_leave_group(sk, &mreq);
+			mreq.imr_address.s_addr = 0;
+			mreq.imr_ifindex = greqs.gsr_interface;
+			err = ip_mc_join_group(sk, &mreq);
+			if (err && err != -EADDRINUSE)
+				break;
+			greqs.gsr_interface = mreq.imr_ifindex;
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		err = ip_mc_source(add, omode, sk, &mreqs,
+				   greqs.gsr_interface);
+		break;
+	}
+	case MCAST_MSFILTER:
+	{
+		extern int sysctl_igmp_max_msf;
+		struct sockaddr_in *psin;
+		struct ip_msfilter *msf = NULL;
+		struct group_filter *gsf = NULL;
+		int msize, i, ifindex;
+
+		if (optlen < GROUP_FILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			err = -ENOBUFS;
 			break;
 		}
-		case MCAST_JOIN_SOURCE_GROUP:
-		case MCAST_LEAVE_SOURCE_GROUP:
-		case MCAST_BLOCK_SOURCE:
-		case MCAST_UNBLOCK_SOURCE:
-		{
-			struct group_source_req greqs;
-			struct ip_mreq_source mreqs;
-			struct sockaddr_in *psin;
-			int omode, add;
-
-			if (optlen != sizeof(struct group_source_req))
-				goto e_inval;
-			if (copy_from_user(&greqs, optval, sizeof(greqs))) {
-				err = -EFAULT;
-				break;
-			}
-			if (greqs.gsr_group.ss_family != AF_INET ||
-			    greqs.gsr_source.ss_family != AF_INET) {
-				err = -EADDRNOTAVAIL;
-				break;
-			}
-			psin = (struct sockaddr_in *)&greqs.gsr_group;
-			mreqs.imr_multiaddr = psin->sin_addr.s_addr;
-			psin = (struct sockaddr_in *)&greqs.gsr_source;
-			mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
-			mreqs.imr_interface = 0; /* use index for mc_source */
-
-			if (optname == MCAST_BLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 1;
-			} else if (optname == MCAST_UNBLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 0;
-			} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-				struct ip_mreqn mreq;
-
-				psin = (struct sockaddr_in *)&greqs.gsr_group;
-				mreq.imr_multiaddr = psin->sin_addr;
-				mreq.imr_address.s_addr = 0;
-				mreq.imr_ifindex = greqs.gsr_interface;
-				err = ip_mc_join_group(sk, &mreq);
-				if (err && err != -EADDRINUSE)
-					break;
-				greqs.gsr_interface = mreq.imr_ifindex;
-				omode = MCAST_INCLUDE;
-				add = 1;
-			} else /* MCAST_LEAVE_SOURCE_GROUP */ {
-				omode = MCAST_INCLUDE;
-				add = 0;
-			}
-			err = ip_mc_source(add, omode, sk, &mreqs,
-				greqs.gsr_interface);
+		gsf = kmalloc(optlen,GFP_KERNEL);
+		if (gsf == 0) {
+			err = -ENOBUFS;
 			break;
 		}
-		case MCAST_MSFILTER:
-		{
-			extern int sysctl_igmp_max_msf;
-			struct sockaddr_in *psin;
-			struct ip_msfilter *msf = NULL;
-			struct group_filter *gsf = NULL;
-			int msize, i, ifindex;
-
-			if (optlen < GROUP_FILTER_SIZE(0))
-				goto e_inval;
-			if (optlen > sysctl_optmem_max) {
-				err = -ENOBUFS;
-				break;
-			}
-			gsf = kmalloc(optlen,GFP_KERNEL);
-			if (gsf == 0) {
-				err = -ENOBUFS;
-				break;
-			}
-			err = -EFAULT;
-			if (copy_from_user(gsf, optval, optlen)) {
-				goto mc_msf_out;
-			}
-			/* numsrc >= (4G-140)/128 overflow in 32 bits */
-			if (gsf->gf_numsrc >= 0x1ffffff ||
-			    gsf->gf_numsrc > sysctl_igmp_max_msf) {
-				err = -ENOBUFS;
-				goto mc_msf_out;
-			}
-			if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
-				err = -EINVAL;
-				goto mc_msf_out;
-			}
-			msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-			msf = kmalloc(msize,GFP_KERNEL);
-			if (msf == 0) {
-				err = -ENOBUFS;
-				goto mc_msf_out;
-			}
-			ifindex = gsf->gf_interface;
-			psin = (struct sockaddr_in *)&gsf->gf_group;
-			if (psin->sin_family != AF_INET) {
-				err = -EADDRNOTAVAIL;
-				goto mc_msf_out;
-			}
-			msf->imsf_multiaddr = psin->sin_addr.s_addr;
-			msf->imsf_interface = 0;
-			msf->imsf_fmode = gsf->gf_fmode;
-			msf->imsf_numsrc = gsf->gf_numsrc;
+		err = -EFAULT;
+		if (copy_from_user(gsf, optval, optlen)) {
+			goto mc_msf_out;
+		}
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (gsf->gf_numsrc >= 0x1ffffff ||
+		    gsf->gf_numsrc > sysctl_igmp_max_msf) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+			err = -EINVAL;
+			goto mc_msf_out;
+		}
+		msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
+		msf = kmalloc(msize,GFP_KERNEL);
+		if (msf == 0) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		ifindex = gsf->gf_interface;
+		psin = (struct sockaddr_in *)&gsf->gf_group;
+		if (psin->sin_family != AF_INET) {
 			err = -EADDRNOTAVAIL;
-			for (i=0; i<gsf->gf_numsrc; ++i) {
-				psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
-				if (psin->sin_family != AF_INET)
-					goto mc_msf_out;
-				msf->imsf_slist[i] = psin->sin_addr.s_addr;
-			}
-			kfree(gsf);
-			gsf = NULL;
-
-			err = ip_mc_msfilter(sk, msf, ifindex);
-mc_msf_out:
-			kfree(msf);
-			kfree(gsf);
-			break;
+			goto mc_msf_out;
 		}
-		case IP_ROUTER_ALERT:
-			err = ip_ra_control(sk, val ? 1 : 0, NULL);
-			break;
-
-		case IP_FREEBIND:
-			if (optlen<1)
-				goto e_inval;
-			inet->freebind = !!val;
-			break;
+		msf->imsf_multiaddr = psin->sin_addr.s_addr;
+		msf->imsf_interface = 0;
+		msf->imsf_fmode = gsf->gf_fmode;
+		msf->imsf_numsrc = gsf->gf_numsrc;
+		err = -EADDRNOTAVAIL;
+		for (i=0; i<gsf->gf_numsrc; ++i) {
+			psin = (struct sockaddr_in *)&gsf->gf_slist[i];
 
-		case IP_IPSEC_POLICY:
-		case IP_XFRM_POLICY:
-			err = -EPERM;
-			if (!capable(CAP_NET_ADMIN))
-				break;
-			err = xfrm_user_policy(sk, optname, optval, optlen);
+			if (psin->sin_family != AF_INET)
+				goto mc_msf_out;
+			msf->imsf_slist[i] = psin->sin_addr.s_addr;
+		}
+		kfree(gsf);
+		gsf = NULL;
+
+		err = ip_mc_msfilter(sk, msf, ifindex);
+	mc_msf_out:
+		kfree(msf);
+		kfree(gsf);
+		break;
+	}
+	case IP_ROUTER_ALERT:
+		err = ip_ra_control(sk, val ? 1 : 0, NULL);
+		break;
+
+	case IP_FREEBIND:
+		if (optlen<1)
+			goto e_inval;
+		inet->freebind = !!val;
+		break;
+
+	case IP_IPSEC_POLICY:
+	case IP_XFRM_POLICY:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
 			break;
+		err = xfrm_user_policy(sk, optname, optval, optlen);
+		break;
 
-		default:
-			err = -ENOPROTOOPT;
-			break;
+	default:
+		err = -ENOPROTOOPT;
+		break;
 	}
 	release_sock(sk);
 	return err;
@@ -948,214 +954,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
  */
 
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
-		char __user *optval, int __user *optlen)
+			    char __user *optval, int __user *optlen)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val;
 	int len;
 
-	if(level!=SOL_IP)
+	if (level != SOL_IP)
 		return -EOPNOTSUPP;
 
 #ifdef CONFIG_IP_MROUTE
-	if(optname>=MRT_BASE && optname <=MRT_BASE+10)
-	{
+	if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
 		return ip_mroute_getsockopt(sk,optname,optval,optlen);
 	}
 #endif
 
-	if(get_user(len,optlen))
+	if (get_user(len,optlen))
 		return -EFAULT;
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
 	lock_sock(sk);
 
-	switch(optname)	{
-		case IP_OPTIONS:
-			{
-				unsigned char optbuf[sizeof(struct ip_options)+40];
-				struct ip_options * opt = (struct ip_options*)optbuf;
-				opt->optlen = 0;
-				if (inet->opt)
-					memcpy(optbuf, inet->opt,
-					       sizeof(struct ip_options)+
-					       inet->opt->optlen);
-				release_sock(sk);
-
-				if (opt->optlen == 0)
-					return put_user(0, optlen);
-
-				ip_options_undo(opt);
-
-				len = min_t(unsigned int, len, opt->optlen);
-				if(put_user(len, optlen))
-					return -EFAULT;
-				if(copy_to_user(optval, opt->__data, len))
-					return -EFAULT;
-				return 0;
-			}
-		case IP_PKTINFO:
-			val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
-			break;
-		case IP_RECVTTL:
-			val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
-			break;
-		case IP_RECVTOS:
-			val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
-			break;
-		case IP_RECVOPTS:
-			val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
-			break;
-		case IP_RETOPTS:
-			val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
-			break;
-		case IP_PASSSEC:
-			val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
-			break;
-		case IP_TOS:
-			val = inet->tos;
-			break;
-		case IP_TTL:
-			val = (inet->uc_ttl == -1 ?
-			       sysctl_ip_default_ttl :
-			       inet->uc_ttl);
-			break;
-		case IP_HDRINCL:
-			val = inet->hdrincl;
-			break;
-		case IP_MTU_DISCOVER:
-			val = inet->pmtudisc;
-			break;
-		case IP_MTU:
-		{
-			struct dst_entry *dst;
-			val = 0;
-			dst = sk_dst_get(sk);
-			if (dst) {
-				val = dst_mtu(dst);
-				dst_release(dst);
-			}
-			if (!val) {
-				release_sock(sk);
-				return -ENOTCONN;
-			}
-			break;
+	switch (optname) {
+	case IP_OPTIONS:
+	{
+		unsigned char optbuf[sizeof(struct ip_options)+40];
+		struct ip_options * opt = (struct ip_options*)optbuf;
+		opt->optlen = 0;
+		if (inet->opt)
+			memcpy(optbuf, inet->opt,
+			       sizeof(struct ip_options)+
+			       inet->opt->optlen);
+		release_sock(sk);
+
+		if (opt->optlen == 0)
+			return put_user(0, optlen);
+
+		ip_options_undo(opt);
+
+		len = min_t(unsigned int, len, opt->optlen);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, opt->__data, len))
+			return -EFAULT;
+		return 0;
+	}
+	case IP_PKTINFO:
+		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
+		break;
+	case IP_RECVTTL:
+		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
+		break;
+	case IP_RECVTOS:
+		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
+		break;
+	case IP_RECVOPTS:
+		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+		break;
+	case IP_RETOPTS:
+		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
+		break;
+	case IP_PASSSEC:
+		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
+		break;
+	case IP_TOS:
+		val = inet->tos;
+		break;
+	case IP_TTL:
+		val = (inet->uc_ttl == -1 ?
+		       sysctl_ip_default_ttl :
+		       inet->uc_ttl);
+		break;
+	case IP_HDRINCL:
+		val = inet->hdrincl;
+		break;
+	case IP_MTU_DISCOVER:
+		val = inet->pmtudisc;
+		break;
+	case IP_MTU:
+	{
+		struct dst_entry *dst;
+		val = 0;
+		dst = sk_dst_get(sk);
+		if (dst) {
+			val = dst_mtu(dst);
+			dst_release(dst);
 		}
-		case IP_RECVERR:
-			val = inet->recverr;
-			break;
-		case IP_MULTICAST_TTL:
-			val = inet->mc_ttl;
-			break;
-		case IP_MULTICAST_LOOP:
-			val = inet->mc_loop;
-			break;
-		case IP_MULTICAST_IF:
-		{
-			struct in_addr addr;
-			len = min_t(unsigned int, len, sizeof(struct in_addr));
-			addr.s_addr = inet->mc_addr;
+		if (!val) {
 			release_sock(sk);
-
-			if(put_user(len, optlen))
-				return -EFAULT;
-			if(copy_to_user(optval, &addr, len))
-				return -EFAULT;
-			return 0;
+			return -ENOTCONN;
 		}
-		case IP_MSFILTER:
-		{
-			struct ip_msfilter msf;
-			int err;
+		break;
+	}
+	case IP_RECVERR:
+		val = inet->recverr;
+		break;
+	case IP_MULTICAST_TTL:
+		val = inet->mc_ttl;
+		break;
+	case IP_MULTICAST_LOOP:
+		val = inet->mc_loop;
+		break;
+	case IP_MULTICAST_IF:
+	{
+		struct in_addr addr;
+		len = min_t(unsigned int, len, sizeof(struct in_addr));
+		addr.s_addr = inet->mc_addr;
+		release_sock(sk);
 
-			if (len < IP_MSFILTER_SIZE(0)) {
-				release_sock(sk);
-				return -EINVAL;
-			}
-			if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
-				release_sock(sk);
-				return -EFAULT;
-			}
-			err = ip_mc_msfget(sk, &msf,
-				(struct ip_msfilter __user *)optval, optlen);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &addr, len))
+			return -EFAULT;
+		return 0;
+	}
+	case IP_MSFILTER:
+	{
+		struct ip_msfilter msf;
+		int err;
+
+		if (len < IP_MSFILTER_SIZE(0)) {
 			release_sock(sk);
-			return err;
+			return -EINVAL;
 		}
-		case MCAST_MSFILTER:
-		{
-			struct group_filter gsf;
-			int err;
-
-			if (len < GROUP_FILTER_SIZE(0)) {
-				release_sock(sk);
-				return -EINVAL;
-			}
-			if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
-				release_sock(sk);
-				return -EFAULT;
-			}
-			err = ip_mc_gsfget(sk, &gsf,
-				(struct group_filter __user *)optval, optlen);
+		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
 			release_sock(sk);
-			return err;
+			return -EFAULT;
 		}
-		case IP_PKTOPTIONS:
-		{
-			struct msghdr msg;
+		err = ip_mc_msfget(sk, &msf,
+				   (struct ip_msfilter __user *)optval, optlen);
+		release_sock(sk);
+		return err;
+	}
+	case MCAST_MSFILTER:
+	{
+		struct group_filter gsf;
+		int err;
 
+		if (len < GROUP_FILTER_SIZE(0)) {
 			release_sock(sk);
+			return -EINVAL;
+		}
+		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+			release_sock(sk);
+			return -EFAULT;
+		}
+		err = ip_mc_gsfget(sk, &gsf,
+				   (struct group_filter __user *)optval, optlen);
+		release_sock(sk);
+		return err;
+	}
+	case IP_PKTOPTIONS:
+	{
+		struct msghdr msg;
+
+		release_sock(sk);
 
-			if (sk->sk_type != SOCK_STREAM)
-				return -ENOPROTOOPT;
+		if (sk->sk_type != SOCK_STREAM)
+			return -ENOPROTOOPT;
 
-			msg.msg_control = optval;
-			msg.msg_controllen = len;
-			msg.msg_flags = 0;
+		msg.msg_control = optval;
+		msg.msg_controllen = len;
+		msg.msg_flags = 0;
 
-			if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
-				struct in_pktinfo info;
+		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+			struct in_pktinfo info;
 
-				info.ipi_addr.s_addr = inet->rcv_saddr;
-				info.ipi_spec_dst.s_addr = inet->rcv_saddr;
-				info.ipi_ifindex = inet->mc_index;
-				put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
-			}
-			if (inet->cmsg_flags & IP_CMSG_TTL) {
-				int hlim = inet->mc_ttl;
-				put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
-			}
-			len -= msg.msg_controllen;
-			return put_user(len, optlen);
+			info.ipi_addr.s_addr = inet->rcv_saddr;
+			info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+			info.ipi_ifindex = inet->mc_index;
+			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
 		}
-		case IP_FREEBIND:
-			val = inet->freebind;
-			break;
-		default:
-			release_sock(sk);
-			return -ENOPROTOOPT;
+		if (inet->cmsg_flags & IP_CMSG_TTL) {
+			int hlim = inet->mc_ttl;
+			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+		}
+		len -= msg.msg_controllen;
+		return put_user(len, optlen);
+	}
+	case IP_FREEBIND:
+		val = inet->freebind;
+		break;
+	default:
+		release_sock(sk);
+		return -ENOPROTOOPT;
 	}
 	release_sock(sk);
 
 	if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
 		unsigned char ucval = (unsigned char)val;
 		len = 1;
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&ucval,1))
+		if (copy_to_user(optval,&ucval,1))
 			return -EFAULT;
 	} else {
 		len = min_t(unsigned int, sizeof(int), len);
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&val,len))
+		if (copy_to_user(optval,&val,len))
 			return -EFAULT;
 	}
 	return 0;
 }
 
 int ip_getsockopt(struct sock *sk, int level,
-		int optname, char __user *optval, int __user *optlen)
+		  int optname, char __user *optval, int __user *optlen)
 {
 	int err;
 
@@ -1169,7 +1174,7 @@ int ip_getsockopt(struct sock *sk, int level,
 	   ) {
 		int len;
 
-		if(get_user(len,optlen))
+		if (get_user(len,optlen))
 			return -EFAULT;
 
 		lock_sock(sk);
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index aa704b88f014..ab86137c71d2 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list);
 
 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	const u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 	if (err)
 		goto out;
 
@@ -72,7 +66,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->truesize += dlen - plen;
 	__skb_put(skb, dlen - plen);
-	memcpy(skb->data, scratch, dlen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
 out:
 	put_cpu();
 	return err;
@@ -90,10 +84,10 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ipch = (void *)skb->data;
 	iph->protocol = ipch->nexthdr;
-	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	skb->transport_header = skb->network_header + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
 	err = ipcomp_decompress(x, skb);
 
@@ -103,23 +97,16 @@ out:
 
 static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen, ihlen;
-	struct iphdr *iph = skb->nh.iph;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
+	const int ihlen = ip_hdrlen(skb);
+	const int plen = skb->len - ihlen;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	u8 *start = skb->data + ihlen;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
 
-	ihlen = iph->ihl * 4;
-	plen = skb->len - ihlen;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data + ihlen;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
 	if (err)
 		goto out;
 
@@ -142,12 +129,11 @@ out:
 static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	struct iphdr *iph;
 	struct ip_comp_hdr *ipch;
 	struct ipcomp_data *ipcd = x->data;
 	int hdr_len = 0;
+	struct iphdr *iph = ip_hdr(skb);
 
-	iph = skb->nh.iph;
 	iph->tot_len = htons(skb->len);
 	hdr_len = iph->ihl * 4;
 	if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto out_ok;
 
 	err = ipcomp_compress(x, skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (err) {
 		goto out_ok;
@@ -188,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	spi = htonl(ntohs(ipch->cpi));
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cf49de1a4983..597c800b2fdc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* Basic sanity checks can be done without the lock.  */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 
 	/* If this test doesn't pass, it's not IP, or we should
 	 * ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* OK, it is all there and looks valid, process... */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 	rarp_ptr = (unsigned char *) (rarp + 1);
 
 	/* One reply at a time, please. */
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 	memset(b, 0, sizeof(struct bootp_pkt));
 
 	/* Construct IP header */
-	skb->nh.iph = h = &b->iph;
+	skb_reset_network_header(skb);
+	h = ip_hdr(skb);
 	h->version = 4;
 	h->ihl = 5;
 	h->tot_len = htons(sizeof(struct bootp_pkt));
@@ -782,7 +783,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
 	u8 *c;
 
 	printk("DHCP/BOOTP: Got extension %d:",*ext);
-	for(c=ext+2; c<ext+2+ext[1]; c++)
+	for (c=ext+2; c<ext+2+ext[1]; c++)
 		printk(" %02x", *c);
 	printk("\n");
 #endif
@@ -845,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 			   sizeof(struct udphdr)))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
@@ -883,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	if (!pskb_may_pull(skb, skb->len))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	/* One reply at a time, please. */
@@ -938,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 					if (opt[1] >= 4)
 						memcpy(&server_id, opt + 2, 4);
 					break;
-				};
+				}
 			}
 
 #ifdef IPCONFIG_DEBUG
@@ -983,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 				ic_myaddr = NONE;
 				ic_servaddr = NONE;
 				goto drop_unlock;
-			};
+			}
 
 			ic_dhcp_msgtype = mt;
 
@@ -1094,7 +1095,7 @@ static int __init ic_dynamic(void)
 	retries = CONF_SEND_RETRIES;
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
-	for(;;) {
+	for (;;) {
 #ifdef IPCONFIG_BOOTP
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3ec5ce0f5498..ebd2f2d532f6 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 	return NULL;
 }
 
-static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
 
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+	return __ipip_bucket(&t->parms);
+}
 
 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 {
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
 	__be32 local = parms->iph.saddr;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
@@ -280,8 +274,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
    ICMP in the real Internet is absolutely infeasible.
  */
 	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
 
@@ -336,8 +330,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct iphdr *eiph;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	__be32 rel_info = 0;
@@ -354,7 +348,7 @@ out:
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 		if (n < hlen)
 			return 0;
 
@@ -373,7 +367,7 @@ out:
 			return 0;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			n = ntohs(skb->h.icmph->un.frag.mtu);
+			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 			if (n < hlen+68)
 				return 0;
 			n -= hlen;
@@ -405,7 +399,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
@@ -461,9 +455,10 @@ out:
 #endif
 }
 
-static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+					struct sk_buff *skb)
 {
-	struct iphdr *inner_iph = skb->nh.iph;
+	struct iphdr *inner_iph = ip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
 		IP_ECN_set_ce(inner_iph);
@@ -471,10 +466,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff
 
 static int ipip_rcv(struct sk_buff *skb)
 {
-	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
-
-	iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	read_lock(&ipip_lock);
 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -486,8 +479,8 @@ static int ipip_rcv(struct sk_buff *skb)
 
 		secpath_reset(skb);
 
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
@@ -521,7 +514,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;			/* Device to other host */
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -615,11 +608,12 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb->transport_header = skb->network_header;
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -630,7 +624,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	iph->frag_off		=	df;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 601e3df69258..0ebae413ae87 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ipip.h>
 #include <net/checksum.h>
+#include <net/netlink.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
@@ -302,8 +303,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 
 	atomic_dec(&cache_resolve_queue_len);
 
-	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -479,7 +480,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
 	return c;
@@ -488,7 +489,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
@@ -508,12 +509,13 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 	 *	Play the pending entries through our router
 	 */
 
-	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
-				nlh->nlmsg_len = skb->tail - (u8*)nlh;
+				nlh->nlmsg_len = (skb_tail_pointer(skb) -
+						  (u8 *)nlh);
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -539,7 +541,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 {
 	struct sk_buff *skb;
-	int ihl = pkt->nh.iph->ihl<<2;
+	const int ihl = ip_hdrlen(pkt);
 	struct igmphdr *igmp;
 	struct igmpmsg *msg;
 	int ret;
@@ -551,7 +553,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 #endif
 		skb = alloc_skb(128, GFP_ATOMIC);
 
-	if(!skb)
+	if (!skb)
 		return -ENOBUFS;
 
 #ifdef CONFIG_IP_PIMSM
@@ -561,14 +563,17 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		   And all this only to mangle msg->im_msgtype and
 		   to set msg->im_mbz to "mbz" :-)
 		 */
-		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
-		skb->nh.raw = skb->h.raw = (u8*)msg;
-		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+		skb_push(skb, sizeof(struct iphdr));
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
+		msg = (struct igmpmsg *)skb_network_header(skb);
+		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
 		msg->im_vif = reg_vif_num;
-		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
-		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
+		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
+					     sizeof(struct iphdr));
 	} else
 #endif
 	{
@@ -577,10 +582,11 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	 *	Copy the IP header
 	 */
 
-	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
-	memcpy(skb->data,pkt->data,ihl);
-	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
-	msg = (struct igmpmsg*)skb->nh.iph;
+	skb->network_header = skb->tail;
+	skb_put(skb, ihl);
+	skb_copy_to_linear_data(skb, pkt->data, ihl);
+	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
+	msg = (struct igmpmsg *)skb_network_header(skb);
 	msg->im_vif = vifi;
 	skb->dst = dst_clone(pkt->dst);
 
@@ -592,8 +598,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	igmp->type	=
 	msg->im_msgtype = assert;
 	igmp->code 	=	0;
-	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
-	skb->h.raw = skb->nh.raw;
+	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
+	skb->transport_header = skb->network_header;
 	}
 
 	if (mroute_socket == NULL) {
@@ -622,11 +628,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 {
 	int err;
 	struct mfc_cache *c;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c=mfc_unres_queue; c; c=c->next) {
-		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
-		    c->mfc_origin == skb->nh.iph->saddr)
+		if (c->mfc_mcastgrp == iph->daddr &&
+		    c->mfc_origin == iph->saddr)
 			break;
 	}
 
@@ -646,9 +653,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		/*
 		 *	Fill in the new cache entry
 		 */
-		c->mfc_parent=-1;
-		c->mfc_origin=skb->nh.iph->saddr;
-		c->mfc_mcastgrp=skb->nh.iph->daddr;
+		c->mfc_parent	= -1;
+		c->mfc_origin	= iph->saddr;
+		c->mfc_mcastgrp	= iph->daddr;
 
 		/*
 		 *	Reflect first query at mrouted.
@@ -734,7 +741,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 		return 0;
 	}
 
-	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
 	c=ipmr_cache_alloc();
@@ -788,7 +795,7 @@ static void mroute_clean_tables(struct sock *sk)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for(i=0; i<maxvif; i++) {
+	for (i=0; i<maxvif; i++) {
 		if (!(vif_table[i].flags&VIFF_STATIC))
 			vif_delete(i);
 	}
@@ -858,119 +865,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 	struct vifctl vif;
 	struct mfcctl mfc;
 
-	if(optname!=MRT_INIT)
-	{
-		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
+	if (optname != MRT_INIT) {
+		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
-	switch(optname)
-	{
-		case MRT_INIT:
-			if (sk->sk_type != SOCK_RAW ||
-			    inet_sk(sk)->num != IPPROTO_IGMP)
-				return -EOPNOTSUPP;
-			if(optlen!=sizeof(int))
-				return -ENOPROTOOPT;
-
-			rtnl_lock();
-			if (mroute_socket) {
-				rtnl_unlock();
-				return -EADDRINUSE;
-			}
-
-			ret = ip_ra_control(sk, 1, mrtsock_destruct);
-			if (ret == 0) {
-				write_lock_bh(&mrt_lock);
-				mroute_socket=sk;
-				write_unlock_bh(&mrt_lock);
+	switch (optname) {
+	case MRT_INIT:
+		if (sk->sk_type != SOCK_RAW ||
+		    inet_sk(sk)->num != IPPROTO_IGMP)
+			return -EOPNOTSUPP;
+		if (optlen!=sizeof(int))
+			return -ENOPROTOOPT;
 
-				ipv4_devconf.mc_forwarding++;
-			}
+		rtnl_lock();
+		if (mroute_socket) {
 			rtnl_unlock();
-			return ret;
-		case MRT_DONE:
-			if (sk!=mroute_socket)
-				return -EACCES;
-			return ip_ra_control(sk, 0, NULL);
-		case MRT_ADD_VIF:
-		case MRT_DEL_VIF:
-			if(optlen!=sizeof(vif))
-				return -EINVAL;
-			if (copy_from_user(&vif,optval,sizeof(vif)))
-				return -EFAULT;
-			if(vif.vifc_vifi >= MAXVIFS)
-				return -ENFILE;
-			rtnl_lock();
-			if (optname==MRT_ADD_VIF) {
-				ret = vif_add(&vif, sk==mroute_socket);
-			} else {
-				ret = vif_delete(vif.vifc_vifi);
-			}
-			rtnl_unlock();
-			return ret;
+			return -EADDRINUSE;
+		}
+
+		ret = ip_ra_control(sk, 1, mrtsock_destruct);
+		if (ret == 0) {
+			write_lock_bh(&mrt_lock);
+			mroute_socket=sk;
+			write_unlock_bh(&mrt_lock);
+
+			ipv4_devconf.mc_forwarding++;
+		}
+		rtnl_unlock();
+		return ret;
+	case MRT_DONE:
+		if (sk!=mroute_socket)
+			return -EACCES;
+		return ip_ra_control(sk, 0, NULL);
+	case MRT_ADD_VIF:
+	case MRT_DEL_VIF:
+		if (optlen!=sizeof(vif))
+			return -EINVAL;
+		if (copy_from_user(&vif,optval,sizeof(vif)))
+			return -EFAULT;
+		if (vif.vifc_vifi >= MAXVIFS)
+			return -ENFILE;
+		rtnl_lock();
+		if (optname==MRT_ADD_VIF) {
+			ret = vif_add(&vif, sk==mroute_socket);
+		} else {
+			ret = vif_delete(vif.vifc_vifi);
+		}
+		rtnl_unlock();
+		return ret;
 
 		/*
 		 *	Manipulate the forwarding caches. These live
 		 *	in a sort of kernel/user symbiosis.
 		 */
-		case MRT_ADD_MFC:
-		case MRT_DEL_MFC:
-			if(optlen!=sizeof(mfc))
-				return -EINVAL;
-			if (copy_from_user(&mfc,optval, sizeof(mfc)))
-				return -EFAULT;
-			rtnl_lock();
-			if (optname==MRT_DEL_MFC)
-				ret = ipmr_mfc_delete(&mfc);
-			else
-				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
-			rtnl_unlock();
-			return ret;
+	case MRT_ADD_MFC:
+	case MRT_DEL_MFC:
+		if (optlen!=sizeof(mfc))
+			return -EINVAL;
+		if (copy_from_user(&mfc,optval, sizeof(mfc)))
+			return -EFAULT;
+		rtnl_lock();
+		if (optname==MRT_DEL_MFC)
+			ret = ipmr_mfc_delete(&mfc);
+		else
+			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+		rtnl_unlock();
+		return ret;
 		/*
 		 *	Control PIM assert.
 		 */
-		case MRT_ASSERT:
-		{
-			int v;
-			if(get_user(v,(int __user *)optval))
-				return -EFAULT;
-			mroute_do_assert=(v)?1:0;
-			return 0;
-		}
+	case MRT_ASSERT:
+	{
+		int v;
+		if (get_user(v,(int __user *)optval))
+			return -EFAULT;
+		mroute_do_assert=(v)?1:0;
+		return 0;
+	}
 #ifdef CONFIG_IP_PIMSM
-		case MRT_PIM:
-		{
-			int v, ret;
-			if(get_user(v,(int __user *)optval))
-				return -EFAULT;
-			v = (v)?1:0;
-			rtnl_lock();
-			ret = 0;
-			if (v != mroute_do_pim) {
-				mroute_do_pim = v;
-				mroute_do_assert = v;
+	case MRT_PIM:
+	{
+		int v, ret;
+		if (get_user(v,(int __user *)optval))
+			return -EFAULT;
+		v = (v)?1:0;
+		rtnl_lock();
+		ret = 0;
+		if (v != mroute_do_pim) {
+			mroute_do_pim = v;
+			mroute_do_assert = v;
 #ifdef CONFIG_IP_PIMSM_V2
-				if (mroute_do_pim)
-					ret = inet_add_protocol(&pim_protocol,
-								IPPROTO_PIM);
-				else
-					ret = inet_del_protocol(&pim_protocol,
-								IPPROTO_PIM);
-				if (ret < 0)
-					ret = -EAGAIN;
+			if (mroute_do_pim)
+				ret = inet_add_protocol(&pim_protocol,
+							IPPROTO_PIM);
+			else
+				ret = inet_del_protocol(&pim_protocol,
+							IPPROTO_PIM);
+			if (ret < 0)
+				ret = -EAGAIN;
 #endif
-			}
-			rtnl_unlock();
-			return ret;
 		}
+		rtnl_unlock();
+		return ret;
+	}
 #endif
-		/*
-		 *	Spurious command, or MRT_VERSION which you cannot
-		 *	set.
-		 */
-		default:
-			return -ENOPROTOOPT;
+	/*
+	 *	Spurious command, or MRT_VERSION which you cannot
+	 *	set.
+	 */
+	default:
+		return -ENOPROTOOPT;
 	}
 }
 
@@ -983,7 +988,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
 	int olr;
 	int val;
 
-	if(optname!=MRT_VERSION &&
+	if (optname!=MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
 	   optname!=MRT_PIM &&
 #endif
@@ -997,17 +1002,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
 	if (olr < 0)
 		return -EINVAL;
 
-	if(put_user(olr,optlen))
+	if (put_user(olr,optlen))
 		return -EFAULT;
-	if(optname==MRT_VERSION)
+	if (optname==MRT_VERSION)
 		val=0x0305;
 #ifdef CONFIG_IP_PIMSM
-	else if(optname==MRT_PIM)
+	else if (optname==MRT_PIM)
 		val=mroute_do_pim;
 #endif
 	else
 		val=mroute_do_assert;
-	if(copy_to_user(optval,&val,olr))
+	if (copy_to_user(optval,&val,olr))
 		return -EFAULT;
 	return 0;
 }
@@ -1023,48 +1028,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc_cache *c;
 
-	switch(cmd)
-	{
-		case SIOCGETVIFCNT:
-			if (copy_from_user(&vr,arg,sizeof(vr)))
-				return -EFAULT;
-			if(vr.vifi>=maxvif)
-				return -EINVAL;
-			read_lock(&mrt_lock);
-			vif=&vif_table[vr.vifi];
-			if(VIF_EXISTS(vr.vifi))	{
-				vr.icount=vif->pkt_in;
-				vr.ocount=vif->pkt_out;
-				vr.ibytes=vif->bytes_in;
-				vr.obytes=vif->bytes_out;
-				read_unlock(&mrt_lock);
-
-				if (copy_to_user(arg,&vr,sizeof(vr)))
-					return -EFAULT;
-				return 0;
-			}
+	switch (cmd) {
+	case SIOCGETVIFCNT:
+		if (copy_from_user(&vr,arg,sizeof(vr)))
+			return -EFAULT;
+		if (vr.vifi>=maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif=&vif_table[vr.vifi];
+		if (VIF_EXISTS(vr.vifi))	{
+			vr.icount=vif->pkt_in;
+			vr.ocount=vif->pkt_out;
+			vr.ibytes=vif->bytes_in;
+			vr.obytes=vif->bytes_out;
 			read_unlock(&mrt_lock);
-			return -EADDRNOTAVAIL;
-		case SIOCGETSGCNT:
-			if (copy_from_user(&sr,arg,sizeof(sr)))
-				return -EFAULT;
 
-			read_lock(&mrt_lock);
-			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
-			if (c) {
-				sr.pktcnt = c->mfc_un.res.pkt;
-				sr.bytecnt = c->mfc_un.res.bytes;
-				sr.wrong_if = c->mfc_un.res.wrong_if;
-				read_unlock(&mrt_lock);
-
-				if (copy_to_user(arg,&sr,sizeof(sr)))
-					return -EFAULT;
-				return 0;
-			}
+			if (copy_to_user(arg,&vr,sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	case SIOCGETSGCNT:
+		if (copy_from_user(&sr,arg,sizeof(sr)))
+			return -EFAULT;
+
+		read_lock(&mrt_lock);
+		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
 			read_unlock(&mrt_lock);
-			return -EADDRNOTAVAIL;
-		default:
-			return -ENOIOCTLCMD;
+
+			if (copy_to_user(arg,&sr,sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
 	}
 }
 
@@ -1076,7 +1080,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 	v=&vif_table[0];
-	for(ct=0;ct<maxvif;ct++,v++) {
+	for (ct=0;ct<maxvif;ct++,v++) {
 		if (v->dev==ptr)
 			vif_delete(ct);
 	}
@@ -1096,11 +1100,17 @@ static struct notifier_block ip_mr_notifier={
 
 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
-	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+	struct iphdr *iph;
+	struct iphdr *old_iph = ip_hdr(skb);
+
+	skb_push(skb, sizeof(struct iphdr));
+	skb->transport_header = skb->network_header;
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
 
 	iph->version	= 	4;
-	iph->tos	=	skb->nh.iph->tos;
-	iph->ttl	=	skb->nh.iph->ttl;
+	iph->tos	=	old_iph->tos;
+	iph->ttl	=	old_iph->ttl;
 	iph->frag_off	=	0;
 	iph->daddr	=	daddr;
 	iph->saddr	=	saddr;
@@ -1110,8 +1120,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	ip_select_ident(iph, skb->dst, NULL);
 	ip_send_check(iph);
 
-	skb->h.ipiph = skb->nh.iph;
-	skb->nh.iph = iph;
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	nf_reset(skb);
 }
@@ -1134,7 +1142,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 
 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &vif_table[vifi];
 	struct net_device *dev;
 	struct rtable *rt;
@@ -1200,8 +1208,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
-	iph = skb->nh.iph;
-	ip_decrease_ttl(iph);
+	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
 	 * What do we do with netfilter? -- RR */
@@ -1301,7 +1308,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 	 *	Forward the frame
 	 */
 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
-		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
+		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
@@ -1347,7 +1354,7 @@ int ip_mr_input(struct sk_buff *skb)
 		    if (IPCB(skb)->opt.router_alert) {
 			    if (ip_call_ra_chain(skb))
 				    return 0;
-		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
+		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
 			    /* IGMPv1 (and broken IGMPv2 implementations sort of
 			       Cisco IOS <= 11.2(8)) do not put router alert
 			       option to IGMP packets destined to routable
@@ -1366,7 +1373,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 
 	/*
 	 *	No usable cache entry
@@ -1426,14 +1433,15 @@ int pim_rcv_v1(struct sk_buff * skb)
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
 
-	pim = (struct igmphdr*)skb->h.raw;
+	pim = igmp_hdr(skb);
 
 	if (!mroute_do_pim ||
 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct igmphdr));
 	/*
 	   Check that:
 	   a. packet is really destinted to a multicast group
@@ -1455,9 +1463,9 @@ int pim_rcv_v1(struct sk_buff * skb)
 	if (reg_dev == NULL)
 		goto drop;
 
-	skb->mac.raw = skb->nh.raw;
+	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8*)encap - skb->data);
-	skb->nh.iph = (struct iphdr *)skb->data;
+	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
@@ -1486,7 +1494,7 @@ static int pim_rcv(struct sk_buff * skb)
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
 
-	pim = (struct pimreghdr*)skb->h.raw;
+	pim = (struct pimreghdr *)skb_transport_header(skb);
 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
 	    (pim->flags&PIM_NULL_REGISTER) ||
 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1494,7 +1502,8 @@ static int pim_rcv(struct sk_buff * skb)
 		goto drop;
 
 	/* check if the inner packet is destined to mcast group */
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct pimreghdr));
 	if (!MULTICAST(encap->daddr) ||
 	    encap->tot_len == 0 ||
 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
@@ -1510,9 +1519,9 @@ static int pim_rcv(struct sk_buff * skb)
 	if (reg_dev == NULL)
 		goto drop;
 
-	skb->mac.raw = skb->nh.raw;
+	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8*)encap - skb->data);
-	skb->nh.iph = (struct iphdr *)skb->data;
+	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
@@ -1537,7 +1546,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 	int ct;
 	struct rtnexthop *nhp;
 	struct net_device *dev = vif_table[c->mfc_parent].dev;
-	u8 *b = skb->tail;
+	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
 	if (dev)
@@ -1557,12 +1566,12 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 		}
 	}
 	mp_head->rta_type = RTA_MULTIPATH;
-	mp_head->rta_len = skb->tail - (u8*)mp_head;
+	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 	rtm->rtm_type = RTN_MULTICAST;
 	return 1;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
@@ -1577,6 +1586,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 
 	if (cache==NULL) {
 		struct sk_buff *skb2;
+		struct iphdr *iph;
 		struct net_device *dev;
 		int vif;
 
@@ -1596,11 +1606,13 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 			return -ENOMEM;
 		}
 
-		skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
-		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
-		skb2->nh.iph->saddr = rt->rt_src;
-		skb2->nh.iph->daddr = rt->rt_dst;
-		skb2->nh.iph->version = 0;
+		skb_push(skb2, sizeof(struct iphdr));
+		skb_reset_network_header(skb2);
+		iph = ip_hdr(skb2);
+		iph->ihl = sizeof(struct iphdr) >> 2;
+		iph->saddr = rt->rt_src;
+		iph->daddr = rt->rt_dst;
+		iph->version = 0;
 		err = ipmr_cache_unresolved(vif, skb2);
 		read_unlock(&mrt_lock);
 		return err;
@@ -1625,7 +1637,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
 					   loff_t pos)
 {
 	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
-		if(!VIF_EXISTS(iter->ct))
+		if (!VIF_EXISTS(iter->ct))
 			continue;
 		if (pos-- == 0)
 			return &vif_table[iter->ct];
@@ -1649,7 +1661,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ipmr_vif_seq_idx(iter, 0);
 
 	while (++iter->ct < maxvif) {
-		if(!VIF_EXISTS(iter->ct))
+		if (!VIF_EXISTS(iter->ct))
 			continue;
 		return &vif_table[iter->ct];
 	}
@@ -1680,7 +1692,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ipmr_vif_seq_ops = {
+static const struct seq_operations ipmr_vif_seq_ops = {
 	.start = ipmr_vif_seq_start,
 	.next  = ipmr_vif_seq_next,
 	.stop  = ipmr_vif_seq_stop,
@@ -1732,14 +1744,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
 	it->cache = mfc_cache_array;
 	read_lock(&mrt_lock);
 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
-		for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
+		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
 			if (pos-- == 0)
 				return mfc;
 	read_unlock(&mrt_lock);
 
 	it->cache = &mfc_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
+	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1829,9 +1841,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   mfc->mfc_un.res.wrong_if);
 
 		if (it->cache != &mfc_unres_queue) {
-			for(n = mfc->mfc_un.res.minvif;
-			    n < mfc->mfc_un.res.maxvif; n++ ) {
-				if(VIF_EXISTS(n)
+			for (n = mfc->mfc_un.res.minvif;
+			     n < mfc->mfc_un.res.maxvif; n++ ) {
+				if (VIF_EXISTS(n)
 				   && mfc->mfc_un.res.ttls[n] < 255)
 				seq_printf(seq,
 					   " %2d:%-3d",
@@ -1843,7 +1855,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ipmr_mfc_seq_ops = {
+static const struct seq_operations ipmr_mfc_seq_ops = {
 	.start = ipmr_mfc_seq_start,
 	.next  = ipmr_mfc_seq_next,
 	.stop  = ipmr_mfc_seq_stop,
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a493..15ad5dd2d984 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -331,14 +331,14 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
 				  struct ip_vs_app *app)
 {
 	int diff;
-	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	const unsigned int tcp_offset = ip_hdrlen(*pskb);
 	struct tcphdr *th;
 	__u32 seq;
 
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -406,14 +406,14 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
 				 struct ip_vs_app *app)
 {
 	int diff;
-	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	const unsigned int tcp_offset = ip_hdrlen(*pskb);
 	struct tcphdr *th;
 	__u32 seq;
 
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = {
 int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 		      char *o_buf, int o_len, char *n_buf, int n_len)
 {
-	struct iphdr *iph;
 	int diff;
 	int o_offset;
 	int o_left;
@@ -603,12 +602,11 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 		skb_put(skb, diff);
 		memmove(skb->data + o_offset + n_len,
 			skb->data + o_offset + o_len, o_left);
-		memcpy(skb->data + o_offset, n_buf, n_len);
+		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
 	}
 
 	/* must update the iph total length here */
-	iph = skb->nh.iph;
-	iph->tot_len = htons(skb->len);
+	ip_hdr(skb)->tot_len = htons(skb->len);
 
 	LeaveFunction(9);
 	return 0;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 24d7b66eb6d2..f005a2f929f4 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		    __be16 ports[2])
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;	 /* destination port to forward */
@@ -381,7 +381,7 @@ struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr;
 
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		struct ip_vs_protocol *pp)
 {
 	__be16 _ports[2], *pptr;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	pptr = skb_header_pointer(skb, iph->ihl*4,
 				  sizeof(_ports), _ports);
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
 	skb = ip_defrag(skb, user);
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 	return skb;
 }
 
@@ -557,9 +557,10 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		    struct ip_vs_conn *cp, int inout)
 {
-	struct iphdr *iph	 = skb->nh.iph;
+	struct iphdr *iph	 = ip_hdr(skb);
 	unsigned int icmp_offset = iph->ihl*4;
-	struct icmphdr *icmph	 = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
+						      icmp_offset);
 	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
 
 	if (inout) {
@@ -617,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
 		*pskb = skb;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	offset = ihl = iph->ihl * 4;
 	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
 	if (ic == NULL)
@@ -659,7 +660,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 		return NF_ACCEPT;
 
 	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
 		     pp->dont_defrag))
 		return NF_ACCEPT;
 
@@ -680,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	}
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
@@ -712,8 +712,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 	return th->rst;
@@ -740,14 +739,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_out_icmp(pskb, &related);
 
 		if (related)
 			return verdict;
 		skb = *pskb;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 	}
 
 	pp = ip_vs_proto_get(iph->protocol);
@@ -755,12 +754,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 		return NF_ACCEPT;
 
 	/* reassemble IP fragments */
-	if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 		*pskb = skb;
 	}
 
@@ -810,8 +809,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
 		goto drop;
 	skb = *pskb;
-	skb->nh.iph->saddr = cp->vaddr;
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->saddr = cp->vaddr;
+	ip_send_check(ip_hdr(skb));
 
 	/* For policy routing, packets originating from this
 	 * machine itself may be routed differently to packets
@@ -861,7 +860,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb,
 					 hooknum == NF_IP_LOCAL_IN ?
 					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -870,7 +869,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 		*pskb = skb;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	offset = ihl = iph->ihl * 4;
 	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
 	if (ic == NULL)
@@ -905,7 +904,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 		return NF_ACCEPT;
 
 	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
 		     pp->dont_defrag))
 		return NF_ACCEPT;
 
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	verdict = NF_DROP;
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
@@ -966,19 +964,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
 			  skb->pkt_type,
-			  skb->nh.iph->protocol,
-			  NIPQUAD(skb->nh.iph->daddr));
+			  ip_hdr(skb)->protocol,
+			  NIPQUAD(ip_hdr(skb)->daddr));
 		return NF_ACCEPT;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
 
 		if (related)
 			return verdict;
 		skb = *pskb;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 	}
 
 	/* Protocol supported? */
@@ -1064,7 +1062,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
 {
 	int r;
 
-	if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+	if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
 	return ip_vs_in_icmp(pskb, &r, hooknum);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 502111fba872..dcf5d46aaa5e 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_dh_bucket *tbl;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 847c47af040c..344ddbbdc756 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -159,10 +159,10 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		return 0;
 
 	if (cp->app_data == &ip_vs_ftp_pasv) {
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 		data = (char *)th + (th->doff << 2);
-		data_limit = (*pskb)->tail;
+		data_limit = skb_tail_pointer(*pskb);
 
 		if (ip_vs_ftp_get_addrport(data, data_limit,
 					   SERVER_STRING,
@@ -262,14 +262,14 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 * Detecting whether it is passive
 	 */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 
 	/* Since there may be OPTIONS in the TCP packet and the HLEN is
 	   the length of the header in 32-bit multiples, it is accurate
 	   to calculate data address by th+HLEN*4 */
 	data = data_start = (char *)th + (th->doff << 2);
-	data_limit = (*pskb)->tail;
+	data_limit = skb_tail_pointer(*pskb);
 
 	while (data <= data_limit - 6) {
 		if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c801273cb881..052f4ed59174 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_dest *dest;
 	struct ip_vs_lblc_table *tbl;
 	struct ip_vs_lblc_entry *en;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 23f9b9e73c85..6225acac7a3b 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_dest *dest;
 	struct ip_vs_lblcr_table *tbl;
 	struct ip_vs_lblcr_entry *en;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b09317..a842676e1c69 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 16a9ebee2fe6..e65577a77006 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -76,16 +76,15 @@ tcp_conn_schedule(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*verdict = NF_DROP;
 		return 0;
 	}
 
 	if (th->syn &&
-	    (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
-				     skb->nh.iph->daddr, th->dest))) {
+	    (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+				     ip_hdr(skb)->daddr, th->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int tcphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -143,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph = (void *)ip_hdr(*pskb) + tcphoff;
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int tcphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -194,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph = (void *)ip_hdr(*pskb) + tcphoff;
 	tcph->dest = cp->dport;
 
 	/*
@@ -224,15 +223,15 @@ tcp_dnat_handler(struct sk_buff **pskb,
 static int
 tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
-	unsigned int tcphoff = skb->nh.iph->ihl*4;
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 	case CHECKSUM_COMPLETE:
-		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 				      skb->len - tcphoff,
-				      skb->nh.iph->protocol, skb->csum)) {
+				      ip_hdr(skb)->protocol, skb->csum)) {
 			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 					 "Failed checksum for");
 			return 0;
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 03f0a414cfa4..8ee5fe6a101d 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,7 +22,7 @@
 #include <linux/udp.h>
 
 #include <net/ip_vs.h>
-
+#include <net/ip.h>
 
 static struct ip_vs_conn *
 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_conn *cp;
 	__be16 _ports[2], *pptr;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+	pptr = skb_header_pointer(skb, ip_hdrlen(skb),
 				  sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;
@@ -82,15 +82,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 
-	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+	uh = skb_header_pointer(skb, ip_hdrlen(skb),
 				sizeof(_udph), &_udph);
 	if (uh == NULL) {
 		*verdict = NF_DROP;
 		return 0;
 	}
 
-	if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
-				     skb->nh.iph->daddr, uh->dest))) {
+	if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+				     ip_hdr(skb)->daddr, uh->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int udphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph = (void *)ip_hdr(*pskb) + udphoff;
 	udph->source = cp->vport;
 
 	/*
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+	unsigned int udphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph = (void *)ip_hdr(*pskb) + udphoff;
 	udph->dest = cp->dport;
 
 	/*
@@ -239,7 +239,7 @@ static int
 udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
 	struct udphdr _udph, *uh;
-	unsigned int udphoff = skb->nh.iph->ihl*4;
+	const unsigned int udphoff = ip_hdrlen(skb);
 
 	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 	if (uh == NULL)
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
 		case CHECKSUM_COMPLETE:
-			if (csum_tcpudp_magic(skb->nh.iph->saddr,
-					      skb->nh.iph->daddr,
+			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					      ip_hdr(skb)->daddr,
 					      skb->len - udphoff,
-					      skb->nh.iph->protocol,
+					      ip_hdr(skb)->protocol,
 					      skb->csum)) {
 				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 						 "Failed checksum for");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 338668f88fe2..1b25b00ef1e1 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_bucket *tbl;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index e1f77bd7c9a5..900ce29db382 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  struct ip_vs_protocol *pp)
 {
 	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = skb->nh.iph;
+	struct iphdr  *iph = ip_hdr(skb);
 	u8     tos = iph->tos;
 	int    mtu;
 	struct flowi fl = {
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip_rt_put(rt);
 		return NF_STOLEN;
 	}
-	ip_send_check(skb->nh.iph);
+	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
 	dst_release(skb->dst);
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rtable *rt;		/* Route to the other host */
 	int mtu;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	EnterFunction(10);
 
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
 		goto tx_error;
-	skb->nh.iph->daddr = cp->daddr;
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->daddr = cp->daddr;
+	ip_send_check(ip_hdr(skb));
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
 
@@ -320,19 +320,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rtable *rt;			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
+	sk_buff_data_t old_transport_header = skb->transport_header;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
 
 	EnterFunction(10);
 
-	if (skb->protocol != __constant_htons(ETH_P_IP)) {
+	if (skb->protocol != htons(ETH_P_IP)) {
 		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
 			     "ETH_P_IP: %d, skb protocol: %d\n",
-			     __constant_htons(ETH_P_IP), skb->protocol);
+			     htons(ETH_P_IP), skb->protocol);
 		goto tx_error;
 	}
 
@@ -350,9 +351,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb->dst)
 		skb->dst->ops->update_pmtu(skb->dst, mtu);
 
-	df |= (old_iph->frag_off&__constant_htons(IP_DF));
+	df |= (old_iph->frag_off & htons(IP_DF));
 
-	if ((old_iph->frag_off&__constant_htons(IP_DF))
+	if ((old_iph->frag_off & htons(IP_DF))
 	    && mtu < ntohs(old_iph->tot_len)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -377,15 +378,16 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		}
 		kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = (void *) old_iph;
+	skb->transport_header = old_transport_header;
 
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
 
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
 	/* drop old route */
@@ -395,7 +397,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/*
 	 *	Push down and install the IPIP header.
 	 */
-	iph			=	skb->nh.iph;
+	iph			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	iph->frag_off		=	df;
@@ -435,7 +437,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	      struct ip_vs_protocol *pp)
 {
 	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = skb->nh.iph;
+	struct iphdr  *iph = ip_hdr(skb);
 	int    mtu;
 
 	EnterFunction(10);
@@ -445,7 +447,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -460,7 +462,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip_rt_put(rt);
 		return NF_STOLEN;
 	}
-	ip_send_check(skb->nh.iph);
+	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
 	dst_release(skb->dst);
@@ -514,12 +516,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 * mangle and send the packet here (only for VS/NAT)
 	 */
 
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
 		goto tx_error_icmp;
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836fc..b03c5ca2c823 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
 
 		spin_unlock_bh(&state_lock);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6069a11514f6..b44192924f95 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -10,7 +10,7 @@
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	const struct iphdr *iph = ip_hdr(*pskb);
 	struct rtable *rt;
 	struct flowi fl = {};
 	struct dst_entry *odst;
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		const struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 
 		rt_info->tos = iph->tos;
 		rt_info->daddr = iph->daddr;
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		struct iphdr *iph = (*pskb)->nh.iph;
+		const struct iphdr *iph = ip_hdr(*pskb);
 
 		if (!(iph->tos == rt_info->tos
 		      && iph->daddr == rt_info->daddr
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 			    unsigned int dataoff, u_int8_t protocol)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	__sum16 csum = 0;
 
 	switch (skb->ip_summed) {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 601808c796ec..46509fae9fd8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
-# connection tracking, helpers and protocols
-config IP_NF_CT_ACCT
-	bool "Connection tracking flow accounting"
-	depends on IP_NF_CONNTRACK
-	help
-	  If this option is enabled, the connection tracking code will
-	  keep per-flow packet and byte counters.
-
-	  Those counters can be used for flow-based accounting or the
-	  `connbytes' match.
-
-	  If unsure, say `N'.
-
-config IP_NF_CONNTRACK_MARK
-	bool  'Connection mark tracking support'
-	depends on IP_NF_CONNTRACK
-	help
-	  This option enables support for connection marks, used by the
-	  `CONNMARK' target and `connmark' match. Similar to the mark value
-	  of packets, but this mark value is kept in the conntrack session
-	  instead of the individual packets.
-	
-config IP_NF_CONNTRACK_SECMARK
-	bool  'Connection tracking security mark support'
-	depends on IP_NF_CONNTRACK && NETWORK_SECMARK
-	help
-	  This option enables security markings to be applied to
-	  connections.  Typically they are copied to connections from
-	  packets using the CONNSECMARK target and copied back from
-	  connections to packets with the same target, with the packets
-	  being originally labeled via SECMARK.
-
-	  If unsure, say 'N'.
-
-config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && IP_NF_CONNTRACK
-	help
-	  If this option is enabled, the connection tracking code will
-	  provide a notifier chain that can be used by other kernel code
-	  to get notified about changes in the connection tracking state.
-	  
-	  IF unsure, say `N'.
-
-config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
-	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
-	depends on IP_NF_NAT=n || IP_NF_NAT
-	help
-	  This option enables support for a netlink-based userspace interface
-
-
-config IP_NF_CT_PROTO_SCTP
-	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  With this option enabled, the connection tracking code will
-	  be able to do state tracking on SCTP connections.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_FTP
-	tristate "FTP protocol support"
-	depends on IP_NF_CONNTRACK
-	help
-	  Tracking FTP connections is problematic: special helpers are
-	  required for tracking them, and doing masquerading and other forms
-	  of Network Address Translation on them.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_IRC
-	tristate "IRC protocol support"
-	depends on IP_NF_CONNTRACK
-	---help---
-	  There is a commonly-used extension to IRC called
-	  Direct Client-to-Client Protocol (DCC).  This enables users to send
-	  files to each other, and also chat to each other without the need
-	  of a server.  DCC Sending is used anywhere you send files over IRC,
-	  and DCC Chat is most commonly used by Eggdrop bots.  If you are
-	  using NAT, this extension will enable you to send files and initiate
-	  chats.  Note that you do NOT need this extension to get files or
-	  have others initiate chats, or everything else in IRC.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_NETBIOS_NS
-	tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  NetBIOS name service requests are sent as broadcast messages from an
-	  unprivileged port and responded to with unicast messages to the
-	  same port. This make them hard to firewall properly because connection
-	  tracking doesn't deal with broadcasts. This helper tracks locally
-	  originating NetBIOS name service requests and the corresponding
-	  responses. It relies on correct IP address configuration, specifically
-	  netmask and broadcast address. When properly configured, the output
-	  of "ip address show" should look similar to this:
-
-	  $ ip -4 address show eth0
-	  4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
-	      inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
-	  
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_TFTP
-	tristate "TFTP protocol support"
-	depends on IP_NF_CONNTRACK
-	help
-	  TFTP connection tracking helper, this is required depending
-	  on how restrictive your ruleset is.
-	  If you are using a tftp client behind -j SNAT or -j MASQUERADING
-	  you will need this.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_AMANDA
-	tristate "Amanda backup protocol support"
-	depends on IP_NF_CONNTRACK
-	select TEXTSEARCH
-	select TEXTSEARCH_KMP
-	help
-	  If you are running the Amanda backup package <http://www.amanda.org/>
-	  on this machine or machines that will be MASQUERADED through this
-	  machine, then you may want to enable this feature.  This allows the
-	  connection tracking and natting code to allow the sub-channels that
-	  Amanda requires for communication of the backup data, messages and
-	  index.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_PPTP
-	tristate  'PPTP protocol support'
-	depends on IP_NF_CONNTRACK
-	help
-	  This module adds support for PPTP (Point to Point Tunnelling
-	  Protocol, RFC2637) connection tracking and NAT. 
-	
-	  If you are running PPTP sessions over a stateful firewall or NAT
-	  box, you may want to enable this feature.  
-	
-	  Please note that not all PPTP modes of operation are supported yet.
-	  For more info, read top of the file
-	  net/ipv4/netfilter/ip_conntrack_pptp.c
-	
-	  If you want to compile it as a module, say M here and read
-	  Documentation/modules.txt.  If unsure, say `N'.
-
-config IP_NF_H323
-	tristate  'H.323 protocol support (EXPERIMENTAL)'
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
-	  important VoIP protocols, it is widely used by voice hardware and
-	  software including voice gateways, IP phones, Netmeeting, OpenPhone,
-	  Gnomemeeting, etc.
-
-	  With this module you can support H.323 on a connection tracking/NAT
-	  firewall.
-
-	  This module supports RAS, Fast Start, H.245 Tunnelling, Call
-	  Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
-	  whiteboard, file transfer, etc. For more information, please
-	  visit http://nath323.sourceforge.net/.
-
-	  If you want to compile it as a module, say 'M' here and read
-	  Documentation/modules.txt.  If unsure, say 'N'.
-
-config IP_NF_SIP
-	tristate "SIP protocol support (EXPERIMENTAL)"
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  SIP is an application-layer control protocol that can establish,
-	  modify, and terminate multimedia sessions (conferences) such as
-	  Internet telephony calls. With the ip_conntrack_sip and
-	  the ip_nat_sip modules you can support the protocol on a connection
-	  tracking/NATing firewall.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
 config IP_NF_QUEUE
 	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
 	help
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-# NAT + specific targets: ip_conntrack
-config IP_NF_NAT
-	tristate "Full NAT"
-	depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
-	help
-	  The Full NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation.  It is controlled by
-	  the `nat' table in iptables: see the man page for iptables(8).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets: nf_conntrack
 config NF_NAT
 	tristate "Full NAT"
@@ -383,11 +190,6 @@ config NF_NAT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_NAT_NEEDED
-	bool
-	depends on IP_NF_NAT
-	default y
-
 config NF_NAT_NEEDED
 	bool
 	depends on NF_NAT
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
 	  changed to seem to come from a particular interface's address, and
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE
 
 config IP_NF_TARGET_REDIRECT
 	tristate "REDIRECT target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  REDIRECT is a special case of NAT: all incoming connections are
 	  mapped onto the incoming interface's address, causing the packets to
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT
 
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  NETMAP is an implementation of static 1:1 NAT mapping of network
 	  addresses. It maps the network address part, while keeping the host
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP
 
 config IP_NF_TARGET_SAME
 	tristate "SAME target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  This option adds a `SAME' target, which works like the standard SNAT
 	  target, but attempts to give clients the same IP for all connections.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_NAT_SNMP_BASIC
-	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && IP_NF_NAT
-	---help---
-
-	  This module implements an Application Layer Gateway (ALG) for
-	  SNMP payloads.  In conjunction with NAT, it allows a network
-	  management system to access multiple private networks with
-	  conflicting addresses.  It works by modifying IP addresses
-	  inside SNMP payloads to match IP-layer NAT mapping.
-
-	  This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_NAT
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE
 	tristate
 	depends on NF_NAT && NF_CT_PROTO_GRE
 
-config IP_NF_NAT_FTP
-	tristate
-	depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
-	default IP_NF_NAT && IP_NF_FTP
-
 config NF_NAT_FTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_FTP
 
-config IP_NF_NAT_IRC
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_IRC=y
-	default m if IP_NF_IRC=m
-
 config NF_NAT_IRC
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_IRC
 
-config IP_NF_NAT_TFTP
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_TFTP=y
-	default m if IP_NF_TFTP=m
-
 config NF_NAT_TFTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_TFTP
 
-config IP_NF_NAT_AMANDA
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_AMANDA=y
-	default m if IP_NF_AMANDA=m
-
 config NF_NAT_AMANDA
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_AMANDA
 
-config IP_NF_NAT_PPTP
-	tristate
-	depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
-	default IP_NF_NAT if IP_NF_PPTP=y
-	default m if IP_NF_PPTP=m
-
 config NF_NAT_PPTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_PPTP
 	select NF_NAT_PROTO_GRE
 
-config IP_NF_NAT_H323
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_H323=y
-	default m if IP_NF_H323=m
-
 config NF_NAT_H323
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_H323
 
-config IP_NF_NAT_SIP
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_SIP=y
-	default m if IP_NF_SIP=m
-
 config NF_NAT_SIP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
 	depends on IP_NF_MANGLE && EXPERIMENTAL
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK_IPV4
+	select NF_CONNTRACK_MARK
 	help
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 6625ec68180c..409d273f6f82 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,8 +2,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-# objects for the standalone - connection tracking / NAT
-ip_conntrack-objs	:= ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
 # objects for l3 independent conntrack
 nf_conntrack_ipv4-objs  :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
 ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs	+= nf_conntrack_l3proto_ipv4_compat.o
 endif
 endif
 
-ip_nat-objs	:= ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-nf_nat-objs	:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-ifneq ($(CONFIG_NF_NAT),)
+nf_nat-objs		:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
 iptable_nat-objs	:= nf_nat_rule.o nf_nat_standalone.o
-else
-iptable_nat-objs	:= ip_nat_rule.o ip_nat_standalone.o
-endif
-
-ip_conntrack_pptp-objs	:= ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
-ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
-
-ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
-ip_nat_h323-objs := ip_nat_helper_h323.o
 
 # connection tracking
-obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
 
-obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 
-# conntrack netlink interface
-obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
-
-
-# SCTP protocol connection tracking
-obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
-
-# connection tracking helpers
-obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
-obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
-obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
-obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
-obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
-obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
-obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
-obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
-
-# NAT helpers (ip_conntrack)
-obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
-obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
-obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
-obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
-obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
-obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
-obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
-
 # NAT helpers (nf_conntrack)
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
 obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5170f5c75f9d..cae41215e3c7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -166,13 +166,9 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 		return 0;
 	}
 
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
-		unsigned long odev;
-		memcpy(&odev, outdev + i*sizeof(unsigned long),
-		       sizeof(unsigned long));
-		ret |= (odev
-			^ ((const unsigned long *)arpinfo->outiface)[i])
-			& ((const unsigned long *)arpinfo->outiface_mask)[i];
+	for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
+		ret |= (outdev[i] ^ arpinfo->outiface[i])
+			& arpinfo->outiface_mask[i];
 	}
 
 	if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
@@ -249,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	arp = (*pskb)->nh.arph;
+	arp = arp_hdr(*pskb);
 	do {
 		if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
 			struct arpt_entry_target *t;
@@ -301,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 								     t->data);
 
 				/* Target might have changed stuff. */
-				arp = (*pskb)->nh.arph;
+				arp = arp_hdr(*pskb);
 
 				if (verdict == ARPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48f..6298d404e7c7 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -30,35 +30,35 @@ target(struct sk_buff **pskb,
 		*pskb = nskb;
 	}
 
-	arp = (*pskb)->nh.arph;
-	arpptr = (*pskb)->nh.raw + sizeof(*arp);
+	arp = arp_hdr(*pskb);
+	arpptr = skb_network_header(*pskb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
 	/* We assume that pln and hln were checked in the match */
 	if (mangle->flags & ARPT_MANGLE_SDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > (**pskb).tail))
+		   (arpptr + hln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->src_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_SIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > (**pskb).tail))
+		   (arpptr + pln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_s.src_ip, pln);
 	}
 	arpptr += pln;
 	if (mangle->flags & ARPT_MANGLE_TDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > (**pskb).tail))
+		   (arpptr + hln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->tgt_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_TIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > (**pskb).tail))
+		   (arpptr + pln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
 	}
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index 4f561f52c83a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/* Amanda extension for IP connection tracking, Version 0.2
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on HW's ip_conntrack_irc.c as well as other modules
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *	Module load syntax:
- * 	insmod ip_conntrack_amanda.o [master_timeout=n]
- *
- *	Where master_timeout is the timeout (in seconds) of the master
- *	connection (port 10080).  This defaults to 5 minutes but if
- *	your clients take longer than 5 minutes to do their work
- *	before getting back to the Amanda server, you can increase
- *	this value.
- *
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/textsearch.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-static unsigned int master_timeout = 300;
-static char *ts_algo = "kmp";
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda connection tracking module");
-MODULE_LICENSE("GPL");
-module_param(master_timeout, uint, 0600);
-MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-module_param(ts_algo, charp, 0400);
-MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-
-unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
-				   enum ip_conntrack_info ctinfo,
-				   unsigned int matchoff,
-				   unsigned int matchlen,
-				   struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
-
-enum amanda_strings {
-	SEARCH_CONNECT,
-	SEARCH_NEWLINE,
-	SEARCH_DATA,
-	SEARCH_MESG,
-	SEARCH_INDEX,
-};
-
-static struct {
-	char			*string;
-	size_t			len;
-	struct ts_config	*ts;
-} search[] = {
-	[SEARCH_CONNECT] = {
-		.string	= "CONNECT ",
-		.len	= 8,
-	},
-	[SEARCH_NEWLINE] = {
-		.string	= "\n",
-		.len	= 1,
-	},
-	[SEARCH_DATA] = {
-		.string	= "DATA ",
-		.len	= 5,
-	},
-	[SEARCH_MESG] = {
-		.string	= "MESG ",
-		.len	= 5,
-	},
-	[SEARCH_INDEX] = {
-		.string = "INDEX ",
-		.len	= 6,
-	},
-};
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	struct ts_state ts;
-	struct ip_conntrack_expect *exp;
-	unsigned int dataoff, start, stop, off, i;
-	char pbuf[sizeof("65535")], *tmp;
-	u_int16_t port, len;
-	int ret = NF_ACCEPT;
-	typeof(ip_nat_amanda_hook) ip_nat_amanda;
-
-	/* Only look at packets from the Amanda server */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	/* increase the UDP timeout of the master connection as replies from
-	 * Amanda clients to the server can be quite delayed */
-	ip_ct_refresh(ct, *pskb, master_timeout * HZ);
-
-	/* No data? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
-		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-
-	memset(&ts, 0, sizeof(ts));
-	start = skb_find_text(*pskb, dataoff, (*pskb)->len,
-			      search[SEARCH_CONNECT].ts, &ts);
-	if (start == UINT_MAX)
-		goto out;
-	start += dataoff + search[SEARCH_CONNECT].len;
-
-	memset(&ts, 0, sizeof(ts));
-	stop = skb_find_text(*pskb, start, (*pskb)->len,
-			     search[SEARCH_NEWLINE].ts, &ts);
-	if (stop == UINT_MAX)
-		goto out;
-	stop += start;
-
-	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
-		memset(&ts, 0, sizeof(ts));
-		off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
-		if (off == UINT_MAX)
-			continue;
-		off += start + search[i].len;
-
-		len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
-		if (skb_copy_bits(*pskb, off, pbuf, len))
-			break;
-		pbuf[len] = '\0';
-
-		port = simple_strtoul(pbuf, &tmp, 10);
-		len = tmp - pbuf;
-		if (port == 0 || len > 5)
-			break;
-
-		exp = ip_conntrack_expect_alloc(ct);
-		if (exp == NULL) {
-			ret = NF_DROP;
-			goto out;
-		}
-
-		exp->expectfn = NULL;
-		exp->flags = 0;
-
-		exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		exp->tuple.src.u.tcp.port = 0;
-		exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		exp->tuple.dst.protonum = IPPROTO_TCP;
-		exp->tuple.dst.u.tcp.port = htons(port);
-
-		exp->mask.src.ip = htonl(0xFFFFFFFF);
-		exp->mask.src.u.tcp.port = 0;
-		exp->mask.dst.ip = htonl(0xFFFFFFFF);
-		exp->mask.dst.protonum = 0xFF;
-		exp->mask.dst.u.tcp.port = htons(0xFFFF);
-
-		/* RCU read locked by nf_hook_slow */
-		ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
-		if (ip_nat_amanda)
-			ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
-					    len, exp);
-		else if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		ip_conntrack_expect_put(exp);
-	}
-
-out:
-	return ret;
-}
-
-static struct ip_conntrack_helper amanda_helper = {
-	.max_expected = 3,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "amanda",
-
-	.tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
-		   .dst = { .protonum = IPPROTO_UDP },
-	},
-	.mask = { .src = { .u = { 0xFFFF } },
-		 .dst = { .protonum = 0xFF },
-	},
-};
-
-static void __exit ip_conntrack_amanda_fini(void)
-{
-	int i;
-
-	ip_conntrack_helper_unregister(&amanda_helper);
-	for (i = 0; i < ARRAY_SIZE(search); i++)
-		textsearch_destroy(search[i].ts);
-}
-
-static int __init ip_conntrack_amanda_init(void)
-{
-	int ret, i;
-
-	ret = -ENOMEM;
-	for (i = 0; i < ARRAY_SIZE(search); i++) {
-		search[i].ts = textsearch_prepare(ts_algo, search[i].string,
-						  search[i].len,
-						  GFP_KERNEL, TS_AUTOLOAD);
-		if (search[i].ts == NULL)
-			goto err;
-	}
-	ret = ip_conntrack_helper_register(&amanda_helper);
-	if (ret < 0)
-		goto err;
-	return 0;
-
-err:
-	for (; i >= 0; i--) {
-		if (search[i].ts)
-			textsearch_destroy(search[i].ts);
-	}
-	return ret;
-}
-
-module_init(ip_conntrack_amanda_init);
-module_exit(ip_conntrack_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 23b99ae2cc37..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1550 +0,0 @@
-/* Connection state tracking for netfilter.  This is separated from,
-   but required by, the NAT layer; it can also be used by an iptables
-   extension. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * 	- new API and handling of conntrack/nat helpers
- * 	- now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * 	- add usage/reference counts to ip_conntrack_expect
- *	- export ip_conntrack[_expect]_{find_get,put} functions
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/stddef.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/err.h>
-#include <linux/percpu.h>
-#include <linux/moduleparam.h>
-#include <linux/notifier.h>
-
-/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
-   registrations, conntrack timers*/
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#define IP_CONNTRACK_VERSION	"2.4"
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_conntrack_lock);
-
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-
-void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
-static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size __read_mostly = 0;
-int ip_conntrack_max __read_mostly;
-struct list_head *ip_conntrack_hash __read_mostly;
-static struct kmem_cache *ip_conntrack_cachep __read_mostly;
-static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
-struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid __read_mostly;
-static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc __read_mostly;
-
-static unsigned int ip_conntrack_next_id;
-static unsigned int ip_conntrack_expect_next_id;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
-
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
-	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
-	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
-		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
-				    ecache->ct);
-	ecache->events = 0;
-	ip_conntrack_put(ecache->ct);
-	ecache->ct = NULL;
-}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling or freeing the skb */
-void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
-{
-	struct ip_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
-	if (ecache->ct == ct)
-		__ip_ct_deliver_cached_events(ecache);
-	local_bh_enable();
-}
-
-void __ip_ct_event_cache_init(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_ecache *ecache;
-
-	/* take care of delivering potentially old events */
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
-	BUG_ON(ecache->ct == ct);
-	if (ecache->ct)
-		__ip_ct_deliver_cached_events(ecache);
-	/* initialize for this conntrack/packet */
-	ecache->ct = ct;
-	nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called while
- * packets are still passing through the code */
-static void ip_ct_event_cache_flush(void)
-{
-	struct ip_conntrack_ecache *ecache;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ecache = &per_cpu(ip_conntrack_ecache, cpu);
-		if (ecache->ct)
-			ip_conntrack_put(ecache->ct);
-	}
-}
-#else
-static inline void ip_ct_event_cache_flush(void) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int ip_conntrack_hash_rnd_initted;
-static unsigned int ip_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
-			    unsigned int size, unsigned int rnd)
-{
-	return (jhash_3words((__force u32)tuple->src.ip,
-			     ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
-			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-			     rnd) % size);
-}
-
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
-{
-	return __hash_conntrack(tuple, ip_conntrack_htable_size,
-				ip_conntrack_hash_rnd);
-}
-
-int
-ip_ct_get_tuple(const struct iphdr *iph,
-		const struct sk_buff *skb,
-		unsigned int dataoff,
-		struct ip_conntrack_tuple *tuple,
-		const struct ip_conntrack_protocol *protocol)
-{
-	/* Never happen */
-	if (iph->frag_off & htons(IP_OFFSET)) {
-		printk("ip_conntrack_core: Frag of proto %u.\n",
-		       iph->protocol);
-		return 0;
-	}
-
-	tuple->src.ip = iph->saddr;
-	tuple->dst.ip = iph->daddr;
-	tuple->dst.protonum = iph->protocol;
-	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
-	return protocol->pkt_to_tuple(skb, dataoff, tuple);
-}
-
-int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
-		   const struct ip_conntrack_tuple *orig,
-		   const struct ip_conntrack_protocol *protocol)
-{
-	inverse->src.ip = orig->dst.ip;
-	inverse->dst.ip = orig->src.ip;
-	inverse->dst.protonum = orig->dst.protonum;
-	inverse->dst.dir = !orig->dst.dir;
-
-	return protocol->invert_tuple(inverse, orig);
-}
-
-
-/* ip_conntrack_expect helper functions */
-void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
-{
-	IP_NF_ASSERT(!timer_pending(&exp->timeout));
-	list_del(&exp->list);
-	CONNTRACK_STAT_INC(expect_delete);
-	exp->master->expecting--;
-	ip_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
-	struct ip_conntrack_expect *exp = (void *)ul_expect;
-
-	write_lock_bh(&ip_conntrack_lock);
-	ip_ct_unlink_expect(exp);
-	write_unlock_bh(&ip_conntrack_lock);
-	ip_conntrack_expect_put(exp);
-}
-
-struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
-			return i;
-	}
-	return NULL;
-}
-
-/* Just find a expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	read_lock_bh(&ip_conntrack_lock);
-	i = __ip_conntrack_expect_find(tuple);
-	if (i)
-		atomic_inc(&i->use);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return i;
-}
-
-/* If an expectation for this connection is found, it gets delete from
- * global list then returned. */
-static struct ip_conntrack_expect *
-find_expectation(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		/* If master is not in hash table yet (ie. packet hasn't left
-		   this machine yet), how can other end know about expected?
-		   Hence these are not the droids you are looking for (if
-		   master ct never got confirmed, we'd hold a reference to it
-		   and weird things would happen to future packets). */
-		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
-		    && is_confirmed(i->master)) {
-			if (i->flags & IP_CT_EXPECT_PERMANENT) {
-				atomic_inc(&i->use);
-				return i;
-			} else if (del_timer(&i->timeout)) {
-				ip_ct_unlink_expect(i);
-				return i;
-			}
-		}
-	}
-	return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void ip_ct_remove_expectations(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_expect *i, *tmp;
-
-	/* Optimization: most connection never expect any others. */
-	if (ct->expecting == 0)
-		return;
-
-	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
-		if (i->master == ct && del_timer(&i->timeout)) {
-			ip_ct_unlink_expect(i);
-			ip_conntrack_expect_put(i);
-		}
-	}
-}
-
-static void
-clean_from_lists(struct ip_conntrack *ct)
-{
-	DEBUGP("clean_from_lists(%p)\n", ct);
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
-
-	/* Destroy all pending expectations */
-	ip_ct_remove_expectations(ct);
-}
-
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
-{
-	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
-	struct ip_conntrack_protocol *proto;
-	struct ip_conntrack_helper *helper;
-	typeof(ip_conntrack_destroyed) destroyed;
-
-	DEBUGP("destroy_conntrack(%p)\n", ct);
-	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
-	IP_NF_ASSERT(!timer_pending(&ct->timeout));
-
-	ip_conntrack_event(IPCT_DESTROY, ct);
-	set_bit(IPS_DYING_BIT, &ct->status);
-
-	helper = ct->helper;
-	if (helper && helper->destroy)
-		helper->destroy(ct);
-
-	/* To make sure we don't get any weird locking issues here:
-	 * destroy_conntrack() MUST NOT be called with a write lock
-	 * to ip_conntrack_lock!!! -HW */
-	rcu_read_lock();
-	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
-	if (proto && proto->destroy)
-		proto->destroy(ct);
-
-	destroyed = rcu_dereference(ip_conntrack_destroyed);
-	if (destroyed)
-		destroyed(ct);
-
-	rcu_read_unlock();
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* Expectations will have been removed in clean_from_lists,
-	 * except TFTP can create an expectation on the first packet,
-	 * before connection is in the list, so we need to clean here,
-	 * too. */
-	ip_ct_remove_expectations(ct);
-
-	/* We overload first tuple to link into unconfirmed list. */
-	if (!is_confirmed(ct)) {
-		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	}
-
-	CONNTRACK_STAT_INC(delete);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	if (ct->master)
-		ip_conntrack_put(ct->master);
-
-	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
-	ip_conntrack_free(ct);
-}
-
-static void death_by_timeout(unsigned long ul_conntrack)
-{
-	struct ip_conntrack *ct = (void *)ul_conntrack;
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* Inside lock so preempt is disabled on module removal path.
-	 * Otherwise we can get spurious warnings. */
-	CONNTRACK_STAT_INC(delete_list);
-	clean_from_lists(ct);
-	write_unlock_bh(&ip_conntrack_lock);
-	ip_conntrack_put(ct);
-}
-
-struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
-		    const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-	unsigned int hash = hash_conntrack(tuple);
-
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
-		    ip_ct_tuple_equal(tuple, &h->tuple)) {
-			CONNTRACK_STAT_INC(found);
-			return h;
-		}
-		CONNTRACK_STAT_INC(searched);
-	}
-
-	return NULL;
-}
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
-		      const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-
-	read_lock_bh(&ip_conntrack_lock);
-	h = __ip_conntrack_find(tuple, ignored_conntrack);
-	if (h)
-		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return h;
-}
-
-static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
-					unsigned int hash,
-					unsigned int repl_hash)
-{
-	ct->id = ++ip_conntrack_next_id;
-	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
-		 &ip_conntrack_hash[hash]);
-	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
-		 &ip_conntrack_hash[repl_hash]);
-}
-
-void ip_conntrack_hash_insert(struct ip_conntrack *ct)
-{
-	unsigned int hash, repl_hash;
-
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	write_lock_bh(&ip_conntrack_lock);
-	__ip_conntrack_hash_insert(ct, hash, repl_hash);
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* Confirm a connection given skb; places it in hash table */
-int
-__ip_conntrack_confirm(struct sk_buff **pskb)
-{
-	unsigned int hash, repl_hash;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* ipt_REJECT uses ip_conntrack_attach to attach related
-	   ICMP/TCP RST packets in other direction.  Actual packet
-	   which created connection will be IP_CT_NEW or for an
-	   expected connection, IP_CT_RELATED. */
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	/* We're not in hash table, and we refuse to set up related
-	   connections for unconfirmed conns.  But packet copies and
-	   REJECT will give spurious warnings here. */
-	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
-
-	/* No external references means noone else could have
-	   confirmed us. */
-	IP_NF_ASSERT(!is_confirmed(ct));
-	DEBUGP("Confirming conntrack %p\n", ct);
-
-	write_lock_bh(&ip_conntrack_lock);
-
-	/* See if there's one in the list already, including reverse:
-	   NAT could have grabbed it without realizing, since we're
-	   not in the hash.  If there is, we lost race. */
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
-		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple))
-			goto out;
-	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
-		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple))
-			goto out;
-
-	/* Remove from unconfirmed list */
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-
-	__ip_conntrack_hash_insert(ct, hash, repl_hash);
-	/* Timer relative to confirmation time, not original
-	   setting time, otherwise we'd get timer wrap in
-	   weird delay cases. */
-	ct->timeout.expires += jiffies;
-	add_timer(&ct->timeout);
-	atomic_inc(&ct->ct_general.use);
-	set_bit(IPS_CONFIRMED_BIT, &ct->status);
-	CONNTRACK_STAT_INC(insert);
-	write_unlock_bh(&ip_conntrack_lock);
-	if (ct->helper)
-		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
-#endif
-	ip_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, *pskb);
-
-	return NF_ACCEPT;
-
-out:
-	CONNTRACK_STAT_INC(insert_failed);
-	write_unlock_bh(&ip_conntrack_lock);
-	return NF_DROP;
-}
-
-/* Returns true if a connection correspondings to the tuple (required
-   for NAT). */
-int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
-			 const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-
-	read_lock_bh(&ip_conntrack_lock);
-	h = __ip_conntrack_find(tuple, ignored_conntrack);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return h != NULL;
-}
-
-/* There's a small race here where we may free a just-assured
-   connection.  Too bad: we're in trouble anyway. */
-static int early_drop(struct list_head *chain)
-{
-	/* Traverse backwards: gives us oldest, which is roughly LRU */
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL, *tmp;
-	int dropped = 0;
-
-	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry_reverse(h, chain, list) {
-		tmp = tuplehash_to_ctrack(h);
-		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
-			ct = tmp;
-			atomic_inc(&ct->ct_general.use);
-			break;
-		}
-	}
-	read_unlock_bh(&ip_conntrack_lock);
-
-	if (!ct)
-		return dropped;
-
-	if (del_timer(&ct->timeout)) {
-		death_by_timeout((unsigned long)ct);
-		dropped = 1;
-		CONNTRACK_STAT_INC_ATOMIC(early_drop);
-	}
-	ip_conntrack_put(ct);
-	return dropped;
-}
-
-static struct ip_conntrack_helper *
-__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_helper *h;
-
-	list_for_each_entry(h, &helpers, list) {
-		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
-			return h;
-	}
-	return NULL;
-}
-
-struct ip_conntrack_helper *
-ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_helper *helper;
-
-	/* need ip_conntrack_lock to assure that helper exists until
-	 * try_module_get() is called */
-	read_lock_bh(&ip_conntrack_lock);
-
-	helper = __ip_conntrack_helper_find(tuple);
-	if (helper) {
-		/* need to increase module usage count to assure helper will
-		 * not go away while the caller is e.g. busy putting a
-		 * conntrack in the hash that uses the helper */
-		if (!try_module_get(helper->me))
-			helper = NULL;
-	}
-
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return helper;
-}
-
-void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
-{
-	module_put(helper->me);
-}
-
-struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol)
-{
-	return ip_ct_protos[protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol)
-{
-	struct ip_conntrack_protocol *p;
-
-	rcu_read_lock();
-	p = __ip_conntrack_proto_find(protocol);
-	if (p) {
-		if (!try_module_get(p->me))
-			p = &ip_conntrack_generic_protocol;
-	}
-	rcu_read_unlock();
-
-	return p;
-}
-
-void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
-{
-	module_put(p->me);
-}
-
-struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-					struct ip_conntrack_tuple *repl)
-{
-	struct ip_conntrack *conntrack;
-
-	if (!ip_conntrack_hash_rnd_initted) {
-		get_random_bytes(&ip_conntrack_hash_rnd, 4);
-		ip_conntrack_hash_rnd_initted = 1;
-	}
-
-	/* We don't want any race condition at early drop stage */
-	atomic_inc(&ip_conntrack_count);
-
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
-		unsigned int hash = hash_conntrack(orig);
-		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
-			atomic_dec(&ip_conntrack_count);
-			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-
-	conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
-	if (!conntrack) {
-		DEBUGP("Can't allocate conntrack.\n");
-		atomic_dec(&ip_conntrack_count);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	atomic_set(&conntrack->ct_general.use, 1);
-	conntrack->ct_general.destroy = destroy_conntrack;
-	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
-	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
-	/* Don't set timer yet: wait for confirmation */
-	init_timer(&conntrack->timeout);
-	conntrack->timeout.data = (unsigned long)conntrack;
-	conntrack->timeout.function = death_by_timeout;
-
-	return conntrack;
-}
-
-void
-ip_conntrack_free(struct ip_conntrack *conntrack)
-{
-	atomic_dec(&ip_conntrack_count);
-	kmem_cache_free(ip_conntrack_cachep, conntrack);
-}
-
-/* Allocate a new conntrack: we return -ENOMEM if classification
- * failed due to stress.   Otherwise it really is unclassifiable */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(struct ip_conntrack_tuple *tuple,
-	       struct ip_conntrack_protocol *protocol,
-	       struct sk_buff *skb)
-{
-	struct ip_conntrack *conntrack;
-	struct ip_conntrack_tuple repl_tuple;
-	struct ip_conntrack_expect *exp;
-
-	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
-		DEBUGP("Can't invert tuple.\n");
-		return NULL;
-	}
-
-	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
-	if (conntrack == NULL || IS_ERR(conntrack))
-		return (struct ip_conntrack_tuple_hash *)conntrack;
-
-	if (!protocol->new(conntrack, skb)) {
-		ip_conntrack_free(conntrack);
-		return NULL;
-	}
-
-	write_lock_bh(&ip_conntrack_lock);
-	exp = find_expectation(tuple);
-
-	if (exp) {
-		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
-			conntrack, exp);
-		/* Welcome, Mr. Bond.  We've been expecting you... */
-		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
-		conntrack->master = exp->master;
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-		conntrack->mark = exp->master->mark;
-#endif
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-		/* this is ugly, but there is no other place where to put it */
-		conntrack->nat.masq_index = exp->master->nat.masq_index;
-#endif
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-		conntrack->secmark = exp->master->secmark;
-#endif
-		nf_conntrack_get(&conntrack->master->ct_general);
-		CONNTRACK_STAT_INC(expect_new);
-	} else {
-		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
-
-		CONNTRACK_STAT_INC(new);
-	}
-
-	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-
-	write_unlock_bh(&ip_conntrack_lock);
-
-	if (exp) {
-		if (exp->expectfn)
-			exp->expectfn(conntrack, exp);
-		ip_conntrack_expect_put(exp);
-	}
-
-	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
-}
-
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
-static inline struct ip_conntrack *
-resolve_normal_ct(struct sk_buff *skb,
-		  struct ip_conntrack_protocol *proto,
-		  int *set_reply,
-		  unsigned int hooknum,
-		  enum ip_conntrack_info *ctinfo)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-
-	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
-
-	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
-				&tuple,proto))
-		return NULL;
-
-	/* look for tuple match */
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h) {
-		h = init_conntrack(&tuple, proto, skb);
-		if (!h)
-			return NULL;
-		if (IS_ERR(h))
-			return (void *)h;
-	}
-	ct = tuplehash_to_ctrack(h);
-
-	/* It exists; we have (non-exclusive) reference. */
-	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
-		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
-		/* Please set reply bit if this packet OK */
-		*set_reply = 1;
-	} else {
-		/* Once we've had two way comms, always ESTABLISHED. */
-		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-			DEBUGP("ip_conntrack_in: normal packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_ESTABLISHED;
-		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
-			DEBUGP("ip_conntrack_in: related packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_RELATED;
-		} else {
-			DEBUGP("ip_conntrack_in: new packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_NEW;
-		}
-		*set_reply = 0;
-	}
-	skb->nfct = &ct->ct_general;
-	skb->nfctinfo = *ctinfo;
-	return ct;
-}
-
-/* Netfilter hook itself. */
-unsigned int ip_conntrack_in(unsigned int hooknum,
-			     struct sk_buff **pskb,
-			     const struct net_device *in,
-			     const struct net_device *out,
-			     int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack_protocol *proto;
-	int set_reply = 0;
-	int ret;
-
-	/* Previously seen (loopback or untracked)?  Ignore. */
-	if ((*pskb)->nfct) {
-		CONNTRACK_STAT_INC_ATOMIC(ignore);
-		return NF_ACCEPT;
-	}
-
-	/* Never happen */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
-		if (net_ratelimit()) {
-		printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
-		       (*pskb)->nh.iph->protocol, hooknum);
-		}
-		return NF_DROP;
-	}
-
-/* Doesn't cover locally-generated broadcast, so not worth it. */
-#if 0
-	/* Ignore broadcast: no `connection'. */
-	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
-		printk("Broadcast packet!\n");
-		return NF_ACCEPT;
-	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
-		   == htonl(0x000000FF)) {
-		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr),
-		       (*pskb)->sk, (*pskb)->pkt_type);
-	}
-#endif
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
-
-	/* It may be an special packet, error, unclean...
-	 * inverse of the return code tells to the netfilter
-	 * core what to do with the packet. */
-	if (proto->error != NULL
-	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
-		CONNTRACK_STAT_INC_ATOMIC(error);
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return -ret;
-	}
-
-	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
-		/* Not valid part of a connection */
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return NF_ACCEPT;
-	}
-
-	if (IS_ERR(ct)) {
-		/* Too stressed to deal. */
-		CONNTRACK_STAT_INC_ATOMIC(drop);
-		return NF_DROP;
-	}
-
-	IP_NF_ASSERT((*pskb)->nfct);
-
-	ret = proto->packet(ct, *pskb, ctinfo);
-	if (ret < 0) {
-		/* Invalid: inverse of the return code tells
-		 * the netfilter core what to do*/
-		nf_conntrack_put((*pskb)->nfct);
-		(*pskb)->nfct = NULL;
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return -ret;
-	}
-
-	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		ip_conntrack_event_cache(IPCT_STATUS, *pskb);
-
-	return ret;
-}
-
-int invert_tuplepr(struct ip_conntrack_tuple *inverse,
-		   const struct ip_conntrack_tuple *orig)
-{
-	struct ip_conntrack_protocol *proto;
-	int ret;
-
-	rcu_read_lock();
-	proto = __ip_conntrack_proto_find(orig->dst.protonum);
-	ret = ip_ct_invert_tuple(inverse, orig, proto);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *a,
-			       const struct ip_conntrack_expect *b)
-{
-	/* Part covered by intersection of masks must be unequal,
-	   otherwise they clash */
-	struct ip_conntrack_tuple intersect_mask
-		= { { a->mask.src.ip & b->mask.src.ip,
-		      { a->mask.src.u.all & b->mask.src.u.all } },
-		    { a->mask.dst.ip & b->mask.dst.ip,
-		      { a->mask.dst.u.all & b->mask.dst.u.all },
-		      a->mask.dst.protonum & b->mask.dst.protonum } };
-
-	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct ip_conntrack_expect *a,
-				 const struct ip_conntrack_expect *b)
-{
-	return a->master == b->master
-		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
-		&& ip_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack_expect *i;
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* choose the the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
-			ip_ct_unlink_expect(i);
-			write_unlock_bh(&ip_conntrack_lock);
-			ip_conntrack_expect_put(i);
-			return;
-		}
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
-{
-	struct ip_conntrack_expect *new;
-
-	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
-	if (!new) {
-		DEBUGP("expect_related: OOM allocating expect\n");
-		return NULL;
-	}
-	new->master = me;
-	atomic_set(&new->use, 1);
-	return new;
-}
-
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
-{
-	if (atomic_dec_and_test(&exp->use))
-		kmem_cache_free(ip_conntrack_expect_cachep, exp);
-}
-
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-{
-	atomic_inc(&exp->use);
-	exp->master->expecting++;
-	list_add(&exp->list, &ip_conntrack_expect_list);
-
-	init_timer(&exp->timeout);
-	exp->timeout.data = (unsigned long)exp;
-	exp->timeout.function = expectation_timed_out;
-	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
-	add_timer(&exp->timeout);
-
-	exp->id = ++ip_conntrack_expect_next_id;
-	atomic_inc(&exp->use);
-	CONNTRACK_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct ip_conntrack *master)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-		if (i->master == master) {
-			if (del_timer(&i->timeout)) {
-				ip_ct_unlink_expect(i);
-				ip_conntrack_expect_put(i);
-			}
-			break;
-		}
-	}
-}
-
-static inline int refresh_timer(struct ip_conntrack_expect *i)
-{
-	if (!del_timer(&i->timeout))
-		return 0;
-
-	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
-	add_timer(&i->timeout);
-	return 1;
-}
-
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
-{
-	struct ip_conntrack_expect *i;
-	int ret;
-
-	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
-	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
-	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
-
-	write_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		if (expect_matches(i, expect)) {
-			/* Refresh timer: if it's dying, ignore.. */
-			if (refresh_timer(i)) {
-				ret = 0;
-				goto out;
-			}
-		} else if (expect_clash(i, expect)) {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
-	/* Will be over limit? */
-	if (expect->master->helper->max_expected &&
-	    expect->master->expecting >= expect->master->helper->max_expected)
-		evict_oldest_expect(expect->master);
-
-	ip_conntrack_expect_insert(expect);
-	ip_conntrack_expect_event(IPEXP_NEW, expect);
-	ret = 0;
-out:
-	write_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
-   implicitly racy: see __ip_conntrack_confirm */
-void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
-			      const struct ip_conntrack_tuple *newreply)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	/* Should be unconfirmed, so not in hash table yet */
-	IP_NF_ASSERT(!is_confirmed(conntrack));
-
-	DEBUGP("Altering reply tuple of %p to ", conntrack);
-	DUMP_TUPLE(newreply);
-
-	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
-	if (!conntrack->master && conntrack->expecting == 0)
-		conntrack->helper = __ip_conntrack_helper_find(newreply);
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-{
-	BUG_ON(me->timeout == 0);
-	write_lock_bh(&ip_conntrack_lock);
-	list_add(&me->list, &helpers);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	return 0;
-}
-
-struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *name)
-{
-	struct ip_conntrack_helper *h;
-
-	list_for_each_entry(h, &helpers, list) {
-		if (!strcmp(h->name, name))
-			return h;
-	}
-
-	return NULL;
-}
-
-static inline void unhelp(struct ip_conntrack_tuple_hash *i,
-			  const struct ip_conntrack_helper *me)
-{
-	if (tuplehash_to_ctrack(i)->helper == me) {
-		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
-		tuplehash_to_ctrack(i)->helper = NULL;
-	}
-}
-
-void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-{
-	unsigned int i;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_expect *exp, *tmp;
-
-	/* Need write lock here, to delete helper. */
-	write_lock_bh(&ip_conntrack_lock);
-	list_del(&me->list);
-
-	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
-		if (exp->master->helper == me && del_timer(&exp->timeout)) {
-			ip_ct_unlink_expect(exp);
-			ip_conntrack_expect_put(exp);
-		}
-	}
-	/* Get rid of expecteds, set helpers to NULL. */
-	list_for_each_entry(h, &unconfirmed, list)
-		unhelp(h, me);
-	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		list_for_each_entry(h, &ip_conntrack_hash[i], list)
-			unhelp(h, me);
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* Someone could be still looking at the helper in a bh. */
-	synchronize_net();
-}
-
-/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
-void __ip_ct_refresh_acct(struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			const struct sk_buff *skb,
-			unsigned long extra_jiffies,
-			int do_acct)
-{
-	int event = 0;
-
-	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
-	IP_NF_ASSERT(skb);
-
-	write_lock_bh(&ip_conntrack_lock);
-
-	/* Only update if this is not a fixed timeout */
-	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
-		write_unlock_bh(&ip_conntrack_lock);
-		return;
-	}
-
-	/* If not in hash table, timer will not be active yet */
-	if (!is_confirmed(ct)) {
-		ct->timeout.expires = extra_jiffies;
-		event = IPCT_REFRESH;
-	} else {
-		/* Need del_timer for race avoidance (may already be dying). */
-		if (del_timer(&ct->timeout)) {
-			ct->timeout.expires = jiffies + extra_jiffies;
-			add_timer(&ct->timeout);
-			event = IPCT_REFRESH;
-		}
-	}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-	if (do_acct) {
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-						ntohs(skb->nh.iph->tot_len);
-		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
-		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
-			event |= IPCT_COUNTER_FILLING;
-	}
-#endif
-
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* must be unlocked when calling event cache */
-	if (event)
-		ip_conntrack_event_cache(event, skb);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
-int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
-			       const struct ip_conntrack_tuple *tuple)
-{
-	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
-		&tuple->src.u.tcp.port);
-	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
-		&tuple->dst.u.tcp.port);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
-			       struct ip_conntrack_tuple *t)
-{
-	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
-		return -EINVAL;
-
-	t->src.u.tcp.port =
-		*(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
-	t->dst.u.tcp.port =
-		*(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
-
-	return 0;
-}
-#endif
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
-	skb_orphan(skb);
-
-	local_bh_disable();
-	skb = ip_defrag(skb, user);
-	local_bh_enable();
-
-	if (skb)
-		ip_send_check(skb->nh.iph);
-	return skb;
-}
-
-/* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This ICMP is in reverse direction to the packet which caused it */
-	ct = ip_conntrack_get(skb, &ctinfo);
-
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
-		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
-	else
-		ctinfo = IP_CT_RELATED;
-
-	/* Attach to new skbuff, and increment count */
-	nskb->nfct = &ct->ct_general;
-	nskb->nfctinfo = ctinfo;
-	nf_conntrack_get(nskb->nfct);
-}
-
-/* Bring out ya dead! */
-static struct ip_conntrack *
-get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
-		void *data, unsigned int *bucket)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
-			ct = tuplehash_to_ctrack(h);
-			if (iter(ct, data))
-				goto found;
-		}
-	}
-	list_for_each_entry(h, &unconfirmed, list) {
-		ct = tuplehash_to_ctrack(h);
-		if (iter(ct, data))
-			set_bit(IPS_DYING_BIT, &ct->status);
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-	return NULL;
-
-found:
-	atomic_inc(&ct->ct_general.use);
-	write_unlock_bh(&ip_conntrack_lock);
-	return ct;
-}
-
-void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
-{
-	struct ip_conntrack *ct;
-	unsigned int bucket = 0;
-
-	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-		/* Time to push up daises... */
-		if (del_timer(&ct->timeout))
-			death_by_timeout((unsigned long)ct);
-		/* ... else the timer will get him soon. */
-
-		ip_conntrack_put(ct);
-	}
-}
-
-/* Fast function for those who don't want to parse /proc (and I don't
-   blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
-   mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-
-	IP_CT_TUPLE_U_BLANK(&tuple);
-	tuple.src.ip = inet->rcv_saddr;
-	tuple.src.u.tcp.port = inet->sport;
-	tuple.dst.ip = inet->daddr;
-	tuple.dst.u.tcp.port = inet->dport;
-	tuple.dst.protonum = IPPROTO_TCP;
-
-	/* We only do TCP at the moment: is there a better way? */
-	if (strcmp(sk->sk_prot->name, "TCP")) {
-		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
-		return -ENOPROTOOPT;
-	}
-
-	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
-		       *len, sizeof(struct sockaddr_in));
-		return -EINVAL;
-	}
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (h) {
-		struct sockaddr_in sin;
-		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-
-		sin.sin_family = AF_INET;
-		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.u.tcp.port;
-		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.ip;
-		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
-		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
-		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-		ip_conntrack_put(ct);
-		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
-			return -EFAULT;
-		else
-			return 0;
-	}
-	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
-	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
-	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
-	return -ENOENT;
-}
-
-static struct nf_sockopt_ops so_getorigdst = {
-	.pf		= PF_INET,
-	.get_optmin	= SO_ORIGINAL_DST,
-	.get_optmax	= SO_ORIGINAL_DST+1,
-	.get		= &getorigdst,
-};
-
-static int kill_all(struct ip_conntrack *i, void *data)
-{
-	return 1;
-}
-
-void ip_conntrack_flush(void)
-{
-	ip_ct_iterate_cleanup(kill_all, NULL);
-}
-
-static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
-{
-	if (vmalloced)
-		vfree(hash);
-	else
-		free_pages((unsigned long)hash,
-			   get_order(sizeof(struct list_head) * size));
-}
-
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	rcu_assign_pointer(ip_ct_attach, NULL);
-
-	/* This makes sure all current packets have passed through
-	   netfilter framework.  Roll on, two-stage module
-	   delete... */
-	synchronize_net();
-
-	ip_ct_event_cache_flush();
- i_see_dead_people:
-	ip_conntrack_flush();
-	if (atomic_read(&ip_conntrack_count) != 0) {
-		schedule();
-		goto i_see_dead_people;
-	}
-	/* wait until all references to ip_conntrack_untracked are dropped */
-	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-		schedule();
-
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-			    ip_conntrack_htable_size);
-	nf_unregister_sockopt(&so_getorigdst);
-}
-
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
-{
-	struct list_head *hash;
-	unsigned int i;
-
-	*vmalloced = 0;
-	hash = (void*)__get_free_pages(GFP_KERNEL,
-				       get_order(sizeof(struct list_head)
-						 * size));
-	if (!hash) {
-		*vmalloced = 1;
-		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
-	}
-
-	if (hash)
-		for (i = 0; i < size; i++)
-			INIT_LIST_HEAD(&hash[i]);
-
-	return hash;
-}
-
-static int set_hashsize(const char *val, struct kernel_param *kp)
-{
-	int i, bucket, hashsize, vmalloced;
-	int old_vmalloced, old_size;
-	int rnd;
-	struct list_head *hash, *old_hash;
-	struct ip_conntrack_tuple_hash *h;
-
-	/* On boot, we can set this without any fancy locking. */
-	if (!ip_conntrack_htable_size)
-		return param_set_int(val, kp);
-
-	hashsize = simple_strtol(val, NULL, 0);
-	if (!hashsize)
-		return -EINVAL;
-
-	hash = alloc_hashtable(hashsize, &vmalloced);
-	if (!hash)
-		return -ENOMEM;
-
-	/* We have to rehash for the new table anyway, so we also can
-	 * use a new random seed */
-	get_random_bytes(&rnd, 4);
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		while (!list_empty(&ip_conntrack_hash[i])) {
-			h = list_entry(ip_conntrack_hash[i].next,
-				       struct ip_conntrack_tuple_hash, list);
-			list_del(&h->list);
-			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
-			list_add_tail(&h->list, &hash[bucket]);
-		}
-	}
-	old_size = ip_conntrack_htable_size;
-	old_vmalloced = ip_conntrack_vmalloc;
-	old_hash = ip_conntrack_hash;
-
-	ip_conntrack_htable_size = hashsize;
-	ip_conntrack_vmalloc = vmalloced;
-	ip_conntrack_hash = hash;
-	ip_conntrack_hash_rnd = rnd;
-	write_unlock_bh(&ip_conntrack_lock);
-
-	free_conntrack_hash(old_hash, old_vmalloced, old_size);
-	return 0;
-}
-
-module_param_call(hashsize, set_hashsize, param_get_uint,
-		  &ip_conntrack_htable_size, 0600);
-
-int __init ip_conntrack_init(void)
-{
-	unsigned int i;
-	int ret;
-
-	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
-	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
-	if (!ip_conntrack_htable_size) {
-		ip_conntrack_htable_size
-			= (((num_physpages << PAGE_SHIFT) / 16384)
-			   / sizeof(struct list_head));
-		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
-			ip_conntrack_htable_size = 8192;
-		if (ip_conntrack_htable_size < 16)
-			ip_conntrack_htable_size = 16;
-	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
-	printk("ip_conntrack version %s (%u buckets, %d max)"
-	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
-	       sizeof(struct ip_conntrack));
-
-	ret = nf_register_sockopt(&so_getorigdst);
-	if (ret != 0) {
-		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-					    &ip_conntrack_vmalloc);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
-	}
-
-	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
-						sizeof(struct ip_conntrack), 0,
-						0, NULL, NULL);
-	if (!ip_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
-	}
-
-	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
-					sizeof(struct ip_conntrack_expect),
-					0, 0, NULL, NULL);
-	if (!ip_conntrack_expect_cachep) {
-		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
-		goto err_free_conntrack_slab;
-	}
-
-	/* Don't NEED lock here, but good form anyway. */
-	write_lock_bh(&ip_conntrack_lock);
-	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
-	/* Sew in builtin protocols. */
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* For use by ipt_REJECT */
-	rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
-
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-
-	return ret;
-
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
-err_free_hash:
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-			    ip_conntrack_htable_size);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index 1faa68ab9432..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* FTP extension for IP connection tracking. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/ctype.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp connection tracking helper");
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-static DEFINE_SPINLOCK(ip_ftp_lock);
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-
-static int loose;
-module_param(loose, bool, 0600);
-
-unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				enum ip_ct_ftp_type type,
-				unsigned int matchoff,
-				unsigned int matchlen,
-				struct ip_conntrack_expect *exp,
-				u32 *seq);
-EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int try_rfc959(const char *, size_t, u_int32_t [], char);
-static int try_eprt(const char *, size_t, u_int32_t [], char);
-static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-
-static const struct ftp_search {
-	const char *pattern;
-	size_t plen;
-	char skip;
-	char term;
-	enum ip_ct_ftp_type ftptype;
-	int (*getnum)(const char *, size_t, u_int32_t[], char);
-} search[IP_CT_DIR_MAX][2] = {
-	[IP_CT_DIR_ORIGINAL] = {
-		{
-			.pattern	=  "PORT",
-			.plen		= sizeof("PORT") - 1,
-			.skip		= ' ',
-			.term		= '\r',
-			.ftptype	= IP_CT_FTP_PORT,
-			.getnum		= try_rfc959,
-		},
-		{
-			.pattern	= "EPRT",
-			.plen		= sizeof("EPRT") - 1,
-			.skip		= ' ',
-			.term		= '\r',
-			.ftptype	= IP_CT_FTP_EPRT,
-			.getnum		= try_eprt,
-		},
-	},
-	[IP_CT_DIR_REPLY] = {
-		{
-			.pattern	= "227 ",
-			.plen		= sizeof("227 ") - 1,
-			.skip		= '(',
-			.term		= ')',
-			.ftptype	= IP_CT_FTP_PASV,
-			.getnum		= try_rfc959,
-		},
-		{
-			.pattern	= "229 ",
-			.plen		= sizeof("229 ") - 1,
-			.skip		= '(',
-			.term		= ')',
-			.ftptype	= IP_CT_FTP_EPSV,
-			.getnum		= try_epsv_response,
-		},
-	},
-};
-
-static int try_number(const char *data, size_t dlen, u_int32_t array[],
-		      int array_size, char sep, char term)
-{
-	u_int32_t i, len;
-
-	memset(array, 0, sizeof(array[0])*array_size);
-
-	/* Keep data pointing at next char. */
-	for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
-		if (*data >= '0' && *data <= '9') {
-			array[i] = array[i]*10 + *data - '0';
-		}
-		else if (*data == sep)
-			i++;
-		else {
-			/* Unexpected character; true if it's the
-			   terminator and we're finished. */
-			if (*data == term && i == array_size - 1)
-				return len;
-
-			DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
-			       len, i, *data);
-			return 0;
-		}
-	}
-	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
-	return 0;
-}
-
-/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
-static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
-		       char term)
-{
-	return try_number(data, dlen, array, 6, ',', term);
-}
-
-/* Grab port: number up to delimiter */
-static int get_port(const char *data, int start, size_t dlen, char delim,
-		    u_int32_t array[2])
-{
-	u_int16_t port = 0;
-	int i;
-
-	for (i = start; i < dlen; i++) {
-		/* Finished? */
-		if (data[i] == delim) {
-			if (port == 0)
-				break;
-			array[0] = port >> 8;
-			array[1] = port;
-			return i + 1;
-		}
-		else if (data[i] >= '0' && data[i] <= '9')
-			port = port*10 + data[i] - '0';
-		else /* Some other crap */
-			break;
-	}
-	return 0;
-}
-
-/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
-static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
-		    char term)
-{
-	char delim;
-	int length;
-
-	/* First character is delimiter, then "1" for IPv4, then
-	   delimiter again. */
-	if (dlen <= 3) return 0;
-	delim = data[0];
-	if (isdigit(delim) || delim < 33 || delim > 126
-	    || data[1] != '1' || data[2] != delim)
-		return 0;
-
-	DEBUGP("EPRT: Got |1|!\n");
-	/* Now we have IP address. */
-	length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
-	if (length == 0)
-		return 0;
-
-	DEBUGP("EPRT: Got IP address!\n");
-	/* Start offset includes initial "|1|", and trailing delimiter */
-	return get_port(data, 3 + length + 1, dlen, delim, array+4);
-}
-
-/* Returns 0, or length of numbers: |||6446| */
-static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
-			     char term)
-{
-	char delim;
-
-	/* Three delimiters. */
-	if (dlen <= 3) return 0;
-	delim = data[0];
-	if (isdigit(delim) || delim < 33 || delim > 126
-	    || data[1] != delim || data[2] != delim)
-		return 0;
-
-	return get_port(data, 3, dlen, delim, array+4);
-}
-
-/* Return 1 for match, 0 for accept, -1 for partial. */
-static int find_pattern(const char *data, size_t dlen,
-			const char *pattern, size_t plen,
-			char skip, char term,
-			unsigned int *numoff,
-			unsigned int *numlen,
-			u_int32_t array[6],
-			int (*getnum)(const char *, size_t, u_int32_t[], char))
-{
-	size_t i;
-
-	DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
-	if (dlen == 0)
-		return 0;
-
-	if (dlen <= plen) {
-		/* Short packet: try for partial? */
-		if (strnicmp(data, pattern, dlen) == 0)
-			return -1;
-		else return 0;
-	}
-
-	if (strnicmp(data, pattern, plen) != 0) {
-#if 0
-		size_t i;
-
-		DEBUGP("ftp: string mismatch\n");
-		for (i = 0; i < plen; i++) {
-			DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
-				i, data[i], data[i],
-				pattern[i], pattern[i]);
-		}
-#endif
-		return 0;
-	}
-
-	DEBUGP("Pattern matches!\n");
-	/* Now we've found the constant string, try to skip
-	   to the 'skip' character */
-	for (i = plen; data[i] != skip; i++)
-		if (i == dlen - 1) return -1;
-
-	/* Skip over the last character */
-	i++;
-
-	DEBUGP("Skipped up to `%c'!\n", skip);
-
-	*numoff = i;
-	*numlen = getnum(data + i, dlen - i, array, term);
-	if (!*numlen)
-		return -1;
-
-	DEBUGP("Match succeeded!\n");
-	return 1;
-}
-
-/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
-{
-	unsigned int i;
-
-	for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
-		if (info->seq_aft_nl[dir][i] == seq)
-			return 1;
-	return 0;
-}
-
-/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
-			  struct sk_buff *skb)
-{
-	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
-
-	/* Look for oldest: if we find exact match, we're done. */
-	for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
-		if (info->seq_aft_nl[dir][i] == nl_seq)
-			return;
-
-		if (oldest == info->seq_aft_nl_num[dir]
-		    || before(info->seq_aft_nl[dir][i], oldest))
-			oldest = i;
-	}
-
-	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
-		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
-	} else if (oldest != NUM_SEQ_TO_REMEMBER) {
-		info->seq_aft_nl[dir][oldest] = nl_seq;
-		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
-	}
-}
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct,
-		enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff, datalen;
-	struct tcphdr _tcph, *th;
-	char *fb_ptr;
-	int ret;
-	u32 seq, array[6] = { 0 };
-	int dir = CTINFO2DIR(ctinfo);
-	unsigned int matchlen, matchoff;
-	struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
-	struct ip_conntrack_expect *exp;
-	unsigned int i;
-	int found = 0, ends_in_nl;
-	typeof(ip_nat_ftp_hook) ip_nat_ftp;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
-		DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return NF_ACCEPT;
-
-	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
-	/* No data? */
-	if (dataoff >= (*pskb)->len) {
-		DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-	datalen = (*pskb)->len - dataoff;
-
-	spin_lock_bh(&ip_ftp_lock);
-	fb_ptr = skb_header_pointer(*pskb, dataoff,
-				    (*pskb)->len - dataoff, ftp_buffer);
-	BUG_ON(fb_ptr == NULL);
-
-	ends_in_nl = (fb_ptr[datalen - 1] == '\n');
-	seq = ntohl(th->seq) + datalen;
-
-	/* Look up to see if we're just after a \n. */
-	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
-		/* Now if this ends in \n, update ftp info. */
-		DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
-		       ct_ftp_info->seq_aft_nl[0][dir]
-		       old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
-		ret = NF_ACCEPT;
-		goto out_update_nl;
-	}
-
-	/* Initialize IP array to expected address (it's not mentioned
-	   in EPSV responses) */
-	array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
-	array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
-	array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
-	array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
-
-	for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
-		found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
-				     search[dir][i].pattern,
-				     search[dir][i].plen,
-				     search[dir][i].skip,
-				     search[dir][i].term,
-				     &matchoff, &matchlen,
-				     array,
-				     search[dir][i].getnum);
-		if (found) break;
-	}
-	if (found == -1) {
-		/* We don't usually drop packets.  After all, this is
-		   connection tracking, not packet filtering.
-		   However, it is necessary for accurate tracking in
-		   this case. */
-		if (net_ratelimit())
-			printk("conntrack_ftp: partial %s %u+%u\n",
-			       search[dir][i].pattern,
-			       ntohl(th->seq), datalen);
-		ret = NF_DROP;
-		goto out;
-	} else if (found == 0) { /* No match */
-		ret = NF_ACCEPT;
-		goto out_update_nl;
-	}
-
-	DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
-	       fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
-
-	/* Allocate expectation which will be inserted */
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL) {
-		ret = NF_DROP;
-		goto out;
-	}
-
-	/* We refer to the reverse direction ("!dir") tuples here,
-	 * because we're expecting something in the other direction.
-	 * Doesn't matter unless NAT is happening.  */
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-
-	if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
-	    != ct->tuplehash[dir].tuple.src.ip) {
-		/* Enrico Scholz's passive FTP to partially RNAT'd ftp
-		   server: it really wants us to connect to a
-		   different IP address.  Simply don't record it for
-		   NAT. */
-		DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
-		       array[0], array[1], array[2], array[3],
-		       NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
-
-		/* Thanks to Cristiano Lincoln Mattos
-		   <lincoln@cesar.org.br> for reporting this potential
-		   problem (DMZ machines opening holes to internal
-		   networks, or the packet filter itself). */
-		if (!loose) {
-			ret = NF_ACCEPT;
-			goto out_put_expect;
-		}
-		exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
-					 | (array[2] << 8) | array[3]);
-	}
-
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
-	exp->tuple.src.u.tcp.port = 0; /* Don't care. */
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask = ((struct ip_conntrack_tuple)
-		{ { htonl(0xFFFFFFFF), { 0 } },
-		  { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-
-	/* Now, NAT might want to mangle the packet, and register the
-	 * (possibly changed) expectation itself. */
-	ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
-	if (ip_nat_ftp)
-		ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
-				 matchoff, matchlen, exp, &seq);
-	else {
-		/* Can't expect this?  Best to drop packet now. */
-		if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		else
-			ret = NF_ACCEPT;
-	}
-
-out_put_expect:
-	ip_conntrack_expect_put(exp);
-
-out_update_nl:
-	/* Now if this ends in \n, update ftp info.  Seq may have been
-	 * adjusted by NAT code. */
-	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info,dir, *pskb);
- out:
-	spin_unlock_bh(&ip_ftp_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
-/* Not __exit: called from init() */
-static void ip_conntrack_ftp_fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
-				ports[i]);
-		ip_conntrack_helper_unregister(&ftp[i]);
-	}
-
-	kfree(ftp_buffer);
-}
-
-static int __init ip_conntrack_ftp_init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	ftp_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!ftp_buffer)
-		return -ENOMEM;
-
-	if (ports_c == 0)
-		ports[ports_c++] = FTP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
-		ftp[i].tuple.dst.protonum = IPPROTO_TCP;
-		ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
-		ftp[i].mask.dst.protonum = 0xFF;
-		ftp[i].max_expected = 1;
-		ftp[i].timeout = 5 * 60; /* 5 minutes */
-		ftp[i].me = THIS_MODULE;
-		ftp[i].help = help;
-
-		tmpname = &ftp_names[i][0];
-		if (ports[i] == FTP_PORT)
-			sprintf(tmpname, "ftp");
-		else
-			sprintf(tmpname, "ftp-%d", ports[i]);
-		ftp[i].name = tmpname;
-
-		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
-				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
-
-		if (ret) {
-			ip_conntrack_ftp_fini();
-			return ret;
-		}
-	}
-	return 0;
-}
-
-module_init(ip_conntrack_ftp_init);
-module_exit(ip_conntrack_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
deleted file mode 100644
index 53eb365ccc7e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ /dev/null
@@ -1,1841 +0,0 @@
-/*
- * H.323 connection tracking helper
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * For more information, please see http://nath323.sourceforge.net/
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>
-#include <linux/inet.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Parameters */
-static unsigned int default_rrq_ttl = 300;
-module_param(default_rrq_ttl, uint, 0600);
-MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
-
-static int gkrouted_only = 1;
-module_param(gkrouted_only, int, 0600);
-MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
-
-static int callforward_filter = 1;
-module_param(callforward_filter, bool, 0600);
-MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
-				     "if both endpoints are on different sides "
-				     "(determined by routing information)");
-
-/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff ** pskb,
-			   unsigned char **data, int dataoff,
-			   H245_TransportAddress * addr,
-			   __be32 ip, u_int16_t port);
-int (*set_h225_addr_hook) (struct sk_buff ** pskb,
-			   unsigned char **data, int dataoff,
-			   TransportAddress * addr,
-			   __be32 ip, u_int16_t port);
-int (*set_sig_addr_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
-			  TransportAddress * addr, int count);
-int (*set_ras_addr_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
-			  TransportAddress * addr, int count);
-int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data, int dataoff,
-			  H245_TransportAddress * addr,
-			  u_int16_t port, u_int16_t rtp_port,
-			  struct ip_conntrack_expect * rtp_exp,
-			  struct ip_conntrack_expect * rtcp_exp);
-int (*nat_t120_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, int dataoff,
-		      H245_TransportAddress * addr, u_int16_t port,
-		      struct ip_conntrack_expect * exp);
-int (*nat_h245_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, int dataoff,
-		      TransportAddress * addr, u_int16_t port,
-		      struct ip_conntrack_expect * exp);
-int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
-				struct ip_conntrack * ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned char **data, int dataoff,
-				TransportAddress * addr, u_int16_t port,
-				struct ip_conntrack_expect * exp);
-int (*nat_q931_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, TransportAddress * addr, int idx,
-		      u_int16_t port, struct ip_conntrack_expect * exp);
-
-
-static DEFINE_SPINLOCK(ip_h323_lock);
-static char *h323_buffer;
-
-/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned char **data, int *datalen, int *dataoff)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	struct tcphdr _tcph, *th;
-	int tcpdatalen;
-	int tcpdataoff;
-	unsigned char *tpkt;
-	int tpktlen;
-	int tpktoff;
-
-	/* Get TCP header */
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return 0;
-
-	/* Get TCP data offset */
-	tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;
-
-	/* Get TCP data length */
-	tcpdatalen = (*pskb)->len - tcpdataoff;
-	if (tcpdatalen <= 0)	/* No TCP data */
-		goto clear_out;
-
-	if (*data == NULL) {	/* first TPKT */
-		/* Get first TPKT pointer */
-		tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
-					  h323_buffer);
-		BUG_ON(tpkt == NULL);
-
-		/* Validate TPKT identifier */
-		if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
-			/* Netmeeting sends TPKT header and data separately */
-			if (info->tpkt_len[dir] > 0) {
-				DEBUGP("ip_ct_h323: previous packet "
-				       "indicated separate TPKT data of %hu "
-				       "bytes\n", info->tpkt_len[dir]);
-				if (info->tpkt_len[dir] <= tcpdatalen) {
-					/* Yes, there was a TPKT header
-					 * received */
-					*data = tpkt;
-					*datalen = info->tpkt_len[dir];
-					*dataoff = 0;
-					goto out;
-				}
-
-				/* Fragmented TPKT */
-				if (net_ratelimit())
-					printk("ip_ct_h323: "
-					       "fragmented TPKT\n");
-				goto clear_out;
-			}
-
-			/* It is not even a TPKT */
-			return 0;
-		}
-		tpktoff = 0;
-	} else {		/* Next TPKT */
-		tpktoff = *dataoff + *datalen;
-		tcpdatalen -= tpktoff;
-		if (tcpdatalen <= 4)	/* No more TPKT */
-			goto clear_out;
-		tpkt = *data + *datalen;
-
-		/* Validate TPKT identifier */
-		if (tpkt[0] != 0x03 || tpkt[1] != 0)
-			goto clear_out;
-	}
-
-	/* Validate TPKT length */
-	tpktlen = tpkt[2] * 256 + tpkt[3];
-	if (tpktlen < 4)
-		goto clear_out;
-	if (tpktlen > tcpdatalen) {
-		if (tcpdatalen == 4) {	/* Separate TPKT header */
-			/* Netmeeting sends TPKT header and data separately */
-			DEBUGP("ip_ct_h323: separate TPKT header indicates "
-			       "there will be TPKT data of %hu bytes\n",
-			       tpktlen - 4);
-			info->tpkt_len[dir] = tpktlen - 4;
-			return 0;
-		}
-
-		if (net_ratelimit())
-			printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
-		goto clear_out;
-	}
-
-	/* This is the encapsulated data */
-	*data = tpkt + 4;
-	*datalen = tpktlen - 4;
-	*dataoff = tpktoff + 4;
-
-      out:
-	/* Clear TPKT length */
-	info->tpkt_len[dir] = 0;
-	return 1;
-
-      clear_out:
-	info->tpkt_len[dir] = 0;
-	return 0;
-}
-
-/****************************************************************************/
-static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
-			 __be32 * ip, u_int16_t * port)
-{
-	unsigned char *p;
-
-	if (addr->choice != eH245_TransportAddress_unicastAddress ||
-	    addr->unicastAddress.choice != eUnicastAddress_iPAddress)
-		return 0;
-
-	p = data + addr->unicastAddress.iPAddress.network;
-	*ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
-	*port = (p[4] << 8) | (p[5]);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned char **data, int dataoff,
-			   H245_TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	u_int16_t rtp_port;
-	struct ip_conntrack_expect *rtp_exp;
-	struct ip_conntrack_expect *rtcp_exp;
-	typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
-
-	/* Read RTP or RTCP address */
-	if (!get_h245_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* RTP port is even */
-	rtp_port = port & (~1);
-
-	/* Create expect for RTP */
-	if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	rtp_exp->tuple.src.u.udp.port = 0;
-	rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
-	rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
-	rtp_exp->mask.src.u.udp.port = 0;
-	rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
-	rtp_exp->mask.dst.protonum = 0xFF;
-	rtp_exp->flags = 0;
-
-	/* Create expect for RTCP */
-	if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
-		ip_conntrack_expect_put(rtp_exp);
-		return -1;
-	}
-	rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	rtcp_exp->tuple.src.u.udp.port = 0;
-	rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
-	rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
-	rtcp_exp->mask.src.u.udp.port = 0;
-	rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
-	rtcp_exp->mask.dst.protonum = 0xFF;
-	rtcp_exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
-		/* NAT needed */
-		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				   addr, port, rtp_port, rtp_exp, rtcp_exp);
-	} else {		/* Conntrack only */
-		rtp_exp->expectfn = NULL;
-		rtcp_exp->expectfn = NULL;
-
-		if (ip_conntrack_expect_related(rtp_exp) == 0) {
-			if (ip_conntrack_expect_related(rtcp_exp) == 0) {
-				DEBUGP("ip_ct_h323: expect RTP "
-				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				       NIPQUAD(rtp_exp->tuple.src.ip),
-				       ntohs(rtp_exp->tuple.src.u.udp.port),
-				       NIPQUAD(rtp_exp->tuple.dst.ip),
-				       ntohs(rtp_exp->tuple.dst.u.udp.port));
-				DEBUGP("ip_ct_h323: expect RTCP "
-				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				       NIPQUAD(rtcp_exp->tuple.src.ip),
-				       ntohs(rtcp_exp->tuple.src.u.udp.port),
-				       NIPQUAD(rtcp_exp->tuple.dst.ip),
-				       ntohs(rtcp_exp->tuple.dst.u.udp.port));
-			} else {
-				ip_conntrack_unexpect_related(rtp_exp);
-				ret = -1;
-			}
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(rtp_exp);
-	ip_conntrack_expect_put(rtcp_exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
-		       struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       H245_TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_t120_hook) nat_t120;
-
-	/* Read T.120 address */
-	if (!get_h245_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* Create expect for T.120 connections */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple channels */
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_t120 = rcu_dereference(nat_t120_hook))) {
-		/* NAT needed */
-		ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
-			       port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = NULL;
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_h323: expect T.120 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
-				struct ip_conntrack *ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned char **data, int dataoff,
-				H2250LogicalChannelParameters * channel)
-{
-	int ret;
-
-	if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
-		/* RTP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				      &channel->mediaChannel);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (channel->
-	    options & eH2250LogicalChannelParameters_mediaControlChannel) {
-		/* RTCP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				      &channel->mediaControlChannel);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       OpenLogicalChannel * olc)
-{
-	int ret;
-
-	DEBUGP("ip_ct_h323: OpenLogicalChannel\n");
-
-	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
-	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
-	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					   &olc->
-					   forwardLogicalChannelParameters.
-					   multiplexParameters.
-					   h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olc->options &
-	     eOpenLogicalChannel_reverseLogicalChannelParameters) &&
-	    (olc->reverseLogicalChannelParameters.options &
-	     eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
-	    && (olc->reverseLogicalChannelParameters.multiplexParameters.
-		choice ==
-		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
-	{
-		ret =
-		    process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					 &olc->
-					 reverseLogicalChannelParameters.
-					 multiplexParameters.
-					 h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olc->options & eOpenLogicalChannel_separateStack) &&
-	    olc->forwardLogicalChannelParameters.dataType.choice ==
-	    eDataType_data &&
-	    olc->forwardLogicalChannelParameters.dataType.data.application.
-	    choice == eDataApplicationCapability_application_t120 &&
-	    olc->forwardLogicalChannelParameters.dataType.data.application.
-	    t120.choice == eDataProtocolCapability_separateLANStack &&
-	    olc->separateStack.networkAddress.choice ==
-	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
-				  &olc->separateStack.networkAddress.
-				  localAreaAddress);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			OpenLogicalChannelAck * olca)
-{
-	H2250LogicalChannelAckParameters *ack;
-	int ret;
-
-	DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");
-
-	if ((olca->options &
-	     eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
-	    (olca->reverseLogicalChannelParameters.options &
-	     eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
-	    && (olca->reverseLogicalChannelParameters.multiplexParameters.
-		choice ==
-		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
-	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					   &olca->
-					   reverseLogicalChannelParameters.
-					   multiplexParameters.
-					   h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olca->options &
-	     eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
-	    (olca->forwardMultiplexAckParameters.choice ==
-	     eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
-	{
-		ack = &olca->forwardMultiplexAckParameters.
-		    h2250LogicalChannelAckParameters;
-		if (ack->options &
-		    eH2250LogicalChannelAckParameters_mediaChannel) {
-			/* RTP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-					      &ack->mediaChannel);
-			if (ret < 0)
-				return -1;
-		}
-
-		if (ack->options &
-		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
-			/* RTCP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-					      &ack->mediaControlChannel);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			MultimediaSystemControlMessage * mscm)
-{
-	switch (mscm->choice) {
-	case eMultimediaSystemControlMessage_request:
-		if (mscm->request.choice ==
-		    eRequestMessage_openLogicalChannel) {
-			return process_olc(pskb, ct, ctinfo, data, dataoff,
-					   &mscm->request.openLogicalChannel);
-		}
-		DEBUGP("ip_ct_h323: H.245 Request %d\n",
-		       mscm->request.choice);
-		break;
-	case eMultimediaSystemControlMessage_response:
-		if (mscm->response.choice ==
-		    eResponseMessage_openLogicalChannelAck) {
-			return process_olca(pskb, ct, ctinfo, data, dataoff,
-					    &mscm->response.
-					    openLogicalChannelAck);
-		}
-		DEBUGP("ip_ct_h323: H.245 Response %d\n",
-		       mscm->response.choice);
-		break;
-	default:
-		DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice);
-		break;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	static MultimediaSystemControlMessage mscm;
-	unsigned char *data = NULL;
-	int datalen;
-	int dataoff;
-	int ret;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		return NF_ACCEPT;
-	}
-	DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Process each TPKT */
-	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
-		DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
-		/* Decode H.245 signal */
-		ret = DecodeMultimediaSystemControlMessage(data, datalen,
-							   &mscm);
-		if (ret < 0) {
-			if (net_ratelimit())
-				printk("ip_ct_h245: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
-			/* We don't drop when decoding error */
-			break;
-		}
-
-		/* Process H.245 signal */
-		if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
-			goto drop;
-	}
-
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_h245: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
-	.name = "H.245",
-	.me = THIS_MODULE,
-	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */ ,
-	.timeout = 240,
-	.tuple = {.dst = {.protonum = IPPROTO_TCP}},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF}},
-	.help = h245_help
-};
-
-/****************************************************************************/
-void ip_conntrack_h245_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_h245;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-int get_h225_addr(unsigned char *data, TransportAddress * addr,
-		  __be32 * ip, u_int16_t * port)
-{
-	unsigned char *p;
-
-	if (addr->choice != eTransportAddress_ipAddress)
-		return 0;
-
-	p = data + addr->ipAddress.ip;
-	*ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
-	*port = (p[4] << 8) | (p[5]);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_h245_hook) nat_h245;
-
-	/* Read h245Address */
-	if (!get_h225_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* Create expect for h245 connection */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_h245 = rcu_dereference(nat_h245_hook))) {
-		/* NAT needed */
-		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
-			       port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_h245_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_q931: expect H.245 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/* Forwarding declaration */
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
-				 struct ip_conntrack *ct,
-				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data, int dataoff,
-				 TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_callforwarding_hook) nat_callforwarding;
-
-	/* Read alternativeAddress */
-	if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
-		return 0;
-
-	/* If the calling party is on the same side of the forward-to party,
-	 * we don't need to track the second call */
-	if (callforward_filter) {
-		struct rtable *rt1, *rt2;
-		struct flowi fl1 = {
-			.fl4_dst = ip,
-		};
-		struct flowi fl2 = {
-			.fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
-		};
-
-		if (ip_route_output_key(&rt1, &fl1) == 0) {
-			if (ip_route_output_key(&rt2, &fl2) == 0) {
-				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev  == rt2->u.dst.dev)
-					ret = 1;
-				dst_release(&rt2->u.dst);
-			}
-			dst_release(&rt1->u.dst);
-		}
-		if (ret) {
-			DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
-			return 0;
-		}
-	}
-
-	/* Create expect for the second call leg */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
-		/* Need NAT */
-		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
-					 addr, port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_q931_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_q931: expect Call Forwarding "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned char **data, int dataoff,
-			 Setup_UUIE * setup)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	int i;
-	__be32 ip;
-	u_int16_t port;
-	typeof(set_h225_addr_hook) set_h225_addr;
-
-	DEBUGP("ip_ct_q931: Setup\n");
-
-	if (setup->options & eSetup_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &setup->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	set_h225_addr = rcu_dereference(set_h225_addr_hook);
-
-	if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
-	    (set_h225_addr) &&
-	    get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
-	    ip != ct->tuplehash[!dir].tuple.src.ip) {
-		DEBUGP("ip_ct_q931: set destCallSignalAddress "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(ip), port,
-		       NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
-		       ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
-				    &setup->destCallSignalAddress,
-				    ct->tuplehash[!dir].tuple.src.ip,
-				    ntohs(ct->tuplehash[!dir].tuple.src.
-					  u.tcp.port));
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
-	    (set_h225_addr) &&
-	    get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
-	    && ip != ct->tuplehash[!dir].tuple.dst.ip) {
-		DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(ip), port,
-		       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
-		       ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
-				    &setup->sourceCallSignalAddress,
-				    ct->tuplehash[!dir].tuple.dst.ip,
-				    ntohs(ct->tuplehash[!dir].tuple.dst.
-					  u.tcp.port));
-		if (ret < 0)
-			return -1;
-	}
-
-	if (setup->options & eSetup_UUIE_fastStart) {
-		for (i = 0; i < setup->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &setup->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned char **data, int dataoff,
-				  CallProceeding_UUIE * callproc)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: CallProceeding\n");
-
-	if (callproc->options & eCallProceeding_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &callproc->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (callproc->options & eCallProceeding_UUIE_fastStart) {
-		for (i = 0; i < callproc->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &callproc->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned char **data, int dataoff,
-			   Connect_UUIE * connect)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Connect\n");
-
-	if (connect->options & eConnect_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &connect->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (connect->options & eConnect_UUIE_fastStart) {
-		for (i = 0; i < connect->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &connect->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Alerting_UUIE * alert)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Alerting\n");
-
-	if (alert->options & eAlerting_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &alert->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (alert->options & eAlerting_UUIE_fastStart) {
-		for (i = 0; i < alert->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &alert->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_information(struct sk_buff **pskb,
-			       struct ip_conntrack *ct,
-			       enum ip_conntrack_info ctinfo,
-			       unsigned char **data, int dataoff,
-			       Information_UUIE * info)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Information\n");
-
-	if (info->options & eInformation_UUIE_fastStart) {
-		for (i = 0; i < info->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &info->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Facility_UUIE * facility)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Facility\n");
-
-	if (facility->reason.choice == eFacilityReason_callForwarded) {
-		if (facility->options & eFacility_UUIE_alternativeAddress)
-			return expect_callforwarding(pskb, ct, ctinfo, data,
-						     dataoff,
-						     &facility->
-						     alternativeAddress);
-		return 0;
-	}
-
-	if (facility->options & eFacility_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &facility->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (facility->options & eFacility_UUIE_fastStart) {
-		for (i = 0; i < facility->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &facility->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Progress_UUIE * progress)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Progress\n");
-
-	if (progress->options & eProgress_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &progress->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (progress->options & eProgress_UUIE_fastStart) {
-		for (i = 0; i < progress->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &progress->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff, Q931 * q931)
-{
-	H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
-	int i;
-	int ret = 0;
-
-	switch (pdu->h323_message_body.choice) {
-	case eH323_UU_PDU_h323_message_body_setup:
-		ret = process_setup(pskb, ct, ctinfo, data, dataoff,
-				    &pdu->h323_message_body.setup);
-		break;
-	case eH323_UU_PDU_h323_message_body_callProceeding:
-		ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
-					     &pdu->h323_message_body.
-					     callProceeding);
-		break;
-	case eH323_UU_PDU_h323_message_body_connect:
-		ret = process_connect(pskb, ct, ctinfo, data, dataoff,
-				      &pdu->h323_message_body.connect);
-		break;
-	case eH323_UU_PDU_h323_message_body_alerting:
-		ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.alerting);
-		break;
-	case eH323_UU_PDU_h323_message_body_information:
-		ret = process_information(pskb, ct, ctinfo, data, dataoff,
-					  &pdu->h323_message_body.
-					  information);
-		break;
-	case eH323_UU_PDU_h323_message_body_facility:
-		ret = process_facility(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.facility);
-		break;
-	case eH323_UU_PDU_h323_message_body_progress:
-		ret = process_progress(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.progress);
-		break;
-	default:
-		DEBUGP("ip_ct_q931: Q.931 signal %d\n",
-		       pdu->h323_message_body.choice);
-		break;
-	}
-
-	if (ret < 0)
-		return -1;
-
-	if (pdu->options & eH323_UU_PDU_h245Control) {
-		for (i = 0; i < pdu->h245Control.count; i++) {
-			ret = process_h245(pskb, ct, ctinfo, data, dataoff,
-					   &pdu->h245Control.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	static Q931 q931;
-	unsigned char *data = NULL;
-	int datalen;
-	int dataoff;
-	int ret;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		return NF_ACCEPT;
-	}
-	DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Process each TPKT */
-	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
-		DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
-		/* Decode Q.931 signal */
-		ret = DecodeQ931(data, datalen, &q931);
-		if (ret < 0) {
-			if (net_ratelimit())
-				printk("ip_ct_q931: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
-			/* We don't drop when decoding error */
-			break;
-		}
-
-		/* Process Q.931 signal */
-		if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
-			goto drop;
-	}
-
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_q931: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
-	.name = "Q.931",
-	.me = THIS_MODULE,
-	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ ,
-	.timeout = 240,
-	.tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
-		  .dst = {.protonum = IPPROTO_TCP}},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF}},
-	.help = q931_help
-};
-
-/****************************************************************************/
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_q931;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
-{
-	struct udphdr _uh, *uh;
-	int dataoff;
-
-	uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh),
-				&_uh);
-	if (uh == NULL)
-		return NULL;
-	dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh);
-	if (dataoff >= (*pskb)->len)
-		return NULL;
-	*datalen = (*pskb)->len - dataoff;
-	return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
-}
-
-/****************************************************************************/
-static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
-					       __be32 ip, u_int16_t port)
-{
-	struct ip_conntrack_expect *exp;
-	struct ip_conntrack_tuple tuple;
-
-	tuple.src.ip = 0;
-	tuple.src.u.tcp.port = 0;
-	tuple.dst.ip = ip;
-	tuple.dst.u.tcp.port = htons(port);
-	tuple.dst.protonum = IPPROTO_TCP;
-
-	exp = __ip_conntrack_expect_find(&tuple);
-	if (exp && exp->master == ct)
-		return exp;
-	return NULL;
-}
-
-/****************************************************************************/
-static int set_expect_timeout(struct ip_conntrack_expect *exp,
-			      unsigned timeout)
-{
-	if (!exp || !del_timer(&exp->timeout))
-		return 0;
-
-	exp->timeout.expires = jiffies + timeout * HZ;
-	add_timer(&exp->timeout);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data,
-		       TransportAddress * addr, int count)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	int i;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-	typeof(nat_q931_hook) nat_q931;
-
-	/* Look for the first related address */
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
-		    ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
-			break;
-	}
-
-	if (i >= count)		/* Not found */
-		return 0;
-
-	/* Create expect for Q.931 */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = gkrouted_only ?	/* only accept calls from GK? */
-	    ct->tuplehash[!dir].tuple.src.ip : 0;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple calls */
-
-	nat_q931 = rcu_dereference(nat_q931_hook);
-	if (nat_q931) {	/* Need NAT */
-		ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_q931_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_ras: expect Q.931 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-
-			/* Save port for looking up expect in processing RCF */
-			info->sig_port[dir] = port;
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, GatekeeperRequest * grq)
-{
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: GRQ\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr)	/* NATed */
-		return set_ras_addr(pskb, ct, ctinfo, data,
-				    &grq->rasAddress, 1);
-	return 0;
-}
-
-/* Declare before using */
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
-				    struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, GatekeeperConfirm * gcf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-
-	DEBUGP("ip_ct_ras: GCF\n");
-
-	if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
-		return 0;
-
-	/* Registration port is the same as discovery port */
-	if (ip == ct->tuplehash[dir].tuple.src.ip &&
-	    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
-		return 0;
-
-	/* Avoid RAS expectation loops. A GCF is never expected. */
-	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
-		return 0;
-
-	/* Need new expect */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_UDP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-	exp->expectfn = ip_conntrack_ras_expect;
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect RAS "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RegistrationRequest * rrq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int ret;
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: RRQ\n");
-
-	ret = expect_q931(pskb, ct, ctinfo, data,
-			  rrq->callSignalAddress.item,
-			  rrq->callSignalAddress.count);
-	if (ret < 0)
-		return -1;
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
-				   rrq->rasAddress.item,
-				   rrq->rasAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (rrq->options & eRegistrationRequest_timeToLive) {
-		DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
-		info->timeout = rrq->timeToLive;
-	} else
-		info->timeout = default_rrq_ttl;
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RegistrationConfirm * rcf)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	struct ip_conntrack_expect *exp;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: RCF\n");
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   rcf->callSignalAddress.item,
-				   rcf->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (rcf->options & eRegistrationConfirm_timeToLive) {
-		DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
-		info->timeout = rcf->timeToLive;
-	}
-
-	if (info->timeout > 0) {
-		DEBUGP
-		    ("ip_ct_ras: set RAS connection timeout to %u seconds\n",
-		     info->timeout);
-		ip_ct_refresh(ct, *pskb, info->timeout * HZ);
-
-		/* Set expect timeout */
-		read_lock_bh(&ip_conntrack_lock);
-		exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
-				  info->sig_port[!dir]);
-		if (exp) {
-			DEBUGP("ip_ct_ras: set Q.931 expect "
-			       "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
-			       "timeout to %u seconds\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port),
-			       info->timeout);
-			set_expect_timeout(exp, info->timeout);
-		}
-		read_unlock_bh(&ip_conntrack_lock);
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, UnregistrationRequest * urq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: URQ\n");
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   urq->callSignalAddress.item,
-				   urq->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	/* Clear old expect */
-	ip_ct_remove_expectations(ct);
-	info->sig_port[dir] = 0;
-	info->sig_port[!dir] = 0;
-
-	/* Give it 30 seconds for UCF or URJ */
-	ip_ct_refresh(ct, *pskb, 30 * HZ);
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, AdmissionRequest * arq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	__be32 ip;
-	u_int16_t port;
-	typeof(set_h225_addr_hook) set_h225_addr;
-
-	DEBUGP("ip_ct_ras: ARQ\n");
-
-	set_h225_addr = rcu_dereference(set_h225_addr_hook);
-	if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
-	    get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
-	    ip == ct->tuplehash[dir].tuple.src.ip &&
-	    port == info->sig_port[dir] && set_h225_addr) {
-		/* Answering ARQ */
-		return set_h225_addr(pskb, data, 0,
-				     &arq->destCallSignalAddress,
-				     ct->tuplehash[!dir].tuple.dst.ip,
-				     info->sig_port[!dir]);
-	}
-
-	if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
-	    get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
-	    ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
-		/* Calling ARQ */
-		return set_h225_addr(pskb, data, 0,
-				     &arq->srcCallSignalAddress,
-				     ct->tuplehash[!dir].tuple.dst.ip,
-				     port);
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, AdmissionConfirm * acf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: ACF\n");
-
-	if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port))
-		return 0;
-
-	if (ip == ct->tuplehash[dir].tuple.dst.ip) {	/* Answering ACF */
-		set_sig_addr = rcu_dereference(set_sig_addr_hook);
-		if (set_sig_addr)
-			return set_sig_addr(pskb, ct, ctinfo, data,
-					    &acf->destCallSignalAddress, 1);
-		return 0;
-	}
-
-	/* Need new expect */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;
-	exp->expectfn = ip_conntrack_q931_expect;
-
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect Q.931 "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, LocationRequest * lrq)
-{
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: LRQ\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr)
-		return set_ras_addr(pskb, ct, ctinfo, data,
-				    &lrq->replyAddress, 1);
-	return 0;
-}
-
-/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, LocationConfirm * lcf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-
-	DEBUGP("ip_ct_ras: LCF\n");
-
-	if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
-		return 0;
-
-	/* Need new expect for call signal */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;
-	exp->expectfn = ip_conntrack_q931_expect;
-
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect Q.931 "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	/* Ignore rasAddress */
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, InfoRequestResponse * irr)
-{
-	int ret;
-	typeof(set_ras_addr_hook) set_ras_addr;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: IRR\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
-				   &irr->rasAddress, 1);
-		if (ret < 0)
-			return -1;
-	}
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   irr->callSignalAddress.item,
-				   irr->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RasMessage * ras)
-{
-	switch (ras->choice) {
-	case eRasMessage_gatekeeperRequest:
-		return process_grq(pskb, ct, ctinfo, data,
-				   &ras->gatekeeperRequest);
-	case eRasMessage_gatekeeperConfirm:
-		return process_gcf(pskb, ct, ctinfo, data,
-				   &ras->gatekeeperConfirm);
-	case eRasMessage_registrationRequest:
-		return process_rrq(pskb, ct, ctinfo, data,
-				   &ras->registrationRequest);
-	case eRasMessage_registrationConfirm:
-		return process_rcf(pskb, ct, ctinfo, data,
-				   &ras->registrationConfirm);
-	case eRasMessage_unregistrationRequest:
-		return process_urq(pskb, ct, ctinfo, data,
-				   &ras->unregistrationRequest);
-	case eRasMessage_admissionRequest:
-		return process_arq(pskb, ct, ctinfo, data,
-				   &ras->admissionRequest);
-	case eRasMessage_admissionConfirm:
-		return process_acf(pskb, ct, ctinfo, data,
-				   &ras->admissionConfirm);
-	case eRasMessage_locationRequest:
-		return process_lrq(pskb, ct, ctinfo, data,
-				   &ras->locationRequest);
-	case eRasMessage_locationConfirm:
-		return process_lcf(pskb, ct, ctinfo, data,
-				   &ras->locationConfirm);
-	case eRasMessage_infoRequestResponse:
-		return process_irr(pskb, ct, ctinfo, data,
-				   &ras->infoRequestResponse);
-	default:
-		DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice);
-		break;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	static RasMessage ras;
-	unsigned char *data;
-	int datalen = 0;
-	int ret;
-
-	DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Get UDP data */
-	data = get_udp_data(pskb, &datalen);
-	if (data == NULL)
-		goto accept;
-	DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-	       NIPQUAD((*pskb)->nh.iph->saddr),
-	       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
-
-	/* Decode RAS message */
-	ret = DecodeRasMessage(data, datalen, &ras);
-	if (ret < 0) {
-		if (net_ratelimit())
-			printk("ip_ct_ras: decoding error: %s\n",
-			       ret == H323_ERROR_BOUND ?
-			       "out of bound" : "out of range");
-		goto accept;
-	}
-
-	/* Process RAS message */
-	if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
-		goto drop;
-
-      accept:
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_ras: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_ras = {
-	.name = "RAS",
-	.me = THIS_MODULE,
-	.max_expected = 32,
-	.timeout = 240,
-	.tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP}},
-	.mask = {.src = {.u = {0xFFFE}},
-		 .dst = {.protonum = 0xFF}},
-	.help = ras_help,
-};
-
-/****************************************************************************/
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
-				    struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_ras;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-/* Not __exit - called from init() */
-static void fini(void)
-{
-	ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
-	ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
-	kfree(h323_buffer);
-	DEBUGP("ip_ct_h323: fini\n");
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
-	int ret;
-
-	h323_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!h323_buffer)
-		return -ENOMEM;
-	if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) ||
-	    (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) {
-		fini();
-		return ret;
-	}
-	DEBUGP("ip_ct_h323: init success\n");
-	return 0;
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL_GPL(get_h225_addr);
-EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
-EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
-EXPORT_SYMBOL_GPL(set_h245_addr_hook);
-EXPORT_SYMBOL_GPL(set_h225_addr_hook);
-EXPORT_SYMBOL_GPL(set_sig_addr_hook);
-EXPORT_SYMBOL_GPL(set_ras_addr_hook);
-EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
-EXPORT_SYMBOL_GPL(nat_t120_hook);
-EXPORT_SYMBOL_GPL(nat_h245_hook);
-EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
-EXPORT_SYMBOL_GPL(nat_q931_hook);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 connection tracking helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index 2b760c5cf709..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * ip_conntrack_pptp.c	- Version 3.0
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft.  PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * 	 - We blindly assume that control connections are always
- * 	   established in PNS->PAC direction.  This is a violation
- * 	   of RFFC2673
- * 	 - We can only support one single call within each session
- *
- * TODO:
- *	 - testing of incoming PPTP calls
- *
- * Changes:
- * 	2002-02-05 - Version 1.3
- * 	  - Call ip_conntrack_unexpect_related() from
- * 	    pptp_destroy_siblings() to destroy expectations in case
- * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * 	    (Philip Craig <philipc@snapgear.com>)
- * 	  - Add Version information at module loadtime
- * 	2002-02-10 - Version 1.6
- * 	  - move to C99 style initializers
- * 	  - remove second expectation if first arrives
- * 	2004-10-22 - Version 2.0
- * 	  - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * 	  - fix lots of linear skb assumptions from Mandrake's port
- * 	2005-06-10 - Version 2.1
- * 	  - use ip_conntrack_expect_free() instead of kfree() on the
- * 	    expect's (which are from the slab for quite some time)
- * 	2005-06-10 - Version 3.0
- * 	  - port helper to post-2.6.11 API changes,
- * 	    funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 	2005-07-30 - Version 3.1
- * 	  - port helper to 2.6.13 API changes
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "3.1"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-static DEFINE_SPINLOCK(ip_pptp_lock);
-
-int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-void
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
-			    struct ip_conntrack_expect *expect_reply);
-
-void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
-			     struct ip_conntrack_expect *exp);
-
-#if 0
-/* PptpControlMessageType names */
-const char *pptp_msg_name[] = {
-	"UNKNOWN_MESSAGE",
-	"START_SESSION_REQUEST",
-	"START_SESSION_REPLY",
-	"STOP_SESSION_REQUEST",
-	"STOP_SESSION_REPLY",
-	"ECHO_REQUEST",
-	"ECHO_REPLY",
-	"OUT_CALL_REQUEST",
-	"OUT_CALL_REPLY",
-	"IN_CALL_REQUEST",
-	"IN_CALL_REPLY",
-	"IN_CALL_CONNECT",
-	"CALL_CLEAR_REQUEST",
-	"CALL_DISCONNECT_NOTIFY",
-	"WAN_ERROR_NOTIFY",
-	"SET_LINK_INFO"
-};
-EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-
-#define PPTP_GRE_TIMEOUT 		(10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT 	(5 HOURS)
-
-static void pptp_expectfn(struct ip_conntrack *ct,
-			 struct ip_conntrack_expect *exp)
-{
-	typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
-
-	DEBUGP("increasing timeouts\n");
-
-	/* increase timeout of GRE data channel conntrack entry */
-	ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
-	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
-	/* Can you see how rusty this code is, compared with the pre-2.6.11
-	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
-
-	rcu_read_lock();
-	ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
-	if (!ip_nat_pptp_expectfn) {
-		struct ip_conntrack_tuple inv_t;
-		struct ip_conntrack_expect *exp_other;
-
-		/* obviously this tuple inversion only works until you do NAT */
-		invert_tuplepr(&inv_t, &exp->tuple);
-		DEBUGP("trying to unexpect other dir: ");
-		DUMP_TUPLE(&inv_t);
-
-		exp_other = ip_conntrack_expect_find_get(&inv_t);
-		if (exp_other) {
-			/* delete other expectation.  */
-			DEBUGP("found\n");
-			ip_conntrack_unexpect_related(exp_other);
-			ip_conntrack_expect_put(exp_other);
-		} else {
-			DEBUGP("not found\n");
-		}
-	} else {
-		/* we need more than simple inversion */
-		ip_nat_pptp_expectfn(ct, exp);
-	}
-	rcu_read_unlock();
-}
-
-static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_expect *exp;
-
-	DEBUGP("trying to timeout ct or exp for tuple ");
-	DUMP_TUPLE(t);
-
-	h = ip_conntrack_find_get(t, NULL);
-	if (h)  {
-		struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
-		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
-		sibling->proto.gre.timeout = 0;
-		sibling->proto.gre.stream_timeout = 0;
-		if (del_timer(&sibling->timeout))
-			sibling->timeout.function((unsigned long)sibling);
-		ip_conntrack_put(sibling);
-		return 1;
-	} else {
-		exp = ip_conntrack_expect_find_get(t);
-		if (exp) {
-			DEBUGP("unexpect_related of expect %p\n", exp);
-			ip_conntrack_unexpect_related(exp);
-			ip_conntrack_expect_put(exp);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-
-/* timeout GRE data connections */
-static void pptp_destroy_siblings(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_tuple t;
-
-	ip_ct_gre_keymap_destroy(ct);
-	/* Since ct->sibling_list has literally rusted away in 2.6.11,
-	 * we now need another way to find out about our sibling
-	 * contrack and expects... -HW */
-
-	/* try original (pns->pac) tuple */
-	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
-	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-	t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-
-	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout original pns->pac ct/exp\n");
-
-	/* try reply (pac->pns) tuple */
-	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
-	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-	t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-
-	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *ct,
-	__be16 callid,
-	__be16 peer_callid)
-{
-	struct ip_conntrack_expect *exp_orig, *exp_reply;
-	int ret = 1;
-	typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
-
-	exp_orig = ip_conntrack_expect_alloc(ct);
-	if (exp_orig == NULL)
-		goto out;
-
-	exp_reply = ip_conntrack_expect_alloc(ct);
-	if (exp_reply == NULL)
-		goto out_put_orig;
-
-	/* original direction, PNS->PAC */
-	exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-	exp_orig->tuple.src.u.gre.key = peer_callid;
-	exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-	exp_orig->tuple.dst.u.gre.key = callid;
-	exp_orig->tuple.dst.protonum = IPPROTO_GRE;
-
-	exp_orig->mask.src.ip = htonl(0xffffffff);
-	exp_orig->mask.src.u.all = 0;
-	exp_orig->mask.dst.u.gre.key = htons(0xffff);
-	exp_orig->mask.dst.ip = htonl(0xffffffff);
-	exp_orig->mask.dst.protonum = 0xff;
-
-	exp_orig->master = ct;
-	exp_orig->expectfn = pptp_expectfn;
-	exp_orig->flags = 0;
-
-	/* both expectations are identical apart from tuple */
-	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-
-	/* reply direction, PAC->PNS */
-	exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-	exp_reply->tuple.src.u.gre.key = callid;
-	exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-	exp_reply->tuple.dst.u.gre.key = peer_callid;
-	exp_reply->tuple.dst.protonum = IPPROTO_GRE;
-
-	ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
-	if (ip_nat_pptp_exp_gre)
-		ip_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (ip_conntrack_expect_related(exp_orig) != 0)
-		goto out_put_both;
-	if (ip_conntrack_expect_related(exp_reply) != 0)
-		goto out_unexpect_orig;
-
-	/* Add GRE keymap entries */
-	if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
-		goto out_unexpect_both;
-	if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
-		ip_ct_gre_keymap_destroy(ct);
-		goto out_unexpect_both;
-	}
-	ret = 0;
-
-out_put_both:
-	ip_conntrack_expect_put(exp_reply);
-out_put_orig:
-	ip_conntrack_expect_put(exp_orig);
-out:
-	return ret;
-
-out_unexpect_both:
-	ip_conntrack_unexpect_related(exp_reply);
-out_unexpect_orig:
-	ip_conntrack_unexpect_related(exp_orig);
-	goto out_put_both;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct PptpControlHeader *ctlh,
-		 union pptp_ctrl_union *pptpReq,
-		 unsigned int reqlen,
-		 struct ip_conntrack *ct,
-		 enum ip_conntrack_info ctinfo)
-{
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	u_int16_t msg;
-	__be16 cid = 0, pcid = 0;
-	typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
-
-	msg = ntohs(ctlh->messageType);
-	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
-
-	switch (msg) {
-	case PPTP_START_SESSION_REPLY:
-		/* server confirms new control session */
-		if (info->sstate < PPTP_SESSION_REQUESTED)
-			goto invalid;
-		if (pptpReq->srep.resultCode == PPTP_START_OK)
-			info->sstate = PPTP_SESSION_CONFIRMED;
-		else
-			info->sstate = PPTP_SESSION_ERROR;
-		break;
-
-	case PPTP_STOP_SESSION_REPLY:
-		/* server confirms end of control session */
-		if (info->sstate > PPTP_SESSION_STOPREQ)
-			goto invalid;
-		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
-			info->sstate = PPTP_SESSION_NONE;
-		else
-			info->sstate = PPTP_SESSION_ERROR;
-		break;
-
-	case PPTP_OUT_CALL_REPLY:
-		/* server accepted call, we now expect GRE frames */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		if (info->cstate != PPTP_CALL_OUT_REQ &&
-		    info->cstate != PPTP_CALL_OUT_CONF)
-			goto invalid;
-
-		cid = pptpReq->ocack.callID;
-		pcid = pptpReq->ocack.peersCallID;
-		if (info->pns_call_id != pcid)
-			goto invalid;
-		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
-			ntohs(cid), ntohs(pcid));
-
-		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
-			info->cstate = PPTP_CALL_OUT_CONF;
-			info->pac_call_id = cid;
-			exp_gre(ct, cid, pcid);
-		} else
-			info->cstate = PPTP_CALL_NONE;
-		break;
-
-	case PPTP_IN_CALL_REQUEST:
-		/* server tells us about incoming call request */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-
-		cid = pptpReq->icreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = cid;
-		break;
-
-	case PPTP_IN_CALL_CONNECT:
-		/* server tells us about incoming call established */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		if (info->cstate != PPTP_CALL_IN_REP &&
-		    info->cstate != PPTP_CALL_IN_CONF)
-			goto invalid;
-
-		pcid = pptpReq->iccon.peersCallID;
-		cid = info->pac_call_id;
-
-		if (info->pns_call_id != pcid)
-			goto invalid;
-
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
-		info->cstate = PPTP_CALL_IN_CONF;
-
-		/* we expect a GRE connection from PAC to PNS */
-		exp_gre(ct, cid, pcid);
-		break;
-
-	case PPTP_CALL_DISCONNECT_NOTIFY:
-		/* server confirms disconnect */
-		cid = pptpReq->disc.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->cstate = PPTP_CALL_NONE;
-
-		/* untrack this call id, unexpect GRE packets */
-		pptp_destroy_siblings(ct);
-		break;
-
-	case PPTP_WAN_ERROR_NOTIFY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* I don't have to explain these ;) */
-		break;
-	default:
-		goto invalid;
-	}
-
-	ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
-	if (ip_nat_pptp_inbound)
-		return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
-	return NF_ACCEPT;
-
-invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
-	return NF_ACCEPT;
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct PptpControlHeader *ctlh,
-		  union pptp_ctrl_union *pptpReq,
-		  unsigned int reqlen,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo)
-{
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	u_int16_t msg;
-	__be16 cid = 0, pcid = 0;
-	typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
-
-	msg = ntohs(ctlh->messageType);
-	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
-
-	switch (msg) {
-	case PPTP_START_SESSION_REQUEST:
-		/* client requests for new control session */
-		if (info->sstate != PPTP_SESSION_NONE)
-			goto invalid;
-		info->sstate = PPTP_SESSION_REQUESTED;
-		break;
-	case PPTP_STOP_SESSION_REQUEST:
-		/* client requests end of control session */
-		info->sstate = PPTP_SESSION_STOPREQ;
-		break;
-
-	case PPTP_OUT_CALL_REQUEST:
-		/* client initiating connection to server */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		info->cstate = PPTP_CALL_OUT_REQ;
-		/* track PNS call id */
-		cid = pptpReq->ocreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->pns_call_id = cid;
-		break;
-	case PPTP_IN_CALL_REPLY:
-		/* client answers incoming call */
-		if (info->cstate != PPTP_CALL_IN_REQ &&
-		    info->cstate != PPTP_CALL_IN_REP)
-			goto invalid;
-
-		cid = pptpReq->icack.callID;
-		pcid = pptpReq->icack.peersCallID;
-		if (info->pac_call_id != pcid)
-			goto invalid;
-		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
-		       ntohs(cid), ntohs(pcid));
-
-		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
-			/* part two of the three-way handshake */
-			info->cstate = PPTP_CALL_IN_REP;
-			info->pns_call_id = cid;
-		} else
-			info->cstate = PPTP_CALL_NONE;
-		break;
-
-	case PPTP_CALL_CLEAR_REQUEST:
-		/* client requests hangup of call */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		/* FUTURE: iterate over all calls and check if
-		 * call ID is valid.  We don't do this without newnat,
-		 * because we only know about last call */
-		info->cstate = PPTP_CALL_CLEAR_REQ;
-		break;
-	case PPTP_SET_LINK_INFO:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* I don't have to explain these ;) */
-		break;
-	default:
-		goto invalid;
-	}
-
-	ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
-	if (ip_nat_pptp_outbound)
-		return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
-	return NF_ACCEPT;
-
-invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
-	return NF_ACCEPT;
-}
-
-static const unsigned int pptp_msg_size[] = {
-	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
-	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
-	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
-	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
-	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
-	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
-	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
-	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
-	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
-	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
-	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
-	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
-	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
-};
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff **pskb,
-		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-
-{
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	struct tcphdr _tcph, *tcph;
-	struct pptp_pkt_hdr _pptph, *pptph;
-	struct PptpControlHeader _ctlh, *ctlh;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
-	unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
-	unsigned int datalen, reqlen, nexthdr_off;
-	int oldsstate, oldcstate;
-	int ret;
-	u_int16_t msg;
-
-	/* don't do any tracking before tcp handshake complete */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
-		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	nexthdr_off = (*pskb)->nh.iph->ihl*4;
-	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
-	BUG_ON(!tcph);
-	nexthdr_off += tcph->doff * 4;
-	datalen = tcplen - tcph->doff * 4;
-
-	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
-	if (!pptph) {
-		DEBUGP("no full PPTP header, can't track\n");
-		return NF_ACCEPT;
-	}
-	nexthdr_off += sizeof(_pptph);
-	datalen -= sizeof(_pptph);
-
-	/* if it's not a control message we can't do anything with it */
-	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
-	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
-		DEBUGP("not a control packet\n");
-		return NF_ACCEPT;
-	}
-
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh)
-		return NF_ACCEPT;
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-
-	reqlen = datalen;
-	msg = ntohs(ctlh->messageType);
-	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
-		return NF_ACCEPT;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq)
-		return NF_ACCEPT;
-
-	oldsstate = info->sstate;
-	oldcstate = info->cstate;
-
-	spin_lock_bh(&ip_pptp_lock);
-
-	/* FIXME: We just blindly assume that the control connection is always
-	 * established from PNS->PAC.  However, RFC makes no guarantee */
-	if (dir == IP_CT_DIR_ORIGINAL)
-		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
-					ctinfo);
-	else
-		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
-				       ctinfo);
-	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
-		oldsstate, info->sstate, oldcstate, info->cstate);
-	spin_unlock_bh(&ip_pptp_lock);
-
-	return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
-	.list = { NULL, NULL },
-	.name = "pptp",
-	.me = THIS_MODULE,
-	.max_expected = 2,
-	.timeout = 5 * 60,
-	.tuple = { .src = { .ip = 0,
-			    .u = { .tcp = { .port =
-				    __constant_htons(PPTP_CONTROL_PORT) } }
-			  },
-		   .dst = { .ip = 0,
-			    .u = { .all = 0 },
-			    .protonum = IPPROTO_TCP
-			  }
-		 },
-	.mask = { .src = { .ip = 0,
-			   .u = { .tcp = { .port = __constant_htons(0xffff) } }
-			 },
-		  .dst = { .ip = 0,
-			   .u = { .all = 0 },
-			   .protonum = 0xff
-			 }
-		},
-	.help = conntrack_pptp_help,
-	.destroy = pptp_destroy_siblings,
-};
-
-extern void ip_ct_proto_gre_fini(void);
-extern int __init ip_ct_proto_gre_init(void);
-
-/* ip_conntrack_pptp initialization */
-static int __init ip_conntrack_helper_pptp_init(void)
-{
-	int retcode;
-
-	retcode = ip_ct_proto_gre_init();
-	if (retcode < 0)
-		return retcode;
-
-	DEBUGP(" registering helper\n");
-	if ((retcode = ip_conntrack_helper_register(&pptp))) {
-		printk(KERN_ERR "Unable to register conntrack application "
-				"helper for pptp: %d\n", retcode);
-		ip_ct_proto_gre_fini();
-		return retcode;
-	}
-
-	printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
-	return 0;
-}
-
-static void __exit ip_conntrack_helper_pptp_fini(void)
-{
-	ip_conntrack_helper_unregister(&pptp);
-	ip_ct_proto_gre_fini();
-	printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(ip_conntrack_helper_pptp_init);
-module_exit(ip_conntrack_helper_pptp_fini);
-
-EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
-EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index 053e591f407a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* IRC extension for IP connection tracking, Version 1.21
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.c
- *
- * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- **
- *	Module load syntax:
- * 	insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
- *			    max_dcc_channels=n dcc_timeout=secs
- *
- * 	please give the ports of all IRC servers You wish to connect to.
- *	If You don't specify ports, the default will be port 6667.
- *	With max_dcc_channels you can define the maximum number of not
- *	yet answered DCC channels per IRC session (default 8).
- *	With dcc_timeout you can specify how long the system waits for
- *	an expected DCC channel (default 300 seconds).
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/moduleparam.h>
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-static unsigned int max_dcc_channels = 8;
-static unsigned int dcc_timeout = 300;
-/* This is slow, but it's simple. --RR */
-static char *irc_buffer;
-static DEFINE_SPINLOCK(irc_buffer_lock);
-
-unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				unsigned int matchoff,
-				unsigned int matchlen,
-				struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
-MODULE_LICENSE("GPL");
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, uint, 0400);
-MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, uint, 0400);
-MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
-
-static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
-#define MINMATCHLEN	5
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
-		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
-/* tries to get the ip_addr and port out of a dcc command
-   return value: -1 on failure, 0 on success
-	data		pointer to first byte of DCC command data
-	data_end	pointer to last byte of dcc command data
-	ip		returns parsed ip of dcc command
-	port		returns parsed port of dcc command
-	ad_beg_p	returns pointer to first byte of addr data
-	ad_end_p	returns pointer to last byte of addr data */
-{
-
-	/* at least 12: "AAAAAAAA P\1\n" */
-	while (*data++ != ' ')
-		if (data > data_end - 12)
-			return -1;
-
-	*ad_beg_p = data;
-	*ip = simple_strtoul(data, &data, 10);
-
-	/* skip blanks between ip and port */
-	while (*data == ' ') {
-		if (data >= data_end)
-			return -1;
-		data++;
-	}
-
-	*port = simple_strtoul(data, &data, 10);
-	*ad_end_p = data;
-
-	return 0;
-}
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff;
-	struct tcphdr _tcph, *th;
-	char *data, *data_limit, *ib_ptr;
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_conntrack_expect *exp;
-	u32 seq;
-	u_int32_t dcc_ip;
-	u_int16_t dcc_port;
-	int i, ret = NF_ACCEPT;
-	char *addr_beg_p, *addr_end_p;
-	typeof(ip_nat_irc_hook) ip_nat_irc;
-
-	DEBUGP("entered\n");
-
-	/* If packet is coming from IRC server */
-	if (dir == IP_CT_DIR_REPLY)
-		return NF_ACCEPT;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		DEBUGP("Conntrackinfo = %u\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return NF_ACCEPT;
-
-	/* No data? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
-	if (dataoff >= (*pskb)->len)
-		return NF_ACCEPT;
-
-	spin_lock_bh(&irc_buffer_lock);
-	ib_ptr = skb_header_pointer(*pskb, dataoff,
-				    (*pskb)->len - dataoff, irc_buffer);
-	BUG_ON(ib_ptr == NULL);
-
-	data = ib_ptr;
-	data_limit = ib_ptr + (*pskb)->len - dataoff;
-
-	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
-	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
-	while (data < (data_limit - (19 + MINMATCHLEN))) {
-		if (memcmp(data, "\1DCC ", 5)) {
-			data++;
-			continue;
-		}
-
-		data += 5;
-		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
-
-		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
-			NIPQUAD(iph->saddr), ntohs(th->source),
-			NIPQUAD(iph->daddr), ntohs(th->dest));
-
-		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
-			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
-				/* no match */
-				continue;
-			}
-
-			DEBUGP("DCC %s detected\n", dccprotos[i]);
-			data += strlen(dccprotos[i]);
-			/* we have at least
-			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
-			 * data left (== 14/13 bytes) */
-			if (parse_dcc((char *)data, data_limit, &dcc_ip,
-				       &dcc_port, &addr_beg_p, &addr_end_p)) {
-				/* unable to parse */
-				DEBUGP("unable to parse dcc command\n");
-				continue;
-			}
-			DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
-				HIPQUAD(dcc_ip), dcc_port);
-
-			/* dcc_ip can be the internal OR external (NAT'ed) IP
-			 * Tiago Sousa <mirage@kaotik.org> */
-			if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
-			    && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
-				if (net_ratelimit())
-					printk(KERN_WARNING
-						"Forged DCC command from "
-						"%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
-				NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
-						HIPQUAD(dcc_ip), dcc_port);
-
-				continue;
-			}
-
-			exp = ip_conntrack_expect_alloc(ct);
-			if (exp == NULL) {
-				ret = NF_DROP;
-				goto out;
-			}
-
-			/* save position of address in dcc string,
-			 * necessary for NAT */
-			DEBUGP("tcph->seq = %u\n", th->seq);
-			seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
-
-			/* We refer to the reverse direction ("!dir")
-			 * tuples here, because we're expecting
-			 * something in the other * direction.
-			 * Doesn't matter unless NAT is happening.  */
-			exp->tuple = ((struct ip_conntrack_tuple)
-				{ { 0, { 0 } },
-				  { ct->tuplehash[!dir].tuple.dst.ip,
-				    { .tcp = { htons(dcc_port) } },
-				    IPPROTO_TCP }});
-			exp->mask = ((struct ip_conntrack_tuple)
-				{ { 0, { 0 } },
-				  { htonl(0xFFFFFFFF),
-					{ .tcp = { htons(0xFFFF) } }, 0xFF }});
-			exp->expectfn = NULL;
-			exp->flags = 0;
-			ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
-			if (ip_nat_irc)
-				ret = ip_nat_irc(pskb, ctinfo,
-						 addr_beg_p - ib_ptr,
-						 addr_end_p - addr_beg_p,
-						 exp);
-			else if (ip_conntrack_expect_related(exp) != 0)
-				ret = NF_DROP;
-			ip_conntrack_expect_put(exp);
-			goto out;
-		} /* for .. NUM_DCCPROTO */
-	} /* while data < ... */
-
- out:
-	spin_unlock_bh(&irc_buffer_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][sizeof("irc-65535")];
-
-static void ip_conntrack_irc_fini(void);
-
-static int __init ip_conntrack_irc_init(void)
-{
-	int i, ret;
-	struct ip_conntrack_helper *hlpr;
-	char *tmpname;
-
-	if (max_dcc_channels < 1) {
-		printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
-		return -EBUSY;
-	}
-
-	irc_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!irc_buffer)
-		return -ENOMEM;
-
-	/* If no port given, default to standard irc port */
-	if (ports_c == 0)
-		ports[ports_c++] = IRC_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		hlpr = &irc_helpers[i];
-		hlpr->tuple.src.u.tcp.port = htons(ports[i]);
-		hlpr->tuple.dst.protonum = IPPROTO_TCP;
-		hlpr->mask.src.u.tcp.port = htons(0xFFFF);
-		hlpr->mask.dst.protonum = 0xFF;
-		hlpr->max_expected = max_dcc_channels;
-		hlpr->timeout = dcc_timeout;
-		hlpr->me = THIS_MODULE;
-		hlpr->help = help;
-
-		tmpname = &irc_names[i][0];
-		if (ports[i] == IRC_PORT)
-			sprintf(tmpname, "irc");
-		else
-			sprintf(tmpname, "irc-%d", i);
-		hlpr->name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret = ip_conntrack_helper_register(hlpr);
-
-		if (ret) {
-			printk("ip_conntrack_irc: ERROR registering port %d\n",
-				ports[i]);
-			ip_conntrack_irc_fini();
-			return -EBUSY;
-		}
-	}
-	return 0;
-}
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void ip_conntrack_irc_fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
-	kfree(irc_buffer);
-}
-
-module_init(ip_conntrack_irc_init);
-module_exit(ip_conntrack_irc_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index cc6dd49c9da0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- *      NetBIOS name service broadcast connection tracking helper
- *
- *      (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-/*
- *      This helper tracks locally originating NetBIOS name service
- *      requests by issuing permanent expectations (valid until
- *      timing out) matching all reply connections from the
- *      destination network. The only NetBIOS specific thing is
- *      actually the port number.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#define NMBD_PORT	137
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int timeout = 3;
-module_param(timeout, uint, 0400);
-MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	struct ip_conntrack_expect *exp;
-	struct iphdr *iph = (*pskb)->nh.iph;
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-	struct in_device *in_dev;
-	__be32 mask = 0;
-
-	/* we're only interested in locally generated packets */
-	if ((*pskb)->sk == NULL)
-		goto out;
-	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-		goto out;
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		goto out;
-
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
-	if (in_dev != NULL) {
-		for_primary_ifa(in_dev) {
-			if (ifa->ifa_broadcast == iph->daddr) {
-				mask = ifa->ifa_mask;
-				break;
-			}
-		} endfor_ifa(in_dev);
-	}
-	rcu_read_unlock();
-
-	if (mask == 0)
-		goto out;
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL)
-		goto out;
-
-	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-	exp->mask.src.ip          = mask;
-	exp->mask.src.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.ip          = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.protonum    = 0xFF;
-
-	exp->expectfn             = NULL;
-	exp->flags                = IP_CT_EXPECT_PERMANENT;
-
-	ip_conntrack_expect_related(exp);
-	ip_conntrack_expect_put(exp);
-
-	ip_ct_refresh(ct, *pskb, timeout * HZ);
-out:
-	return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper helper = {
-	.name			= "netbios-ns",
-	.tuple = {
-		.src = {
-			.u = {
-				.udp = {
-					.port	= __constant_htons(NMBD_PORT),
-				}
-			}
-		},
-		.dst = {
-			.protonum	= IPPROTO_UDP,
-		},
-	},
-	.mask = {
-		.src = {
-			.u = {
-				.udp = {
-					.port	= __constant_htons(0xFFFF),
-				}
-			}
-		},
-		.dst = {
-			.protonum	= 0xFF,
-		},
-	},
-	.max_expected		= 1,
-	.me			= THIS_MODULE,
-	.help			= help,
-};
-
-static int __init ip_conntrack_netbios_ns_init(void)
-{
-	helper.timeout = timeout;
-	return ip_conntrack_helper_register(&helper);
-}
-
-static void __exit ip_conntrack_netbios_ns_fini(void)
-{
-	ip_conntrack_helper_unregister(&helper);
-}
-
-module_init(ip_conntrack_netbios_ns_init);
-module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index 9228b76ccd9a..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-/* Connection tracking via netlink socket. Allows for user space
- * protocol helpers and general trouble making from userspace.
- *
- * (C) 2001 by Jay Schulist <jschlst@samba.org>
- * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
- * Initial connection tracking via netlink development funded and
- * generally made possible by Network Robots, Inc. (www.networkrobots.com)
- *
- * Further development of this code funded by Astaro AG (http://www.astaro.com)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-MODULE_LICENSE("GPL");
-
-static char __initdata version[] = "0.90";
-
-static inline int
-ctnetlink_dump_tuples_proto(struct sk_buff *skb,
-			    const struct ip_conntrack_tuple *tuple,
-			    struct ip_conntrack_protocol *proto)
-{
-	int ret = 0;
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-
-	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
-
-	if (likely(proto->tuple_to_nfattr))
-		ret = proto->tuple_to_nfattr(skb, tuple);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return ret;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples_ip(struct sk_buff *skb,
-			 const struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
-
-	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
-	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples(struct sk_buff *skb,
-		      const struct ip_conntrack_tuple *tuple)
-{
-	int ret;
-	struct ip_conntrack_protocol *proto;
-
-	ret = ctnetlink_dump_tuples_ip(skb, tuple);
-	if (unlikely(ret < 0))
-		return ret;
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
-	ip_conntrack_proto_put(proto);
-
-	return ret;
-}
-
-static inline int
-ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 status = htonl((u_int32_t) ct->status);
-	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	long timeout_l = ct->timeout.expires - jiffies;
-	__be32 timeout;
-
-	if (timeout_l < 0)
-		timeout = 0;
-	else
-		timeout = htonl(timeout_l / HZ);
-
-	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
-	struct nfattr *nest_proto;
-	int ret;
-
-	if (!proto->to_nfattr) {
-		ip_conntrack_proto_put(proto);
-		return 0;
-	}
-
-	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
-
-	ret = proto->to_nfattr(skb, nest_proto, ct);
-
-	ip_conntrack_proto_put(proto);
-
-	NFA_NEST_END(skb, nest_proto);
-
-	return ret;
-
-nfattr_failure:
-	ip_conntrack_proto_put(proto);
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	struct nfattr *nest_helper;
-
-	if (!ct->helper)
-		return 0;
-
-	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
-
-	if (ct->helper->to_nfattr)
-		ct->helper->to_nfattr(skb, ct);
-
-	NFA_NEST_END(skb, nest_helper);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
-			enum ip_conntrack_dir dir)
-{
-	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
-	struct nfattr *nest_count = NFA_NEST(skb, type);
-	__be32 tmp;
-
-	tmp = htonl(ct->counters[dir].packets);
-	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
-
-	tmp = htonl(ct->counters[dir].bytes);
-	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
-
-	NFA_NEST_END(skb, nest_count);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline int
-ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 mark = htonl(ct->mark);
-
-	NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-#else
-#define ctnetlink_dump_mark(a, b) (0)
-#endif
-
-static inline int
-ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 id = htonl(ct->id);
-	NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 use = htonl(atomic_read(&ct->ct_general.use));
-
-	NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
-static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-		    int event, int nowait,
-		    const struct ip_conntrack *ct)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct nfattr *nest_parms;
-	unsigned char *b;
-
-	b = skb->tail;
-
-	event |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-	nfmsg  = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	if (ctnetlink_dump_status(skb, ct) < 0 ||
-	    ctnetlink_dump_timeout(skb, ct) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
-	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
-	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
-	    ctnetlink_dump_mark(skb, ct) < 0 ||
-	    ctnetlink_dump_id(skb, ct) < 0 ||
-	    ctnetlink_dump_use(skb, ct) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
-				     unsigned long events, void *ptr)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct nfattr *nest_parms;
-	struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
-	struct sk_buff *skb;
-	unsigned int type;
-	unsigned char *b;
-	unsigned int flags = 0, group;
-
-	/* ignore our fake conntrack entry */
-	if (ct == &ip_conntrack_untracked)
-		return NOTIFY_DONE;
-
-	if (events & IPCT_DESTROY) {
-		type = IPCTNL_MSG_CT_DELETE;
-		group = NFNLGRP_CONNTRACK_DESTROY;
-	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
-		type = IPCTNL_MSG_CT_NEW;
-		flags = NLM_F_CREATE|NLM_F_EXCL;
-		group = NFNLGRP_CONNTRACK_NEW;
-	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
-		type = IPCTNL_MSG_CT_NEW;
-		group = NFNLGRP_CONNTRACK_UPDATE;
-	} else
-		return NOTIFY_DONE;
-
-	if (!nfnetlink_has_listeners(group))
-		return NOTIFY_DONE;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb)
-		return NOTIFY_DONE;
-
-	b = skb->tail;
-
-	type |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
-	nfmsg = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = flags;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	= NFNETLINK_V0;
-	nfmsg->res_id	= 0;
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	if (events & IPCT_DESTROY) {
-		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
-			goto nfattr_failure;
-	} else {
-		if (ctnetlink_dump_status(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if (ctnetlink_dump_timeout(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if (events & IPCT_PROTOINFO
-		    && ctnetlink_dump_protoinfo(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if ((events & IPCT_HELPER || ct->helper)
-		    && ctnetlink_dump_helpinfo(skb, ct) < 0)
-			goto nfattr_failure;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-		if ((events & IPCT_MARK || ct->mark)
-		    && ctnetlink_dump_mark(skb, ct) < 0)
-			goto nfattr_failure;
-#endif
-
-		if (events & IPCT_COUNTER_FILLING &&
-		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
-			goto nfattr_failure;
-	}
-
-	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, group, 0);
-	return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
-	kfree_skb(skb);
-	return NOTIFY_DONE;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-static int ctnetlink_done(struct netlink_callback *cb)
-{
-	if (cb->args[1])
-		ip_conntrack_put((struct ip_conntrack *)cb->args[1]);
-	return 0;
-}
-
-static int
-ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack *ct, *last;
-	struct ip_conntrack_tuple_hash *h;
-	struct list_head *i;
-
-	read_lock_bh(&ip_conntrack_lock);
-	last = (struct ip_conntrack *)cb->args[1];
-	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
-restart:
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-			h = (struct ip_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = tuplehash_to_ctrack(h);
-			if (cb->args[1]) {
-				if (ct != last)
-					continue;
-				cb->args[1] = 0;
-			}
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0) {
-				nf_conntrack_get(&ct->ct_general);
-				cb->args[1] = (unsigned long)ct;
-				goto out;
-			}
-#ifdef CONFIG_NF_CT_ACCT
-			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
-						IPCTNL_MSG_CT_GET_CTRZERO)
-				memset(&ct->counters, 0, sizeof(ct->counters));
-#endif
-		}
-		if (cb->args[1]) {
-			cb->args[1] = 0;
-			goto restart;
-		}
-	}
-out:
-	read_unlock_bh(&ip_conntrack_lock);
-	if (last)
-		ip_conntrack_put(last);
-
-	return skb->len;
-}
-
-static const size_t cta_min_ip[CTA_IP_MAX] = {
-	[CTA_IP_V4_SRC-1]	= sizeof(__be32),
-	[CTA_IP_V4_DST-1]	= sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *tb[CTA_IP_MAX];
-
-	nfattr_parse_nested(tb, CTA_IP_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
-		return -EINVAL;
-
-	if (!tb[CTA_IP_V4_SRC-1])
-		return -EINVAL;
-	tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-
-	if (!tb[CTA_IP_V4_DST-1])
-		return -EINVAL;
-	tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
-
-	return 0;
-}
-
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
-	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
-	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_ICMP_CODE-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_ICMP_ID-1]	= sizeof(u_int16_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_proto(struct nfattr *attr,
-			    struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *tb[CTA_PROTO_MAX];
-	struct ip_conntrack_protocol *proto;
-	int ret = 0;
-
-	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
-		return -EINVAL;
-
-	if (!tb[CTA_PROTO_NUM-1])
-		return -EINVAL;
-	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-
-	if (likely(proto->nfattr_to_tuple))
-		ret = proto->nfattr_to_tuple(tb, tuple);
-
-	ip_conntrack_proto_put(proto);
-
-	return ret;
-}
-
-static inline int
-ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
-		      enum ctattr_tuple type)
-{
-	struct nfattr *tb[CTA_TUPLE_MAX];
-	int err;
-
-	memset(tuple, 0, sizeof(*tuple));
-
-	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
-
-	if (!tb[CTA_TUPLE_IP-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
-	if (err < 0)
-		return err;
-
-	if (!tb[CTA_TUPLE_PROTO-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
-	if (err < 0)
-		return err;
-
-	/* orig and expect tuples get DIR_ORIGINAL */
-	if (type == CTA_TUPLE_REPLY)
-		tuple->dst.dir = IP_CT_DIR_REPLY;
-	else
-		tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
-	return 0;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
-	[CTA_PROTONAT_PORT_MIN-1]	= sizeof(u_int16_t),
-	[CTA_PROTONAT_PORT_MAX-1]	= sizeof(u_int16_t),
-};
-
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
-				     const struct ip_conntrack *ct,
-				     struct ip_nat_range *range)
-{
-	struct nfattr *tb[CTA_PROTONAT_MAX];
-	struct ip_nat_protocol *npt;
-
-	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
-		return -EINVAL;
-
-	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
-	if (!npt->nfattr_to_range) {
-		ip_nat_proto_put(npt);
-		return 0;
-	}
-
-	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
-	if (npt->nfattr_to_range(tb, range) > 0)
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
-	ip_nat_proto_put(npt);
-
-	return 0;
-}
-
-static const size_t cta_min_nat[CTA_NAT_MAX] = {
-	[CTA_NAT_MINIP-1]       = sizeof(__be32),
-	[CTA_NAT_MAXIP-1]       = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_nat(struct nfattr *nat,
-		    const struct ip_conntrack *ct, struct ip_nat_range *range)
-{
-	struct nfattr *tb[CTA_NAT_MAX];
-	int err;
-
-	memset(range, 0, sizeof(*range));
-
-	nfattr_parse_nested(tb, CTA_NAT_MAX, nat);
-
-	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
-		return -EINVAL;
-
-	if (tb[CTA_NAT_MINIP-1])
-		range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
-
-	if (!tb[CTA_NAT_MAXIP-1])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
-
-	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
-
-	if (!tb[CTA_NAT_PROTO-1])
-		return 0;
-
-	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-#endif
-
-static inline int
-ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
-{
-	struct nfattr *tb[CTA_HELP_MAX];
-
-	nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
-
-	if (!tb[CTA_HELP_NAME-1])
-		return -EINVAL;
-
-	*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
-
-	return 0;
-}
-
-static const size_t cta_min[CTA_MAX] = {
-	[CTA_STATUS-1] 		= sizeof(__be32),
-	[CTA_TIMEOUT-1] 	= sizeof(__be32),
-	[CTA_MARK-1]		= sizeof(__be32),
-	[CTA_USE-1]		= sizeof(__be32),
-	[CTA_ID-1]		= sizeof(__be32)
-};
-
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack *ct;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
-	else {
-		/* Flush the whole table */
-		ip_conntrack_flush();
-		return 0;
-	}
-
-	if (err < 0)
-		return err;
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h)
-		return -ENOENT;
-
-	ct = tuplehash_to_ctrack(h);
-
-	if (cda[CTA_ID-1]) {
-		u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
-		if (ct->id != id) {
-			ip_conntrack_put(ct);
-			return -ENOENT;
-		}
-	}
-	if (del_timer(&ct->timeout))
-		ct->timeout.function((unsigned long)ct);
-
-	ip_conntrack_put(ct);
-
-	return 0;
-}
-
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack *ct;
-	struct sk_buff *skb2 = NULL;
-	int err = 0;
-
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct nfgenmsg *msg = NLMSG_DATA(nlh);
-		u32 rlen;
-
-		if (msg->nfgen_family != AF_INET)
-			return -EAFNOSUPPORT;
-
-#ifndef CONFIG_IP_NF_CT_ACCT
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
-			return -ENOTSUPP;
-#endif
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
-	}
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
-	else
-		return -EINVAL;
-
-	if (err < 0)
-		return err;
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h)
-		return -ENOENT;
-
-	ct = tuplehash_to_ctrack(h);
-
-	err = -ENOMEM;
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb2) {
-		ip_conntrack_put(ct);
-		return -ENOMEM;
-	}
-
-	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
-				  IPCTNL_MSG_CT_NEW, 1, ct);
-	ip_conntrack_put(ct);
-	if (err <= 0)
-		goto free;
-
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	if (err < 0)
-		goto out;
-
-	return 0;
-
-free:
-	kfree_skb(skb2);
-out:
-	return err;
-}
-
-static inline int
-ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	unsigned long d;
-	unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
-	d = ct->status ^ status;
-
-	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
-		/* unchangeable */
-		return -EINVAL;
-
-	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
-		/* SEEN_REPLY bit can only be set */
-		return -EINVAL;
-
-
-	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
-		/* ASSURED bit can only be set */
-		return -EINVAL;
-
-	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
-		return -EINVAL;
-#else
-		struct ip_nat_range range;
-
-		if (cda[CTA_NAT_DST-1]) {
-			if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (ip_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_PRE_ROUTING)))
-				return -EEXIST;
-			ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-		}
-		if (cda[CTA_NAT_SRC-1]) {
-			if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (ip_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_POST_ROUTING)))
-				return -EEXIST;
-			ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-		}
-#endif
-	}
-
-	/* Be careful here, modifying NAT bits can screw up things,
-	 * so don't let users modify them directly if they don't pass
-	 * ip_nat_range. */
-	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
-	return 0;
-}
-
-
-static inline int
-ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	struct ip_conntrack_helper *helper;
-	char *helpname;
-	int err;
-
-	/* don't change helper of sibling connections */
-	if (ct->master)
-		return -EINVAL;
-
-	err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
-	if (err < 0)
-		return err;
-
-	helper = __ip_conntrack_helper_find_byname(helpname);
-	if (!helper) {
-		if (!strcmp(helpname, ""))
-			helper = NULL;
-		else
-			return -EINVAL;
-	}
-
-	if (ct->helper) {
-		if (!helper) {
-			/* we had a helper before ... */
-			ip_ct_remove_expectations(ct);
-			ct->helper = NULL;
-		} else {
-			/* need to zero data of old helper */
-			memset(&ct->help, 0, sizeof(ct->help));
-		}
-	}
-
-	ct->helper = helper;
-
-	return 0;
-}
-
-static inline int
-ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
-	if (!del_timer(&ct->timeout))
-		return -ETIME;
-
-	ct->timeout.expires = jiffies + timeout * HZ;
-	add_timer(&ct->timeout);
-
-	return 0;
-}
-
-static inline int
-ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
-	struct ip_conntrack_protocol *proto;
-	u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
-	int err = 0;
-
-	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
-
-	proto = ip_conntrack_proto_find_get(npt);
-
-	if (proto->from_nfattr)
-		err = proto->from_nfattr(tb, ct);
-	ip_conntrack_proto_put(proto);
-
-	return err;
-}
-
-static int
-ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	int err;
-
-	if (cda[CTA_HELP-1]) {
-		err = ctnetlink_change_helper(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_TIMEOUT-1]) {
-		err = ctnetlink_change_timeout(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_STATUS-1]) {
-		err = ctnetlink_change_status(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_PROTOINFO-1]) {
-		err = ctnetlink_change_protoinfo(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
-	return 0;
-}
-
-static int
-ctnetlink_create_conntrack(struct nfattr *cda[],
-			   struct ip_conntrack_tuple *otuple,
-			   struct ip_conntrack_tuple *rtuple)
-{
-	struct ip_conntrack *ct;
-	int err = -EINVAL;
-
-	ct = ip_conntrack_alloc(otuple, rtuple);
-	if (ct == NULL || IS_ERR(ct))
-		return -ENOMEM;
-
-	if (!cda[CTA_TIMEOUT-1])
-		goto err;
-	ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
-	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
-	ct->status |= IPS_CONFIRMED;
-
-	if (cda[CTA_STATUS-1]) {
-		err = ctnetlink_change_status(ct, cda);
-		if (err < 0)
-			goto err;
-	}
-
-	if (cda[CTA_PROTOINFO-1]) {
-		err = ctnetlink_change_protoinfo(ct, cda);
-		if (err < 0)
-			goto err;
-	}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
-	ct->helper = ip_conntrack_helper_find_get(rtuple);
-
-	add_timer(&ct->timeout);
-	ip_conntrack_hash_insert(ct);
-
-	if (ct->helper)
-		ip_conntrack_helper_put(ct->helper);
-
-	return 0;
-
-err:
-	ip_conntrack_free(ct);
-	return err;
-}
-
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple otuple, rtuple;
-	struct ip_conntrack_tuple_hash *h = NULL;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1]) {
-		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_TUPLE_REPLY-1]) {
-		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
-		if (err < 0)
-			return err;
-	}
-
-	write_lock_bh(&ip_conntrack_lock);
-	if (cda[CTA_TUPLE_ORIG-1])
-		h = __ip_conntrack_find(&otuple, NULL);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		h = __ip_conntrack_find(&rtuple, NULL);
-
-	if (h == NULL) {
-		write_unlock_bh(&ip_conntrack_lock);
-		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
-		return err;
-	}
-	/* implicit 'else' */
-
-	/* we only allow nat config for new conntracks */
-	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	/* We manipulate the conntrack inside the global conntrack table lock,
-	 * so there's no need to increase the refcount */
-	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
-		err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
-
-out_unlock:
-	write_unlock_bh(&ip_conntrack_lock);
-	return err;
-}
-
-/***********************************************************************
- * EXPECT
- ***********************************************************************/
-
-static inline int
-ctnetlink_exp_dump_tuple(struct sk_buff *skb,
-			 const struct ip_conntrack_tuple *tuple,
-			 enum ctattr_expect type)
-{
-	struct nfattr *nest_parms = NFA_NEST(skb, type);
-
-	if (ctnetlink_dump_tuples(skb, tuple) < 0)
-		goto nfattr_failure;
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_mask(struct sk_buff *skb,
-			const struct ip_conntrack_tuple *tuple,
-			const struct ip_conntrack_tuple *mask)
-{
-	int ret;
-	struct ip_conntrack_protocol *proto;
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
-
-	ret = ctnetlink_dump_tuples_ip(skb, mask);
-	if (unlikely(ret < 0))
-		goto nfattr_failure;
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
-	ip_conntrack_proto_put(proto);
-	if (unlikely(ret < 0))
-		goto nfattr_failure;
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_expect(struct sk_buff *skb,
-			  const struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *master = exp->master;
-	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
-	__be32 id = htonl(exp->id);
-
-	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
-		goto nfattr_failure;
-	if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
-		goto nfattr_failure;
-	if (ctnetlink_exp_dump_tuple(skb,
-				 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				 CTA_EXPECT_MASTER) < 0)
-		goto nfattr_failure;
-
-	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
-	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-		    int event,
-		    int nowait,
-		    const struct ip_conntrack_expect *exp)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	unsigned char *b;
-
-	b = skb->tail;
-
-	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-	nfmsg  = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
-				  unsigned long events, void *ptr)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
-	struct sk_buff *skb;
-	unsigned int type;
-	unsigned char *b;
-	int flags = 0;
-
-	if (events & IPEXP_NEW) {
-		type = IPCTNL_MSG_EXP_NEW;
-		flags = NLM_F_CREATE|NLM_F_EXCL;
-	} else
-		return NOTIFY_DONE;
-
-	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
-		return NOTIFY_DONE;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb)
-		return NOTIFY_DONE;
-
-	b = skb->tail;
-
-	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
-	nfmsg = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = flags;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
-	return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
-	kfree_skb(skb);
-	return NOTIFY_DONE;
-}
-#endif
-
-static int
-ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack_expect *exp = NULL;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[0];
-
-	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_prev(i, &ip_conntrack_expect_list) {
-		exp = (struct ip_conntrack_expect *) i;
-		if (exp->id <= *id)
-			continue;
-		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					    cb->nlh->nlmsg_seq,
-					    IPCTNL_MSG_EXP_NEW,
-					    1, exp) < 0)
-			goto out;
-		*id = exp->id;
-	}
-out:
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return skb->len;
-}
-
-static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
-	[CTA_EXPECT_TIMEOUT-1]          = sizeof(__be32),
-	[CTA_EXPECT_ID-1]               = sizeof(__be32)
-};
-
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_expect *exp;
-	struct sk_buff *skb2;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct nfgenmsg *msg = NLMSG_DATA(nlh);
-		u32 rlen;
-
-		if (msg->nfgen_family != AF_INET)
-			return -EAFNOSUPPORT;
-
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_exp_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
-	}
-
-	if (cda[CTA_EXPECT_MASTER-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
-	else
-		return -EINVAL;
-
-	if (err < 0)
-		return err;
-
-	exp = ip_conntrack_expect_find_get(&tuple);
-	if (!exp)
-		return -ENOENT;
-
-	if (cda[CTA_EXPECT_ID-1]) {
-		__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
-		if (exp->id != ntohl(id)) {
-			ip_conntrack_expect_put(exp);
-			return -ENOENT;
-		}
-	}
-
-	err = -ENOMEM;
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb2)
-		goto out;
-
-	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
-				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
-				      1, exp);
-	if (err <= 0)
-		goto free;
-
-	ip_conntrack_expect_put(exp);
-
-	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-free:
-	kfree_skb(skb2);
-out:
-	ip_conntrack_expect_put(exp);
-	return err;
-}
-
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_expect *exp, *tmp;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_helper *h;
-	int err;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (cda[CTA_EXPECT_TUPLE-1]) {
-		/* delete a single expect by tuple */
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-		if (err < 0)
-			return err;
-
-		/* bump usage count to 2 */
-		exp = ip_conntrack_expect_find_get(&tuple);
-		if (!exp)
-			return -ENOENT;
-
-		if (cda[CTA_EXPECT_ID-1]) {
-			__be32 id =
-				*(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
-			if (exp->id != ntohl(id)) {
-				ip_conntrack_expect_put(exp);
-				return -ENOENT;
-			}
-		}
-
-		/* after list removal, usage count == 1 */
-		ip_conntrack_unexpect_related(exp);
-		/* have to put what we 'get' above.
-		 * after this line usage count == 0 */
-		ip_conntrack_expect_put(exp);
-	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
-		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
-
-		/* delete all expectations for this helper */
-		write_lock_bh(&ip_conntrack_lock);
-		h = __ip_conntrack_helper_find_byname(name);
-		if (!h) {
-			write_unlock_bh(&ip_conntrack_lock);
-			return -EINVAL;
-		}
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-					 list) {
-			if (exp->master->helper == h
-			    && del_timer(&exp->timeout)) {
-				ip_ct_unlink_expect(exp);
-				ip_conntrack_expect_put(exp);
-			}
-		}
-		write_unlock_bh(&ip_conntrack_lock);
-	} else {
-		/* This basically means we have to flush everything*/
-		write_lock_bh(&ip_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-					 list) {
-			if (del_timer(&exp->timeout)) {
-				ip_ct_unlink_expect(exp);
-				ip_conntrack_expect_put(exp);
-			}
-		}
-		write_unlock_bh(&ip_conntrack_lock);
-	}
-
-	return 0;
-}
-static int
-ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
-{
-	return -EOPNOTSUPP;
-}
-
-static int
-ctnetlink_create_expect(struct nfattr *cda[])
-{
-	struct ip_conntrack_tuple tuple, mask, master_tuple;
-	struct ip_conntrack_tuple_hash *h = NULL;
-	struct ip_conntrack_expect *exp;
-	struct ip_conntrack *ct;
-	int err = 0;
-
-	/* caller guarantees that those three CTA_EXPECT_* exist */
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
-	if (err < 0)
-		return err;
-
-	/* Look for master conntrack of this expectation */
-	h = ip_conntrack_find_get(&master_tuple, NULL);
-	if (!h)
-		return -ENOENT;
-	ct = tuplehash_to_ctrack(h);
-
-	if (!ct->helper) {
-		/* such conntrack hasn't got any helper, abort */
-		err = -EINVAL;
-		goto out;
-	}
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (!exp) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-	exp->master = ct;
-	memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
-	memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
-
-	err = ip_conntrack_expect_related(exp);
-	ip_conntrack_expect_put(exp);
-
-out:
-	ip_conntrack_put(tuplehash_to_ctrack(h));
-	return err;
-}
-
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_expect *exp;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (!cda[CTA_EXPECT_TUPLE-1]
-	    || !cda[CTA_EXPECT_MASK-1]
-	    || !cda[CTA_EXPECT_MASTER-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-	if (err < 0)
-		return err;
-
-	write_lock_bh(&ip_conntrack_lock);
-	exp = __ip_conntrack_expect_find(&tuple);
-
-	if (!exp) {
-		write_unlock_bh(&ip_conntrack_lock);
-		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_expect(cda);
-		return err;
-	}
-
-	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
-		err = ctnetlink_change_expect(exp, cda);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	return err;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
-	.notifier_call	= ctnetlink_conntrack_event,
-};
-
-static struct notifier_block ctnl_notifier_exp = {
-	.notifier_call	= ctnetlink_expect_event,
-};
-#endif
-
-static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
-	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX, },
-};
-
-static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
-	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-};
-
-static struct nfnetlink_subsystem ctnl_subsys = {
-	.name				= "conntrack",
-	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
-	.cb_count			= IPCTNL_MSG_MAX,
-	.cb				= ctnl_cb,
-};
-
-static struct nfnetlink_subsystem ctnl_exp_subsys = {
-	.name				= "conntrack_expect",
-	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
-	.cb_count			= IPCTNL_MSG_EXP_MAX,
-	.cb				= ctnl_exp_cb,
-};
-
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
-
-static int __init ctnetlink_init(void)
-{
-	int ret;
-
-	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
-	ret = nfnetlink_subsys_register(&ctnl_subsys);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register with nfnetlink.\n");
-		goto err_out;
-	}
-
-	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
-		goto err_unreg_subsys;
-	}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	ret = ip_conntrack_register_notifier(&ctnl_notifier);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register notifier.\n");
-		goto err_unreg_exp_subsys;
-	}
-
-	ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot expect register notifier.\n");
-		goto err_unreg_notifier;
-	}
-#endif
-
-	return 0;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
-	ip_conntrack_unregister_notifier(&ctnl_notifier);
-err_unreg_exp_subsys:
-	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
-err_unreg_subsys:
-	nfnetlink_subsys_unregister(&ctnl_subsys);
-err_out:
-	return ret;
-}
-
-static void __exit ctnetlink_exit(void)
-{
-	printk("ctnetlink: unregistering from nfnetlink.\n");
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
-	ip_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
-	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-	nfnetlink_subsys_unregister(&ctnl_subsys);
-	return;
-}
-
-module_init(ctnetlink_init);
-module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index 88af82e98658..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
-
-static int generic_pkt_to_tuple(const struct sk_buff *skb,
-				unsigned int dataoff,
-				struct ip_conntrack_tuple *tuple)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return 1;
-}
-
-static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
-				const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int generic_print_tuple(struct seq_file *s,
-			       const struct ip_conntrack_tuple *tuple)
-{
-	return 0;
-}
-
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
-				   const struct ip_conntrack *state)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int packet(struct ip_conntrack *conntrack,
-		  const struct sk_buff *skb,
-		  enum ip_conntrack_info ctinfo)
-{
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
-	return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_generic_protocol =
-{
-	.proto			= 0,
-	.name			= "unknown",
-	.pkt_to_tuple		= generic_pkt_to_tuple,
-	.invert_tuple		= generic_invert_tuple,
-	.print_tuple		= generic_print_tuple,
-	.print_conntrack	= generic_print_conntrack,
-	.packet			= packet,
-	.new			= new,
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index ac1c49ef36a9..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-static DEFINE_RWLOCK(ip_ct_gre_lock);
-
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT		(30*HZ)
-#define GRE_STREAM_TIMEOUT	(180*HZ)
-
-#if 0
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
-			NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
-			NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
-				const struct ip_conntrack_tuple *t)
-{
-	return ((km->tuple.src.ip == t->src.ip) &&
-		(km->tuple.dst.ip == t->dst.ip) &&
-		(km->tuple.dst.protonum == t->dst.protonum) &&
-		(km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
-	struct ip_ct_gre_keymap *km;
-	__be16 key = 0;
-
-	read_lock_bh(&ip_ct_gre_lock);
-	list_for_each_entry(km, &gre_keymap_list, list) {
-		if (gre_key_cmpfn(km, t)) {
-			key = km->tuple.src.u.gre.key;
-			break;
-		}
-	}
-	read_unlock_bh(&ip_ct_gre_lock);
-
-	DEBUGP("lookup src key 0x%x up key for ", key);
-	DUMP_TUPLE_GRE(t);
-
-	return key;
-}
-
-/* add a single keymap entry, associate with specified master ct */
-int
-ip_ct_gre_keymap_add(struct ip_conntrack *ct,
-		     struct ip_conntrack_tuple *t, int reply)
-{
-	struct ip_ct_gre_keymap **exist_km, *km;
-
-	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
-		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
-		return -1;
-	}
-
-	if (!reply)
-		exist_km = &ct->help.ct_pptp_info.keymap_orig;
-	else
-		exist_km = &ct->help.ct_pptp_info.keymap_reply;
-
-	if (*exist_km) {
-		/* check whether it's a retransmission */
-		list_for_each_entry(km, &gre_keymap_list, list) {
-			if (gre_key_cmpfn(km, t) && km == *exist_km)
-				return 0;
-		}
-		DEBUGP("trying to override keymap_%s for ct %p\n",
-			reply? "reply":"orig", ct);
-		return -EEXIST;
-	}
-
-	km = kmalloc(sizeof(*km), GFP_ATOMIC);
-	if (!km)
-		return -ENOMEM;
-
-	memcpy(&km->tuple, t, sizeof(*t));
-	*exist_km = km;
-
-	DEBUGP("adding new entry %p: ", km);
-	DUMP_TUPLE_GRE(&km->tuple);
-
-	write_lock_bh(&ip_ct_gre_lock);
-	list_add_tail(&km->list, &gre_keymap_list);
-	write_unlock_bh(&ip_ct_gre_lock);
-
-	return 0;
-}
-
-/* destroy the keymap entries associated with specified master ct */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
-{
-	DEBUGP("entering for ct %p\n", ct);
-
-	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
-		DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
-		return;
-	}
-
-	write_lock_bh(&ip_ct_gre_lock);
-	if (ct->help.ct_pptp_info.keymap_orig) {
-		DEBUGP("removing %p from list\n",
-			ct->help.ct_pptp_info.keymap_orig);
-		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
-		kfree(ct->help.ct_pptp_info.keymap_orig);
-		ct->help.ct_pptp_info.keymap_orig = NULL;
-	}
-	if (ct->help.ct_pptp_info.keymap_reply) {
-		DEBUGP("removing %p from list\n",
-			ct->help.ct_pptp_info.keymap_reply);
-		list_del(&ct->help.ct_pptp_info.keymap_reply->list);
-		kfree(ct->help.ct_pptp_info.keymap_reply);
-		ct->help.ct_pptp_info.keymap_reply = NULL;
-	}
-	write_unlock_bh(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->dst.u.gre.key = orig->src.u.gre.key;
-	tuple->src.u.gre.key = orig->dst.u.gre.key;
-
-	return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
-			   unsigned int dataoff,
-			   struct ip_conntrack_tuple *tuple)
-{
-	struct gre_hdr_pptp _pgrehdr, *pgrehdr;
-	__be16 srckey;
-	struct gre_hdr _grehdr, *grehdr;
-
-	/* first only delinearize old RFC1701 GRE header */
-	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
-		/* try to behave like "ip_conntrack_proto_generic" */
-		tuple->src.u.all = 0;
-		tuple->dst.u.all = 0;
-		return 1;
-	}
-
-	/* PPTP header is variable length, only need up to the call_id field */
-	pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
-	if (!pgrehdr)
-		return 1;
-
-	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
-		DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
-		return 0;
-	}
-
-	tuple->dst.u.gre.key = pgrehdr->call_id;
-	srckey = gre_keymap_lookup(tuple);
-	tuple->src.u.gre.key = srckey;
-
-	return 1;
-}
-
-/* print gre part of tuple */
-static int gre_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
-			  ntohs(tuple->src.u.gre.key),
-			  ntohs(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *ct)
-{
-	return seq_printf(s, "timeout=%u, stream_timeout=%u ",
-			  (ct->proto.gre.timeout / HZ),
-			  (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info conntrackinfo)
-{
-	/* If we've seen traffic both ways, this is a GRE connection.
-	 * Extend timeout. */
-	if (ct->status & IPS_SEEN_REPLY) {
-		ip_ct_refresh_acct(ct, conntrackinfo, skb,
-				   ct->proto.gre.stream_timeout);
-		/* Also, more likely to be important, and not a probe. */
-		set_bit(IPS_ASSURED_BIT, &ct->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	} else
-		ip_ct_refresh_acct(ct, conntrackinfo, skb,
-				   ct->proto.gre.timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
-		   const struct sk_buff *skb)
-{
-	DEBUGP(": ");
-	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
-	/* initialize to sane value.  Ideally a conntrack helper
-	 * (e.g. in case of pptp) is increasing them */
-	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
-	ct->proto.gre.timeout = GRE_TIMEOUT;
-
-	return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
-	struct ip_conntrack *master = ct->master;
-	DEBUGP(" entering\n");
-
-	if (!master)
-		DEBUGP("no master !?!\n");
-	else
-		ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
-	.proto		 = IPPROTO_GRE,
-	.name		 = "gre",
-	.pkt_to_tuple	 = gre_pkt_to_tuple,
-	.invert_tuple	 = gre_invert_tuple,
-	.print_tuple	 = gre_print_tuple,
-	.print_conntrack = gre_print_conntrack,
-	.packet		 = gre_packet,
-	.new		 = gre_new,
-	.destroy	 = gre_destroy,
-	.me 		 = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-/* ip_conntrack_proto_gre initialization */
-int __init ip_ct_proto_gre_init(void)
-{
-	return ip_conntrack_protocol_register(&gre);
-}
-
-/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
- * init() code on errors.
- */
-void ip_ct_proto_gre_fini(void)
-{
-	struct list_head *pos, *n;
-
-	/* delete all keymap entries */
-	write_lock_bh(&ip_ct_gre_lock);
-	list_for_each_safe(pos, n, &gre_keymap_list) {
-		DEBUGP("deleting keymap %p at module unload time\n", pos);
-		list_del(pos);
-		kfree(pos);
-	}
-	write_unlock_bh(&ip_ct_gre_lock);
-
-	ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index ad70c81a21e0..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	struct icmphdr _hdr, *hp;
-
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->dst.u.icmp.type = hp->type;
-	tuple->src.u.icmp.id = hp->un.echo.id;
-	tuple->dst.u.icmp.code = hp->code;
-
-	return 1;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			     const struct ip_conntrack_tuple *orig)
-{
-	if (orig->dst.u.icmp.type >= sizeof(invmap)
-	    || !invmap[orig->dst.u.icmp.type])
-		return 0;
-
-	tuple->src.u.icmp.id = orig->src.u.icmp.id;
-	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
-	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
-			    const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "type=%u code=%u id=%u ",
-			  tuple->dst.u.icmp.type,
-			  tuple->dst.u.icmp.code,
-			  ntohs(tuple->src.u.icmp.id));
-}
-
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
-				const struct ip_conntrack *conntrack)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct ip_conntrack *ct,
-		       const struct sk_buff *skb,
-		       enum ip_conntrack_info ctinfo)
-{
-	/* Try to delete connection immediately after all replies:
-	   won't actually vanish as we still have skb, and del_timer
-	   means this will only run once even if count hits zero twice
-	   (theoretically possible with SMP) */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
-	} else {
-		atomic_inc(&ct->proto.icmp.count);
-		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int icmp_new(struct ip_conntrack *conntrack,
-		    const struct sk_buff *skb)
-{
-	static const u_int8_t valid_new[] = {
-		[ICMP_ECHO] = 1,
-		[ICMP_TIMESTAMP] = 1,
-		[ICMP_INFO_REQUEST] = 1,
-		[ICMP_ADDRESS] = 1
-	};
-
-	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
-	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
-		/* Can't create a new ICMP `conn' with this. */
-		DEBUGP("icmp: can't create new conn with type %u\n",
-		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
-		DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
-		return 0;
-	}
-	atomic_set(&conntrack->proto.icmp.count, 0);
-	return 1;
-}
-
-static int
-icmp_error_message(struct sk_buff *skb,
-		   enum ip_conntrack_info *ctinfo,
-		   unsigned int hooknum)
-{
-	struct ip_conntrack_tuple innertuple, origtuple;
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} _in, *inside;
-	struct ip_conntrack_protocol *innerproto;
-	struct ip_conntrack_tuple_hash *h;
-	int dataoff;
-
-	IP_NF_ASSERT(skb->nfct == NULL);
-
-	/* Not enough header? */
-	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
-	if (inside == NULL)
-		return -NF_ACCEPT;
-
-	/* Ignore ICMP's containing fragments (shouldn't happen) */
-	if (inside->ip.frag_off & htons(IP_OFFSET)) {
-		DEBUGP("icmp_error_track: fragment of proto %u\n",
-		       inside->ip.protocol);
-		return -NF_ACCEPT;
-	}
-
-	innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
-	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
-	/* Are they talking about one of our connections? */
-	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
-		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
-		ip_conntrack_proto_put(innerproto);
-		return -NF_ACCEPT;
-	}
-
-	/* Ordinarily, we'd expect the inverted tupleproto, but it's
-	   been preserved inside the ICMP. */
-	if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
-		DEBUGP("icmp_error_track: Can't invert tuple\n");
-		ip_conntrack_proto_put(innerproto);
-		return -NF_ACCEPT;
-	}
-	ip_conntrack_proto_put(innerproto);
-
-	*ctinfo = IP_CT_RELATED;
-
-	h = ip_conntrack_find_get(&innertuple, NULL);
-	if (!h) {
-		/* Locally generated ICMPs will match inverted if they
-		   haven't been SNAT'ed yet */
-		/* FIXME: NAT code has to handle half-done double NAT --RR */
-		if (hooknum == NF_IP_LOCAL_OUT)
-			h = ip_conntrack_find_get(&origtuple, NULL);
-
-		if (!h) {
-			DEBUGP("icmp_error_track: no match\n");
-			return -NF_ACCEPT;
-		}
-		/* Reverse direction from that found */
-		if (DIRECTION(h) != IP_CT_DIR_REPLY)
-			*ctinfo += IP_CT_IS_REPLY;
-	} else {
-		if (DIRECTION(h) == IP_CT_DIR_REPLY)
-			*ctinfo += IP_CT_IS_REPLY;
-	}
-
-	/* Update skb to refer to this connection */
-	skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
-	skb->nfctinfo = *ctinfo;
-	return -NF_ACCEPT;
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
-	   unsigned int hooknum)
-{
-	struct icmphdr _ih, *icmph;
-
-	/* Not enough header? */
-	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
-	if (icmph == NULL) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* See ip_conntrack_proto_tcp.c */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: bad ICMP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	/*
-	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
-	 *
-	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
-	 *		  discarded.
-	 */
-	if (icmph->type > NR_ICMP_TYPES) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: invalid ICMP type ");
-		return -NF_ACCEPT;
-	}
-
-	/* Need to track icmp error message? */
-	if (icmph->type != ICMP_DEST_UNREACH
-	    && icmph->type != ICMP_SOURCE_QUENCH
-	    && icmph->type != ICMP_TIME_EXCEEDED
-	    && icmph->type != ICMP_PARAMETERPROB
-	    && icmph->type != ICMP_REDIRECT)
-		return NF_ACCEPT;
-
-	return icmp_error_message(skb, ctinfo, hooknum);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
-				const struct ip_conntrack_tuple *t)
-{
-	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
-		&t->src.u.icmp.id);
-	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
-		&t->dst.u.icmp.type);
-	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
-		&t->dst.u.icmp.code);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
-				struct ip_conntrack_tuple *tuple)
-{
-	if (!tb[CTA_PROTO_ICMP_TYPE-1]
-	    || !tb[CTA_PROTO_ICMP_CODE-1]
-	    || !tb[CTA_PROTO_ICMP_ID-1])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type =
-			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
-	tuple->dst.u.icmp.code =
-			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
-	tuple->src.u.icmp.id =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
-
-	if (tuple->dst.u.icmp.type >= sizeof(invmap)
-	    || !invmap[tuple->dst.u.icmp.type])
-		return -EINVAL;
-
-	return 0;
-}
-#endif
-
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
-{
-	.proto 			= IPPROTO_ICMP,
-	.name 			= "icmp",
-	.pkt_to_tuple		= icmp_pkt_to_tuple,
-	.invert_tuple		= icmp_invert_tuple,
-	.print_tuple		= icmp_print_tuple,
-	.print_conntrack	= icmp_print_conntrack,
-	.packet			= icmp_packet,
-	.new			= icmp_new,
-	.error			= icmp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
-	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index e6942992b2f6..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * Connection tracking protocol helper module for SCTP.
- *
- * SCTP is defined in RFC 2960. References to various sections in this code
- * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Added support for proc manipulation of timeouts.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/string.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
-   closely.  They're more complex. --RR
-
-   And so for me for SCTP :D -Kiran */
-
-static const char *sctp_conntrack_names[] = {
-	"NONE",
-	"CLOSED",
-	"COOKIE_WAIT",
-	"COOKIE_ECHOED",
-	"ESTABLISHED",
-	"SHUTDOWN_SENT",
-	"SHUTDOWN_RECD",
-	"SHUTDOWN_ACK_SENT",
-};
-
-#define SECS  * HZ
-#define MINS  * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS  * 24 HOURS
-
-static unsigned int ip_ct_sctp_timeout_closed __read_mostly           = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly      =  3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly    =  3 SECS;
-static unsigned int ip_ct_sctp_timeout_established __read_mostly      =  5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly    = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly    = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
-
-static const unsigned int * sctp_timeouts[]
-= { NULL,                                  /* SCTP_CONNTRACK_NONE  */
-    &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
-    &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
-    &ip_ct_sctp_timeout_cookie_echoed,     /* SCTP_CONNTRACK_COOKIE_ECHOED */
-    &ip_ct_sctp_timeout_established,       /* SCTP_CONNTRACK_ESTABLISHED */
-    &ip_ct_sctp_timeout_shutdown_sent,     /* SCTP_CONNTRACK_SHUTDOWN_SENT */
-    &ip_ct_sctp_timeout_shutdown_recd,     /* SCTP_CONNTRACK_SHUTDOWN_RECD */
-    &ip_ct_sctp_timeout_shutdown_ack_sent  /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
-
-#define sNO SCTP_CONNTRACK_NONE
-#define	sCL SCTP_CONNTRACK_CLOSED
-#define	sCW SCTP_CONNTRACK_COOKIE_WAIT
-#define	sCE SCTP_CONNTRACK_COOKIE_ECHOED
-#define	sES SCTP_CONNTRACK_ESTABLISHED
-#define	sSS SCTP_CONNTRACK_SHUTDOWN_SENT
-#define	sSR SCTP_CONNTRACK_SHUTDOWN_RECD
-#define	sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
-#define	sIV SCTP_CONNTRACK_MAX
-
-/*
-	These are the descriptions of the states:
-
-NOTE: These state names are tantalizingly similar to the states of an
-SCTP endpoint. But the interpretation of the states is a little different,
-considering that these are the states of the connection and not of an end
-point. Please note the subtleties. -Kiran
-
-NONE              - Nothing so far.
-COOKIE WAIT       - We have seen an INIT chunk in the original direction, or also
-		    an INIT_ACK chunk in the reply direction.
-COOKIE ECHOED     - We have seen a COOKIE_ECHO chunk in the original direction.
-ESTABLISHED       - We have seen a COOKIE_ACK in the reply direction.
-SHUTDOWN_SENT     - We have seen a SHUTDOWN chunk in the original direction.
-SHUTDOWN_RECD     - We have seen a SHUTDOWN chunk in the reply directoin.
-SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
-		    to that of the SHUTDOWN chunk.
-CLOSED            - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
-		    the SHUTDOWN chunk. Connection is closed.
-*/
-
-/* TODO
- - I have assumed that the first INIT is in the original direction.
- This messes things when an INIT comes in the reply direction in CLOSED
- state.
- - Check the error type in the reply dir before transitioning from
-cookie echoed to closed.
- - Sec 5.2.4 of RFC 2960
- - Multi Homing support.
-*/
-
-/* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
-	{
-/*	ORIGINAL	*/
-/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init         */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
-/* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
-	},
-	{
-/*	REPLY	*/
-/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init         */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack     */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort        */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
-/* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
-	}
-};
-
-static int sctp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	sctp_sctphdr_t _hdr, *hp;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.sctp.port = hp->source;
-	tuple->dst.u.sctp.port = hp->dest;
-	return 1;
-}
-
-static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			     const struct ip_conntrack_tuple *orig)
-{
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
-	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int sctp_print_tuple(struct seq_file *s,
-			    const struct ip_conntrack_tuple *tuple)
-{
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.sctp.port),
-			  ntohs(tuple->dst.u.sctp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
-				const struct ip_conntrack *conntrack)
-{
-	enum sctp_conntrack state;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	read_lock_bh(&sctp_lock);
-	state = conntrack->proto.sctp.state;
-	read_unlock_bh(&sctp_lock);
-
-	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
-}
-
-#define for_each_sctp_chunk(skb, sch, _sch, offset, count)		\
-for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0;	\
-	offset < skb->len &&						\
-	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
-	offset += (ntohs(sch->length) + 3) & ~3, count++)
-
-/* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct ip_conntrack *conntrack,
-			   const struct sk_buff *skb,
-			   char *map)
-{
-	u_int32_t offset, count;
-	sctp_chunkhdr_t _sch, *sch;
-	int flag;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	flag = 0;
-
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);
-
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK
-			|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
-			flag = 1;
-		}
-
-		/*
-		 * Cookie Ack/Echo chunks not the first OR
-		 * Init / Init Ack / Shutdown compl chunks not the only chunks
-		 * OR zero-length.
-		 */
-		if (((sch->type == SCTP_CID_COOKIE_ACK
-			|| sch->type == SCTP_CID_COOKIE_ECHO
-			|| flag)
-		      && count !=0) || !sch->length) {
-			DEBUGP("Basic checks failed\n");
-			return 1;
-		}
-
-		if (map) {
-			set_bit(sch->type, (void *)map);
-		}
-	}
-
-	DEBUGP("Basic checks passed\n");
-	return count == 0;
-}
-
-static int new_state(enum ip_conntrack_dir dir,
-		     enum sctp_conntrack cur_state,
-		     int chunk_type)
-{
-	int i;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	DEBUGP("Chunk type: %d\n", chunk_type);
-
-	switch (chunk_type) {
-		case SCTP_CID_INIT:
-			DEBUGP("SCTP_CID_INIT\n");
-			i = 0; break;
-		case SCTP_CID_INIT_ACK:
-			DEBUGP("SCTP_CID_INIT_ACK\n");
-			i = 1; break;
-		case SCTP_CID_ABORT:
-			DEBUGP("SCTP_CID_ABORT\n");
-			i = 2; break;
-		case SCTP_CID_SHUTDOWN:
-			DEBUGP("SCTP_CID_SHUTDOWN\n");
-			i = 3; break;
-		case SCTP_CID_SHUTDOWN_ACK:
-			DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
-			i = 4; break;
-		case SCTP_CID_ERROR:
-			DEBUGP("SCTP_CID_ERROR\n");
-			i = 5; break;
-		case SCTP_CID_COOKIE_ECHO:
-			DEBUGP("SCTP_CID_COOKIE_ECHO\n");
-			i = 6; break;
-		case SCTP_CID_COOKIE_ACK:
-			DEBUGP("SCTP_CID_COOKIE_ACK\n");
-			i = 7; break;
-		case SCTP_CID_SHUTDOWN_COMPLETE:
-			DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
-			i = 8; break;
-		default:
-			/* Other chunks like DATA, SACK, HEARTBEAT and
-			its ACK do not cause a change in state */
-			DEBUGP("Unknown chunk type, Will stay in %s\n",
-						sctp_conntrack_names[cur_state]);
-			return cur_state;
-	}
-
-	DEBUGP("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n",
-			dir, sctp_conntrack_names[cur_state], chunk_type,
-			sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
-	return sctp_conntracks[dir][i][cur_state];
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct ip_conntrack *conntrack,
-		       const struct sk_buff *skb,
-		       enum ip_conntrack_info ctinfo)
-{
-	enum sctp_conntrack newconntrack, oldsctpstate;
-	struct iphdr *iph = skb->nh.iph;
-	sctp_sctphdr_t _sctph, *sh;
-	sctp_chunkhdr_t _sch, *sch;
-	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return -1;
-
-	if (do_basic_checks(conntrack, skb, map) != 0)
-		return -1;
-
-	/* Check the verification tag (Sec 8.5) */
-	if (!test_bit(SCTP_CID_INIT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
-		&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
-		&& !test_bit(SCTP_CID_ABORT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
-		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-		DEBUGP("Verification tag check failed\n");
-		return -1;
-	}
-
-	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		write_lock_bh(&sctp_lock);
-
-		/* Special cases of Verification tag check (Sec 8.5.1) */
-		if (sch->type == SCTP_CID_INIT) {
-			/* Sec 8.5.1 (A) */
-			if (sh->vtag != 0) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_ABORT) {
-			/* Sec 8.5.1 (B) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
-			/* Sec 8.5.1 (C) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)]
-					&& (sch->flags & 1))) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
-			/* Sec 8.5.1 (D) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		}
-
-		oldsctpstate = conntrack->proto.sctp.state;
-		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
-
-		/* Invalid */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
-			       CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
-			write_unlock_bh(&sctp_lock);
-			return -1;
-		}
-
-		/* If it is an INIT or an INIT ACK note down the vtag */
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK) {
-			sctp_inithdr_t _inithdr, *ih;
-
-			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-						sizeof(_inithdr), &_inithdr);
-			if (ih == NULL) {
-					write_unlock_bh(&sctp_lock);
-					return -1;
-			}
-			DEBUGP("Setting vtag %x for dir %d\n",
-					ih->init_tag, !CTINFO2DIR(ctinfo));
-			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
-		}
-
-		conntrack->proto.sctp.state = newconntrack;
-		if (oldsctpstate != newconntrack)
-			ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-		write_unlock_bh(&sctp_lock);
-	}
-
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
-	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
-		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
-		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
-		DEBUGP("Setting assured bit\n");
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int sctp_new(struct ip_conntrack *conntrack,
-		    const struct sk_buff *skb)
-{
-	enum sctp_conntrack newconntrack;
-	struct iphdr *iph = skb->nh.iph;
-	sctp_sctphdr_t _sctph, *sh;
-	sctp_chunkhdr_t _sch, *sch;
-	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return 0;
-
-	if (do_basic_checks(conntrack, skb, map) != 0)
-		return 0;
-
-	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if ((test_bit (SCTP_CID_ABORT, (void *)map))
-		|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
-		|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
-		return 0;
-	}
-
-	newconntrack = SCTP_CONNTRACK_MAX;
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		/* Don't need lock here: this conntrack not in circulation yet */
-		newconntrack = new_state (IP_CT_DIR_ORIGINAL,
-						SCTP_CONNTRACK_NONE, sch->type);
-
-		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
-			return 0;
-		}
-
-		/* Copy the vtag into the state info */
-		if (sch->type == SCTP_CID_INIT) {
-			if (sh->vtag == 0) {
-				sctp_inithdr_t _inithdr, *ih;
-
-				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-							sizeof(_inithdr), &_inithdr);
-				if (ih == NULL)
-					return 0;
-
-				DEBUGP("Setting vtag %x for new conn\n",
-					ih->init_tag);
-
-				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-								ih->init_tag;
-			} else {
-				/* Sec 8.5.1 (A) */
-				return 0;
-			}
-		}
-		/* If it is a shutdown ack OOTB packet, we expect a return
-		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
-		else {
-			DEBUGP("Setting vtag %x for new conn OOTB\n",
-				sh->vtag);
-			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
-		}
-
-		conntrack->proto.sctp.state = newconntrack;
-	}
-
-	return 1;
-}
-
-static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
-	.proto 		 = IPPROTO_SCTP,
-	.name 		 = "sctp",
-	.pkt_to_tuple 	 = sctp_pkt_to_tuple,
-	.invert_tuple 	 = sctp_invert_tuple,
-	.print_tuple 	 = sctp_print_tuple,
-	.print_conntrack = sctp_print_conntrack,
-	.packet 	 = sctp_packet,
-	.new 		 = sctp_new,
-	.destroy 	 = NULL,
-	.me 		 = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-#ifdef CONFIG_SYSCTL
-static ctl_table ip_ct_sysctl_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
-		.procname	= "ip_conntrack_sctp_timeout_closed",
-		.data		= &ip_ct_sctp_timeout_closed,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
-		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
-		.data		= &ip_ct_sctp_timeout_cookie_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
-		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
-		.data		= &ip_ct_sctp_timeout_cookie_echoed,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
-		.procname	= "ip_conntrack_sctp_timeout_established",
-		.data		= &ip_ct_sctp_timeout_established,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
-		.data		= &ip_ct_sctp_timeout_shutdown_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
-		.data		= &ip_ct_sctp_timeout_shutdown_recd,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data		= &ip_ct_sctp_timeout_shutdown_ack_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_netfilter_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-		.child		= ip_ct_sysctl_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ip_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ip_ct_ipv4_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-#endif
-
-static int __init ip_conntrack_proto_sctp_init(void)
-{
-	int ret;
-
-	ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
-	if (ret) {
-		printk("ip_conntrack_proto_sctp: protocol register failed\n");
-		goto out;
-	}
-
-#ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
-	if (ip_ct_sysctl_header == NULL) {
-		ret = -ENOMEM;
-		printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
-		goto cleanup;
-	}
-#endif
-
-	return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup:
-	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#endif
- out:
-	DEBUGP("SCTP conntrack module loading %s\n",
-					ret ? "failed": "succeeded");
-	return ret;
-}
-
-static void __exit ip_conntrack_proto_sctp_fini(void)
-{
-	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
-	DEBUGP("SCTP conntrack module unloaded\n");
-}
-
-module_init(ip_conntrack_proto_sctp_init);
-module_exit(ip_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index 0a72eab14620..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1164 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- *	- Real stateful connection tracking
- *	- Modified state transitions table
- *	- Window scaling support added
- *	- SACK support added
- *
- * Willy Tarreau:
- *	- State table bugfixes
- *	- More robust state changes
- *	- Tuning timer parameters
- *
- * version 2.2
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/spinlock.h>
-
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
-/* "Be conservative in what you do,
-    be liberal in what you accept from others."
-    If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
-   connections. */
-int ip_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
-   ACK from the destination. If this number is reached, a shorter timer
-   will be started. */
-int ip_ct_tcp_max_retrans __read_mostly = 3;
-
-  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
-     closely.  They're more complex. --RR */
-
-static const char *tcp_conntrack_names[] = {
-	"NONE",
-	"SYN_SENT",
-	"SYN_RECV",
-	"ESTABLISHED",
-	"FIN_WAIT",
-	"CLOSE_WAIT",
-	"LAST_ACK",
-	"TIME_WAIT",
-	"CLOSE",
-	"LISTEN"
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
-unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
-unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;
-
-/* RFC1122 says the R2 limit should be at least 100 seconds.
-   Linux uses 15 packets as limit, which corresponds
-   to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
-
-static const unsigned int * tcp_timeouts[]
-= { NULL,                              /*      TCP_CONNTRACK_NONE */
-    &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
-    &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
-    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
-    &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
-    &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT,       */
-    &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
-    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
-    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
-    NULL,                              /*      TCP_CONNTRACK_LISTEN */
- };
-
-#define sNO TCP_CONNTRACK_NONE
-#define sSS TCP_CONNTRACK_SYN_SENT
-#define sSR TCP_CONNTRACK_SYN_RECV
-#define sES TCP_CONNTRACK_ESTABLISHED
-#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sCW TCP_CONNTRACK_CLOSE_WAIT
-#define sLA TCP_CONNTRACK_LAST_ACK
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
-#define sIV TCP_CONNTRACK_MAX
-#define sIG TCP_CONNTRACK_IGNORE
-
-/* What TCP flags are set from RST/SYN/FIN/ACK. */
-enum tcp_bit_set {
-	TCP_SYN_SET,
-	TCP_SYNACK_SET,
-	TCP_FIN_SET,
-	TCP_ACK_SET,
-	TCP_RST_SET,
-	TCP_NONE_SET,
-};
-
-/*
- * The TCP state transition table needs a few words...
- *
- * We are the man in the middle. All the packets go through us
- * but might get lost in transit to the destination.
- * It is assumed that the destinations can't receive segments
- * we haven't seen.
- *
- * The checked segment is in window, but our windows are *not*
- * equivalent with the ones of the sender/receiver. We always
- * try to guess the state of the current sender.
- *
- * The meaning of the states are:
- *
- * NONE:	initial state
- * SYN_SENT:	SYN-only packet seen
- * SYN_RECV:	SYN-ACK packet seen
- * ESTABLISHED:	ACK packet seen
- * FIN_WAIT:	FIN packet seen
- * CLOSE_WAIT:	ACK seen (after FIN)
- * LAST_ACK:	FIN seen (after FIN)
- * TIME_WAIT:	last ACK seen
- * CLOSE:	closed connection
- *
- * LISTEN state is not used.
- *
- * Packets marked as IGNORED (sIG):
- *	if they may be either invalid or valid
- *	and the receiver may send back a connection
- *	closing RST or a SYN/ACK.
- *
- * Packets marked as INVALID (sIV):
- *	if they are invalid
- *	or we do not support the request (simultaneous open)
- */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
-	{
-/* ORIGINAL */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
-/*
- *	sNO -> sSS	Initialize a new connection
- *	sSS -> sSS	Retransmitted SYN
- *	sSR -> sIG	Late retransmitted SYN?
- *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
- *			are errors. Receiver will reply with RST
- *			and close the connection.
- *			Or we are not in sync and hold a dead connection.
- *	sFW -> sIG
- *	sCW -> sIG
- *	sLA -> sIG
- *	sTW -> sSS	Reopened connection (RFC 1122).
- *	sCL -> sSS
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * A SYN/ACK from the client is always invalid:
- *	- either it tries to set up a simultaneous open, which is
- *	  not supported;
- *	- or the firewall has just been inserted between the two hosts
- *	  during the session set-up. The SYN will be retransmitted
- *	  by the true client (or it'll time out).
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- *	sNO -> sIV	Too late and no reason to do anything...
- *	sSS -> sIV	Client migth not send FIN in this state:
- *			we enforce waiting for a SYN/ACK reply first.
- *	sSR -> sFW	Close started.
- *	sES -> sFW
- *	sFW -> sLA	FIN seen in both directions, waiting for
- *			the last ACK.
- *			Migth be a retransmitted FIN as well...
- *	sCW -> sLA
- *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
- *	sTW -> sTW
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- *	sNO -> sES	Assumed.
- *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
- *	sSR -> sES	Established state is reached.
- *	sES -> sES	:-)
- *	sFW -> sCW	Normal close request answered by ACK.
- *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
- *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
-	},
-	{
-/* REPLY */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- *	sNO -> sIV	Never reached.
- *	sSS -> sIV	Simultaneous open, not supported
- *	sSR -> sIV	Simultaneous open, not supported.
- *	sES -> sIV	Server may not initiate a connection.
- *	sFW -> sIV
- *	sCW -> sIV
- *	sLA -> sIV
- *	sTW -> sIV	Reopened connection, but server may not do it.
- *	sCL -> sIV
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
-/*
- *	sSS -> sSR	Standard open.
- *	sSR -> sSR	Retransmitted SYN/ACK.
- *	sES -> sIG	Late retransmitted SYN/ACK?
- *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
- *	sCW -> sIG
- *	sLA -> sIG
- *	sTW -> sIG
- *	sCL -> sIG
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- *	sSS -> sIV	Server might not send FIN in this state.
- *	sSR -> sFW	Close started.
- *	sES -> sFW
- *	sFW -> sLA	FIN seen in both directions.
- *	sCW -> sLA
- *	sLA -> sLA	Retransmitted FIN.
- *	sTW -> sTW
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- *	sSS -> sIG	Might be a half-open connection.
- *	sSR -> sSR	Might answer late resent SYN.
- *	sES -> sES	:-)
- *	sFW -> sCW	Normal close request answered by ACK.
- *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
- *	sTW -> sTW	Retransmitted last ACK.
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
-	}
-};
-
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
-			    unsigned int dataoff,
-			    struct ip_conntrack_tuple *tuple)
-{
-	struct tcphdr _hdr, *hp;
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.tcp.port = hp->source;
-	tuple->dst.u.tcp.port = hp->dest;
-
-	return 1;
-}
-
-static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
-	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.tcp.port),
-			  ntohs(tuple->dst.u.tcp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *conntrack)
-{
-	enum tcp_conntrack state;
-
-	read_lock_bh(&tcp_lock);
-	state = conntrack->proto.tcp.state;
-	read_unlock_bh(&tcp_lock);
-
-	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
-			 const struct ip_conntrack *ct)
-{
-	struct nfattr *nest_parms;
-
-	read_lock_bh(&tcp_lock);
-	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
-	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
-		&ct->proto.tcp.state);
-	read_unlock_bh(&tcp_lock);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	read_unlock_bh(&tcp_lock);
-	return -1;
-}
-
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
-};
-
-static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
-{
-	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
-	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
-
-	/* updates could not contain anything about the private
-	 * protocol info, in that case skip the parsing */
-	if (!attr)
-		return 0;
-
-	nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
-		return -EINVAL;
-
-	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
-		return -EINVAL;
-
-	write_lock_bh(&tcp_lock);
-	ct->proto.tcp.state =
-		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
-	write_unlock_bh(&tcp_lock);
-
-	return 0;
-}
-#endif
-
-static unsigned int get_conntrack_index(const struct tcphdr *tcph)
-{
-	if (tcph->rst) return TCP_RST_SET;
-	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
-	else if (tcph->fin) return TCP_FIN_SET;
-	else if (tcph->ack) return TCP_ACK_SET;
-	else return TCP_NONE_SET;
-}
-
-/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
-   in IP Filter' by Guido van Rooij.
-
-   http://www.nluug.nl/events/sane2000/papers.html
-   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
-
-   The boundaries and the conditions are changed according to RFC793:
-   the packet must intersect the window (i.e. segments may be
-   after the right or before the left edge) and thus receivers may ACK
-   segments after the right edge of the window.
-
-	td_maxend = max(sack + max(win,1)) seen in reply packets
-	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
-	td_maxwin += seq + len - sender.td_maxend
-			if seq + len > sender.td_maxend
-	td_end    = max(seq + len) seen in sent packets
-
-   I.   Upper bound for valid data:	seq <= sender.td_maxend
-   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
-   III.	Upper bound for valid ack:      sack <= receiver.td_end
-   IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
-
-   where sack is the highest right edge of sack block found in the packet.
-
-   The upper bound limit for a valid ack is not ignored -
-   we doesn't have to deal with fragments.
-*/
-
-static inline __u32 segment_seq_plus_len(__u32 seq,
-					 size_t len,
-					 struct iphdr *iph,
-					 struct tcphdr *tcph)
-{
-	return (seq + len - (iph->ihl + tcph->doff)*4
-		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
-}
-
-/* Fixme: what about big packets? */
-#define MAXACKWINCONST			66000
-#define MAXACKWINDOW(sender)						\
-	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
-					      : MAXACKWINCONST)
-
-/*
- * Simplified tcp_parse_options routine from tcp_input.c
- */
-static void tcp_options(const struct sk_buff *skb,
-			struct iphdr *iph,
-			struct tcphdr *tcph,
-			struct ip_ct_tcp_state *state)
-{
-	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-	unsigned char *ptr;
-	int length = (tcph->doff*4) - sizeof(struct tcphdr);
-
-	if (!length)
-		return;
-
-	ptr = skb_header_pointer(skb,
-				 (iph->ihl * 4) + sizeof(struct tcphdr),
-				 length, buff);
-	BUG_ON(ptr == NULL);
-
-	state->td_scale =
-	state->flags = 0;
-
-	while (length > 0) {
-		int opcode=*ptr++;
-		int opsize;
-
-		switch (opcode) {
-		case TCPOPT_EOL:
-			return;
-		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-			length--;
-			continue;
-		default:
-			opsize=*ptr++;
-			if (opsize < 2) /* "silly options" */
-				return;
-			if (opsize > length)
-				break;	/* don't parse partial options */
-
-			if (opcode == TCPOPT_SACK_PERM
-			    && opsize == TCPOLEN_SACK_PERM)
-				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
-			else if (opcode == TCPOPT_WINDOW
-				 && opsize == TCPOLEN_WINDOW) {
-				state->td_scale = *(u_int8_t *)ptr;
-
-				if (state->td_scale > 14) {
-					/* See RFC1323 */
-					state->td_scale = 14;
-				}
-				state->flags |=
-					IP_CT_TCP_FLAG_WINDOW_SCALE;
-			}
-			ptr += opsize - 2;
-			length -= opsize;
-		}
-	}
-}
-
-static void tcp_sack(const struct sk_buff *skb,
-		     struct iphdr *iph,
-		     struct tcphdr *tcph,
-		     __u32 *sack)
-{
-	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-	unsigned char *ptr;
-	int length = (tcph->doff*4) - sizeof(struct tcphdr);
-	__u32 tmp;
-
-	if (!length)
-		return;
-
-	ptr = skb_header_pointer(skb,
-				 (iph->ihl * 4) + sizeof(struct tcphdr),
-				 length, buff);
-	BUG_ON(ptr == NULL);
-
-	/* Fast path for timestamp-only option */
-	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__be32 *)ptr ==
-		__constant_htonl((TCPOPT_NOP << 24)
-				 | (TCPOPT_NOP << 16)
-				 | (TCPOPT_TIMESTAMP << 8)
-				 | TCPOLEN_TIMESTAMP))
-		return;
-
-	while (length > 0) {
-		int opcode=*ptr++;
-		int opsize, i;
-
-		switch (opcode) {
-		case TCPOPT_EOL:
-			return;
-		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-			length--;
-			continue;
-		default:
-			opsize=*ptr++;
-			if (opsize < 2) /* "silly options" */
-				return;
-			if (opsize > length)
-				break;	/* don't parse partial options */
-
-			if (opcode == TCPOPT_SACK
-			    && opsize >= (TCPOLEN_SACK_BASE
-					  + TCPOLEN_SACK_PERBLOCK)
-			    && !((opsize - TCPOLEN_SACK_BASE)
-				 % TCPOLEN_SACK_PERBLOCK)) {
-				for (i = 0;
-				     i < (opsize - TCPOLEN_SACK_BASE);
-				     i += TCPOLEN_SACK_PERBLOCK) {
-					tmp = ntohl(*((__be32 *)(ptr+i)+1));
-
-					if (after(tmp, *sack))
-						*sack = tmp;
-				}
-				return;
-			}
-			ptr += opsize - 2;
-			length -= opsize;
-		}
-	}
-}
-
-static int tcp_in_window(struct ip_ct_tcp *state,
-			 enum ip_conntrack_dir dir,
-			 unsigned int index,
-			 const struct sk_buff *skb,
-			 struct iphdr *iph,
-			 struct tcphdr *tcph)
-{
-	struct ip_ct_tcp_state *sender = &state->seen[dir];
-	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
-	__u32 seq, ack, sack, end, win, swin;
-	int res;
-
-	/*
-	 * Get the required data from the packet.
-	 */
-	seq = ntohl(tcph->seq);
-	ack = sack = ntohl(tcph->ack_seq);
-	win = ntohs(tcph->window);
-	end = segment_seq_plus_len(seq, skb->len, iph, tcph);
-
-	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
-		tcp_sack(skb, iph, tcph, &sack);
-
-	DEBUGP("tcp_in_window: START\n");
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-
-	if (sender->td_end == 0) {
-		/*
-		 * Initialize sender data.
-		 */
-		if (tcph->syn && tcph->ack) {
-			/*
-			 * Outgoing SYN-ACK in reply to a SYN.
-			 */
-			sender->td_end =
-			sender->td_maxend = end;
-			sender->td_maxwin = (win == 0 ? 1 : win);
-
-			tcp_options(skb, iph, tcph, sender);
-			/*
-			 * RFC 1323:
-			 * Both sides must send the Window Scale option
-			 * to enable window scaling in either direction.
-			 */
-			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
-			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
-				sender->td_scale =
-				receiver->td_scale = 0;
-		} else {
-			/*
-			 * We are in the middle of a connection,
-			 * its history is lost for us.
-			 * Let's try to use the data from the packet.
-			 */
-			sender->td_end = end;
-			sender->td_maxwin = (win == 0 ? 1 : win);
-			sender->td_maxend = end + sender->td_maxwin;
-		}
-	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
-		     && dir == IP_CT_DIR_ORIGINAL)
-		    || (state->state == TCP_CONNTRACK_SYN_RECV
-			&& dir == IP_CT_DIR_REPLY))
-		    && after(end, sender->td_end)) {
-		/*
-		 * RFC 793: "if a TCP is reinitialized ... then it need
-		 * not wait at all; it must only be sure to use sequence
-		 * numbers larger than those recently used."
-		 */
-		sender->td_end =
-		sender->td_maxend = end;
-		sender->td_maxwin = (win == 0 ? 1 : win);
-
-		tcp_options(skb, iph, tcph, sender);
-	}
-
-	if (!(tcph->ack)) {
-		/*
-		 * If there is no ACK, just pretend it was set and OK.
-		 */
-		ack = sack = receiver->td_end;
-	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
-		    (TCP_FLAG_ACK|TCP_FLAG_RST))
-		   && (ack == 0)) {
-		/*
-		 * Broken TCP stacks, that set ACK in RST packets as well
-		 * with zero ack value.
-		 */
-		ack = sack = receiver->td_end;
-	}
-
-	if (seq == end
-	    && (!tcph->rst
-		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
-		/*
-		 * Packets contains no data: we assume it is valid
-		 * and check the ack value only.
-		 * However RST segments are always validated by their
-		 * SEQ number, except when seq == 0 (reset sent answering
-		 * SYN.
-		 */
-		seq = end = sender->td_end;
-
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-
-	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-		before(seq, sender->td_maxend + 1),
-		after(end, sender->td_end - receiver->td_maxwin - 1),
-		before(sack, receiver->td_end + 1),
-		after(ack, receiver->td_end - MAXACKWINDOW(sender)));
-
-	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
-	    before(sack, receiver->td_end + 1) &&
-	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
-		/*
-		 * Take into account window scaling (RFC 1323).
-		 */
-		if (!tcph->syn)
-			win <<= sender->td_scale;
-
-		/*
-		 * Update sender data.
-		 */
-		swin = win + (sack - ack);
-		if (sender->td_maxwin < swin)
-			sender->td_maxwin = swin;
-		if (after(end, sender->td_end))
-			sender->td_end = end;
-		/*
-		 * Update receiver data.
-		 */
-		if (after(end, sender->td_maxend))
-			receiver->td_maxwin += end - sender->td_maxend;
-		if (after(sack + win, receiver->td_maxend - 1)) {
-			receiver->td_maxend = sack + win;
-			if (win == 0)
-				receiver->td_maxend++;
-		}
-
-		/*
-		 * Check retransmissions.
-		 */
-		if (index == TCP_ACK_SET) {
-			if (state->last_dir == dir
-			    && state->last_seq == seq
-			    && state->last_ack == ack
-			    && state->last_end == end
-			    && state->last_win == win)
-				state->retrans++;
-			else {
-				state->last_dir = dir;
-				state->last_seq = seq;
-				state->last_ack = ack;
-				state->last_end = end;
-				state->last_win = win;
-				state->retrans = 0;
-			}
-		}
-		res = 1;
-	} else {
-		res = 0;
-		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
-		    ip_ct_tcp_be_liberal)
-			res = 1;
-		if (!res && LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-			"ip_ct_tcp: %s ",
-			before(seq, sender->td_maxend + 1) ?
-			after(end, sender->td_end - receiver->td_maxwin - 1) ?
-			before(sack, receiver->td_end + 1) ?
-			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
-			: "ACK is under the lower bound (possible overly delayed ACK)"
-			: "ACK is over the upper bound (ACKed data not seen yet)"
-			: "SEQ is under the lower bound (already ACKed data retransmitted)"
-			: "SEQ is over the upper bound (over the window of the receiver)");
-	}
-
-	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
-	       "receiver end=%u maxend=%u maxwin=%u\n",
-		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
-	return res;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-void ip_conntrack_tcp_update(struct sk_buff *skb,
-			     struct ip_conntrack *conntrack,
-			     enum ip_conntrack_dir dir)
-{
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
-	__u32 end;
-#ifdef DEBUGP_VARS
-	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
-	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
-
-	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
-
-	write_lock_bh(&tcp_lock);
-	/*
-	 * We have to worry for the ack in the reply packet only...
-	 */
-	if (after(end, conntrack->proto.tcp.seen[dir].td_end))
-		conntrack->proto.tcp.seen[dir].td_end = end;
-	conntrack->proto.tcp.last_end = end;
-	write_unlock_bh(&tcp_lock);
-	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-}
-
-#endif
-
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
-/* table of valid flag combinations - ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
-{
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_PUSH]		= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_PUSH|TH_URG]		= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_RST|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_PUSH]		= 1,
-	[TH_ACK|TH_URG]			= 1,
-	[TH_ACK|TH_URG|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
-};
-
-/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct sk_buff *skb,
-		     enum ip_conntrack_info *ctinfo,
-		     unsigned int hooknum)
-{
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr _tcph, *th;
-	unsigned int tcplen = skb->len - iph->ihl * 4;
-	u_int8_t tcpflags;
-
-	/* Smaller that minimal TCP header? */
-	th = skb_header_pointer(skb, iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				"ip_ct_tcp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Not whole TCP header or malformed packet */
-	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				"ip_ct_tcp: truncated/malformed packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Checksum invalid? Ignore.
-	 * We skip checking packets on the outgoing path
-	 * because it is assumed to be correct.
-	 */
-	/* FIXME: Source route IP option packets --RR */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: bad TCP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
-	if (!tcp_valid_flags[tcpflags]) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid TCP flag combination ");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct ip_conntrack *conntrack,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info ctinfo)
-{
-	enum tcp_conntrack new_state, old_state;
-	enum ip_conntrack_dir dir;
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th, _tcph;
-	unsigned long timeout;
-	unsigned int index;
-
-	th = skb_header_pointer(skb, iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
-
-	write_lock_bh(&tcp_lock);
-	old_state = conntrack->proto.tcp.state;
-	dir = CTINFO2DIR(ctinfo);
-	index = get_conntrack_index(th);
-	new_state = tcp_conntracks[dir][index][old_state];
-
-	switch (new_state) {
-	case TCP_CONNTRACK_IGNORE:
-		/* Ignored packets:
-		 *
-		 * a) SYN in ORIGINAL
-		 * b) SYN/ACK in REPLY
-		 * c) ACK in reply direction after initial SYN in original.
-		 */
-		if (index == TCP_SYNACK_SET
-		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
-		    && conntrack->proto.tcp.last_dir != dir
-		    && ntohl(th->ack_seq) ==
-			     conntrack->proto.tcp.last_end) {
-			/* This SYN/ACK acknowledges a SYN that we earlier
-			 * ignored as invalid. This means that the client and
-			 * the server are both in sync, while the firewall is
-			 * not. We kill this session and block the SYN/ACK so
-			 * that the client cannot but retransmit its SYN and
-			 * thus initiate a clean new session.
-			 */
-			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
-				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					      NULL, "ip_ct_tcp: "
-					      "killing out of sync session ");
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return -NF_DROP;
-		}
-		conntrack->proto.tcp.last_index = index;
-		conntrack->proto.tcp.last_dir = dir;
-		conntrack->proto.tcp.last_seq = ntohl(th->seq);
-		conntrack->proto.tcp.last_end =
-		    segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
-
-		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid packet ignored ");
-		return NF_ACCEPT;
-	case TCP_CONNTRACK_MAX:
-		/* Invalid packet */
-		DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
-		       dir, get_conntrack_index(th),
-		       old_state);
-		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid state ");
-		return -NF_ACCEPT;
-	case TCP_CONNTRACK_SYN_SENT:
-		if (old_state < TCP_CONNTRACK_TIME_WAIT)
-			break;
-		if ((conntrack->proto.tcp.seen[dir].flags &
-			 IP_CT_TCP_FLAG_CLOSE_INIT)
-		    || after(ntohl(th->seq),
-			     conntrack->proto.tcp.seen[dir].td_end)) {
-			/* Attempt to reopen a closed connection.
-			* Delete this connection and look up again. */
-			write_unlock_bh(&tcp_lock);
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return -NF_REPEAT;
-		} else {
-			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
-				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					      NULL, "ip_ct_tcp: invalid SYN");
-			return -NF_ACCEPT;
-		}
-	case TCP_CONNTRACK_CLOSE:
-		if (index == TCP_RST_SET
-		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-			 && conntrack->proto.tcp.last_index == TCP_SYN_SET)
-			|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-			    && conntrack->proto.tcp.last_index == TCP_ACK_SET))
-		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN or ACK we had let through
-			 * at a) and c) above:
-			 *
-			 * a) SYN was in window then
-			 * c) we hold a half-open connection.
-			 *
-			 * Delete our connection entry.
-			 * We skip window checking, because packet might ACK
-			 * segments we ignored. */
-			goto in_window;
-		}
-		/* Just fall through */
-	default:
-		/* Keep compilers happy. */
-		break;
-	}
-
-	if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
-			   skb, iph, th)) {
-		write_unlock_bh(&tcp_lock);
-		return -NF_ACCEPT;
-	}
-    in_window:
-	/* From now on we have got in-window packets */
-	conntrack->proto.tcp.last_index = index;
-
-	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
-		NIPQUAD(iph->saddr), ntohs(th->source),
-		NIPQUAD(iph->daddr), ntohs(th->dest),
-		(th->syn ? 1 : 0), (th->ack ? 1 : 0),
-		(th->fin ? 1 : 0), (th->rst ? 1 : 0),
-		old_state, new_state);
-
-	conntrack->proto.tcp.state = new_state;
-	if (old_state != new_state
-	    && (new_state == TCP_CONNTRACK_FIN_WAIT
-		|| new_state == TCP_CONNTRACK_CLOSE))
-		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
-	write_unlock_bh(&tcp_lock);
-
-	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-	if (new_state != old_state)
-		ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-
-	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
-		/* If only reply is a RST, we can consider ourselves not to
-		   have an established connection: this is a fairly common
-		   problem case, so we can delete the conntrack
-		   immediately.  --RR */
-		if (th->rst) {
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return NF_ACCEPT;
-		}
-	} else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-		   && (old_state == TCP_CONNTRACK_SYN_RECV
-		       || old_state == TCP_CONNTRACK_ESTABLISHED)
-		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
-		/* Set ASSURED if we see see valid ack in ESTABLISHED
-		   after SYN_RECV or a valid answer for a picked up
-		   connection. */
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	}
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack,
-		   const struct sk_buff *skb)
-{
-	enum tcp_conntrack new_state;
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
-	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
-	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
-
-	th = skb_header_pointer(skb, iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
-
-	/* Don't need lock here: this conntrack not in circulation yet */
-	new_state
-		= tcp_conntracks[0][get_conntrack_index(th)]
-		[TCP_CONNTRACK_NONE];
-
-	/* Invalid: delete conntrack */
-	if (new_state >= TCP_CONNTRACK_MAX) {
-		DEBUGP("ip_ct_tcp: invalid new deleting.\n");
-		return 0;
-	}
-
-	if (new_state == TCP_CONNTRACK_SYN_SENT) {
-		/* SYN packet */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     iph, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end;
-
-		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
-		conntrack->proto.tcp.seen[1].flags = 0;
-	} else if (ip_ct_tcp_loose == 0) {
-		/* Don't try to pick up connections. */
-		return 0;
-	} else {
-		/*
-		 * We are in the middle of a connection,
-		 * its history is lost for us.
-		 * Let's try to use the data from the packet.
-		 */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     iph, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end +
-			conntrack->proto.tcp.seen[0].td_maxwin;
-		conntrack->proto.tcp.seen[0].td_scale = 0;
-
-		/* We assume SACK and liberal window checking to handle
-		 * window scaling */
-		conntrack->proto.tcp.seen[0].flags =
-		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
-						     IP_CT_TCP_FLAG_BE_LIBERAL;
-	}
-
-	conntrack->proto.tcp.seen[1].td_end = 0;
-	conntrack->proto.tcp.seen[1].td_maxend = 0;
-	conntrack->proto.tcp.seen[1].td_maxwin = 1;
-	conntrack->proto.tcp.seen[1].td_scale = 0;
-
-	/* tcp_packet will set them */
-	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
-	conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
-	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-	return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
-{
-	.proto 			= IPPROTO_TCP,
-	.name 			= "tcp",
-	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
-	.print_conntrack 	= tcp_print_conntrack,
-	.packet 		= tcp_packet,
-	.new 			= tcp_new,
-	.error			= tcp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.to_nfattr		= tcp_to_nfattr,
-	.from_nfattr		= nfattr_to_tcp,
-	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 14c30c646c7f..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/seq_file.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
-
-static int udp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	struct udphdr _hdr, *hp;
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.udp.port = hp->source;
-	tuple->dst.u.udp.port = hp->dest;
-
-	return 1;
-}
-
-static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.udp.port = orig->dst.u.udp.port;
-	tuple->dst.u.udp.port = orig->src.u.udp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int udp_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.udp.port),
-			  ntohs(tuple->dst.u.udp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *conntrack)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct ip_conntrack *conntrack,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info ctinfo)
-{
-	/* If we've seen traffic both ways, this is some kind of UDP
-	   stream.  Extend timeout. */
-	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
-		ip_ct_refresh_acct(conntrack, ctinfo, skb,
-				   ip_ct_udp_timeout_stream);
-		/* Also, more likely to be important, and not a probe */
-		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
-			ip_conntrack_event_cache(IPCT_STATUS, skb);
-	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
-	return 1;
-}
-
-static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
-		     unsigned int hooknum)
-{
-	struct iphdr *iph = skb->nh.iph;
-	unsigned int udplen = skb->len - iph->ihl * 4;
-	struct udphdr _hdr, *hdr;
-
-	/* Header is too small? */
-	hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
-	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Truncated/malformed packets */
-	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: truncated/malformed packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Packet with no checksum */
-	if (!hdr->check)
-		return NF_ACCEPT;
-
-	/* Checksum invalid? Ignore.
-	 * We skip checking packets on the outgoing path
-	 * because the checksum is assumed to be correct.
-	 * FIXME: Source route IP option packets --RR */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: bad UDP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_udp =
-{
-	.proto 			= IPPROTO_UDP,
-	.name			= "udp",
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
-	.print_conntrack	= udp_print_conntrack,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
deleted file mode 100644
index c59a962c1f61..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* SIP extension for IP connection tracking.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_conntrack_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP connection tracking helper");
-
-#define MAX_PORTS	8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of sip servers");
-
-static unsigned int sip_timeout = SIP_TIMEOUT;
-module_param(sip_timeout, uint, 0600);
-MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-
-unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				struct ip_conntrack *ct,
-				const char **dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sip_hook);
-
-unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				struct ip_conntrack_expect *exp,
-				const char *dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
-
-static int digits_len(const char *dptr, const char *limit, int *shift);
-static int epaddr_len(const char *dptr, const char *limit, int *shift);
-static int skp_digits_len(const char *dptr, const char *limit, int *shift);
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
-
-struct sip_header_nfo {
-	const char	*lname;
-	const char	*sname;
-	const char	*ln_str;
-	size_t		lnlen;
-	size_t		snlen;
-	size_t		ln_strlen;
-	int		case_sensitive;
-	int		(*match_len)(const char *, const char *, int *);
-};
-
-static struct sip_header_nfo ct_sip_hdrs[] = {
-	[POS_REG_REQ_URI] = { 	/* SIP REGISTER request URI */
-		.lname		= "sip:",
-		.lnlen		= sizeof("sip:") - 1,
-		.ln_str		= ":",
-		.ln_strlen	= sizeof(":") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_REQ_URI] = { 	/* SIP request URI */
-		.lname		= "sip:",
-		.lnlen		= sizeof("sip:") - 1,
-		.ln_str		= "@",
-		.ln_strlen	= sizeof("@") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_FROM] = {		/* SIP From header */
-		.lname		= "From:",
-		.lnlen		= sizeof("From:") - 1,
-		.sname		= "\r\nf:",
-		.snlen		= sizeof("\r\nf:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len,
-	},
-	[POS_TO] = {		/* SIP To header */
-		.lname		= "To:",
-		.lnlen		= sizeof("To:") - 1,
-		.sname		= "\r\nt:",
-		.snlen		= sizeof("\r\nt:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len,
-	},
-	[POS_VIA] = { 		/* SIP Via header */
-		.lname		= "Via:",
-		.lnlen		= sizeof("Via:") - 1,
-		.sname		= "\r\nv:",
-		.snlen		= sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
-		.ln_str		= "UDP ",
-		.ln_strlen	= sizeof("UDP ") - 1,
-		.match_len	= epaddr_len,
-	},
-	[POS_CONTACT] = { 	/* SIP Contact header */
-		.lname		= "Contact:",
-		.lnlen		= sizeof("Contact:") - 1,
-		.sname		= "\r\nm:",
-		.snlen		= sizeof("\r\nm:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len
-	},
-	[POS_CONTENT] = { 	/* SIP Content length header */
-		.lname		= "Content-Length:",
-		.lnlen		= sizeof("Content-Length:") - 1,
-		.sname		= "\r\nl:",
-		.snlen		= sizeof("\r\nl:") - 1,
-		.ln_str		= ":",
-		.ln_strlen	= sizeof(":") - 1,
-		.match_len	= skp_digits_len
-	},
-	[POS_MEDIA] = {		/* SDP media info */
-		.case_sensitive	= 1,
-		.lname		= "\nm=",
-		.lnlen		= sizeof("\nm=") - 1,
-		.sname		= "\rm=",
-		.snlen		= sizeof("\rm=") - 1,
-		.ln_str		= "audio ",
-		.ln_strlen	= sizeof("audio ") - 1,
-		.match_len	= digits_len
-	},
-	[POS_OWNER] = { 	/* SDP owner address*/
-		.case_sensitive	= 1,
-		.lname		= "\no=",
-		.lnlen		= sizeof("\no=") - 1,
-		.sname		= "\ro=",
-		.snlen		= sizeof("\ro=") - 1,
-		.ln_str		= "IN IP4 ",
-		.ln_strlen	= sizeof("IN IP4 ") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_CONNECTION] = { 	/* SDP connection info */
-		.case_sensitive	= 1,
-		.lname		= "\nc=",
-		.lnlen		= sizeof("\nc=") - 1,
-		.sname		= "\rc=",
-		.snlen		= sizeof("\rc=") - 1,
-		.ln_str		= "IN IP4 ",
-		.ln_strlen	= sizeof("IN IP4 ") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_SDP_HEADER] = { 	/* SDP version header */
-		.case_sensitive	= 1,
-		.lname		= "\nv=",
-		.lnlen		= sizeof("\nv=") - 1,
-		.sname		= "\rv=",
-		.snlen		= sizeof("\rv=") - 1,
-		.ln_str		= "=",
-		.ln_strlen	= sizeof("=") - 1,
-		.match_len	= digits_len
-	}
-};
-
-/* get line lenght until first CR or LF seen. */
-int ct_sip_lnlen(const char *line, const char *limit)
-{
-	const char *k = line;
-
-	while ((line <= limit) && (*line == '\r' || *line == '\n'))
-		line++;
-
-	while (line <= limit) {
-		if (*line == '\r' || *line == '\n')
-			break;
-		line++;
-	}
-	return line - k;
-}
-EXPORT_SYMBOL_GPL(ct_sip_lnlen);
-
-/* Linear string search, case sensitive. */
-const char *ct_sip_search(const char *needle, const char *haystack,
-			  size_t needle_len, size_t haystack_len,
-			  int case_sensitive)
-{
-	const char *limit = haystack + (haystack_len - needle_len);
-
-	while (haystack <= limit) {
-		if (case_sensitive) {
-			if (strncmp(haystack, needle, needle_len) == 0)
-				return haystack;
-		} else {
-			if (strnicmp(haystack, needle, needle_len) == 0)
-				return haystack;
-		}
-		haystack++;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(ct_sip_search);
-
-static int digits_len(const char *dptr, const char *limit, int *shift)
-{
-	int len = 0;
-	while (dptr <= limit && isdigit(*dptr)) {
-		dptr++;
-		len++;
-	}
-	return len;
-}
-
-/* get digits lenght, skiping blank spaces. */
-static int skp_digits_len(const char *dptr, const char *limit, int *shift)
-{
-	for (; dptr <= limit && *dptr == ' '; dptr++)
-		(*shift)++;
-
-	return digits_len(dptr, limit, shift);
-}
-
-/* Simple ipaddr parser.. */
-static int parse_ipaddr(const char *cp,	const char **endp,
-			__be32 *ipaddr, const char *limit)
-{
-	unsigned long int val;
-	int i, digit = 0;
-
-	for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
-		digit = 0;
-		if (!isdigit(*cp))
-			break;
-
-		val = simple_strtoul(cp, (char **)&cp, 10);
-		if (val > 0xFF)
-			return -1;
-
-		((u_int8_t *)ipaddr)[i] = val;
-		digit = 1;
-
-		if (*cp != '.')
-			break;
-		cp++;
-	}
-	if (!digit)
-		return -1;
-
-	if (endp)
-		*endp = cp;
-
-	return 0;
-}
-
-/* skip ip address. returns it lenght. */
-static int epaddr_len(const char *dptr, const char *limit, int *shift)
-{
-	const char *aux = dptr;
-	__be32 ip;
-
-	if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
-		DEBUGP("ip: %s parse failed.!\n", dptr);
-		return 0;
-	}
-
-	/* Port number */
-	if (*dptr == ':') {
-		dptr++;
-		dptr += digits_len(dptr, limit, shift);
-	}
-	return dptr - aux;
-}
-
-/* get address length, skiping user info. */
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
-{
-	int s = *shift;
-
-	/* Search for @, but stop at the end of the line.
-	 * We are inside a sip: URI, so we don't need to worry about
-	 * continuation lines. */
-	while (dptr <= limit &&
-	       *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
-		(*shift)++;
-		dptr++;
-	}
-
-	if (dptr <= limit && *dptr == '@') {
-		dptr++;
-		(*shift)++;
-	} else
-		*shift = s;
-
-	return epaddr_len(dptr, limit, shift);
-}
-
-/* Returns 0 if not found, -1 error parsing. */
-int ct_sip_get_info(const char *dptr, size_t dlen,
-		    unsigned int *matchoff,
-		    unsigned int *matchlen,
-		    enum sip_header_pos pos)
-{
-	struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
-	const char *limit, *aux, *k = dptr;
-	int shift = 0;
-
-	limit = dptr + (dlen - hnfo->lnlen);
-
-	while (dptr <= limit) {
-		if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
-		    (hnfo->sname == NULL ||
-		     strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
-			dptr++;
-			continue;
-		}
-		aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
-				    ct_sip_lnlen(dptr, limit),
-				    hnfo->case_sensitive);
-		if (!aux) {
-			DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
-			       hnfo->lname);
-			return -1;
-		}
-		aux += hnfo->ln_strlen;
-
-		*matchlen = hnfo->match_len(aux, limit, &shift);
-		if (!*matchlen)
-			return -1;
-
-		*matchoff = (aux - k) + shift;
-
-		DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
-		       *matchlen);
-		return 1;
-	}
-	DEBUGP("%s header not found.\n", hnfo->lname);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ct_sip_get_info);
-
-static int set_expected_rtp(struct sk_buff **pskb,
-			    struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    __be32 ipaddr, u_int16_t port,
-			    const char *dptr)
-{
-	struct ip_conntrack_expect *exp;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	int ret;
-	typeof(ip_nat_sdp_hook) ip_nat_sdp;
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL)
-		return NF_DROP;
-
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.udp.port = 0;
-	exp->tuple.dst.ip = ipaddr;
-	exp->tuple.dst.u.udp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_UDP;
-
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.udp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.udp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-
-	ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
-	if (ip_nat_sdp)
-		ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
-	else {
-		if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		else
-			ret = NF_ACCEPT;
-	}
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-static int sip_help(struct sk_buff **pskb,
-		    struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff, datalen;
-	const char *dptr;
-	int ret = NF_ACCEPT;
-	int matchoff, matchlen;
-	__be32 ipaddr;
-	u_int16_t port;
-	typeof(ip_nat_sip_hook) ip_nat_sip;
-
-	/* No Data ? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
-		DEBUGP("skb->len = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-
-	ip_ct_refresh(ct, *pskb, sip_timeout * HZ);
-
-	if (!skb_is_nonlinear(*pskb))
-		dptr = (*pskb)->data + dataoff;
-	else {
-		DEBUGP("Copy of skbuff not supported yet.\n");
-		goto out;
-	}
-
-	ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
-	if (ip_nat_sip) {
-		if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
-			ret = NF_DROP;
-			goto out;
-		}
-	}
-
-	/* After this point NAT, could have mangled skb, so
-	   we need to recalculate payload lenght. */
-	datalen = (*pskb)->len - dataoff;
-
-	if (datalen < (sizeof("SIP/2.0 200") - 1))
-		goto out;
-
-	/* RTP info only in some SDP pkts */
-	if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
-	    memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
-		goto out;
-	}
-	/* Get ip and port address from SDP packet. */
-	if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
-			    POS_CONNECTION) > 0) {
-
-		/* We'll drop only if there are parse problems. */
-		if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
-				 dptr + datalen) < 0) {
-			ret = NF_DROP;
-			goto out;
-		}
-		if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
-				    POS_MEDIA) > 0) {
-
-			port = simple_strtoul(dptr + matchoff, NULL, 10);
-			if (port < 1024) {
-				ret = NF_DROP;
-				goto out;
-			}
-			ret = set_expected_rtp(pskb, ct, ctinfo,
-					       ipaddr, port, dptr);
-		}
-	}
-out:
-	return ret;
-}
-
-static struct ip_conntrack_helper sip[MAX_PORTS];
-static char sip_names[MAX_PORTS][10];
-
-static void fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering helper for port %d\n", ports[i]);
-		ip_conntrack_helper_unregister(&sip[i]);
-	}
-}
-
-static int __init init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	if (ports_c == 0)
-		ports[ports_c++] = SIP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		/* Create helper structure */
-		memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));
-
-		sip[i].tuple.dst.protonum = IPPROTO_UDP;
-		sip[i].tuple.src.u.udp.port = htons(ports[i]);
-		sip[i].mask.src.u.udp.port = htons(0xFFFF);
-		sip[i].mask.dst.protonum = 0xFF;
-		sip[i].max_expected = 2;
-		sip[i].timeout = 3 * 60; /* 3 minutes */
-		sip[i].me = THIS_MODULE;
-		sip[i].help = sip_help;
-
-		tmpname = &sip_names[i][0];
-		if (ports[i] == SIP_PORT)
-			sprintf(tmpname, "sip");
-		else
-			sprintf(tmpname, "sip-%d", i);
-		sip[i].name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret = ip_conntrack_helper_register(&sip[i]);
-		if (ret) {
-			printk("ERROR registering helper for port %d\n",
-				ports[i]);
-			fini();
-			return ret;
-		}
-	}
-	return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index 56b2f7546d1e..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/* This file contains all the functions required for the standalone
-   ip_conntrack module.
-
-   These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-#endif
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-
-extern atomic_t ip_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int kill_proto(struct ip_conntrack *i, void *data)
-{
-	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
-			*((u_int8_t *) data));
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
-	    struct ip_conntrack_protocol *proto)
-{
-	seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
-		   NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
-	return proto->print_tuple(s, tuple);
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
-		   const struct ip_conntrack_counter *counter)
-{
-	return seq_printf(s, "packets=%llu bytes=%llu ",
-			  (unsigned long long)counter->packets,
-			  (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y)	0
-#endif
-
-struct ct_iter_state {
-	unsigned int bucket;
-};
-
-static struct list_head *ct_get_first(struct seq_file *seq)
-{
-	struct ct_iter_state *st = seq->private;
-
-	for (st->bucket = 0;
-	     st->bucket < ip_conntrack_htable_size;
-	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
-	}
-	return NULL;
-}
-
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
-{
-	struct ct_iter_state *st = seq->private;
-
-	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
-		if (++st->bucket >= ip_conntrack_htable_size)
-			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
-	}
-	return head;
-}
-
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct list_head *head = ct_get_first(seq);
-
-	if (head)
-		while (pos && (head = ct_get_next(seq, head)))
-			pos--;
-	return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	read_lock_bh(&ip_conntrack_lock);
-	return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-{
-	read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
-	const struct ip_conntrack_tuple_hash *hash = v;
-	const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
-	struct ip_conntrack_protocol *proto;
-
-	IP_NF_ASSERT(conntrack);
-
-	/* we only want to print DIR_ORIGINAL */
-	if (DIRECTION(hash))
-		return 0;
-
-	proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-	IP_NF_ASSERT(proto);
-
-	if (seq_printf(s, "%-8s %u %ld ",
-		      proto->name,
-		      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
-		      timer_pending(&conntrack->timeout)
-		      ? (long)(conntrack->timeout.expires - jiffies)/HZ
-		      : 0) != 0)
-		return -ENOSPC;
-
-	if (proto->print_conntrack(s, conntrack))
-		return -ENOSPC;
-
-	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			proto))
-		return -ENOSPC;
-
-	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
-		return -ENOSPC;
-
-	if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
-		if (seq_printf(s, "[UNREPLIED] "))
-			return -ENOSPC;
-
-	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
-			proto))
-		return -ENOSPC;
-
-	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
-		return -ENOSPC;
-
-	if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
-		if (seq_printf(s, "[ASSURED] "))
-			return -ENOSPC;
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (seq_printf(s, "mark=%u ", conntrack->mark))
-		return -ENOSPC;
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-	if (seq_printf(s, "secmark=%u ", conntrack->secmark))
-		return -ENOSPC;
-#endif
-
-	if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
-		return -ENOSPC;
-
-	return 0;
-}
-
-static struct seq_operations ct_seq_ops = {
-	.start = ct_seq_start,
-	.next  = ct_seq_next,
-	.stop  = ct_seq_stop,
-	.show  = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq;
-	struct ct_iter_state *st;
-	int ret;
-
-	st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-	ret = seq_open(file, &ct_seq_ops);
-	if (ret)
-		goto out_free;
-	seq          = file->private_data;
-	seq->private = st;
-	memset(st, 0, sizeof(struct ct_iter_state));
-	return ret;
-out_free:
-	kfree(st);
-	return ret;
-}
-
-static const struct file_operations ct_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_private,
-};
-
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
-	struct list_head *e = &ip_conntrack_expect_list;
-	loff_t i;
-
-	/* strange seq_file api calls stop even if we fail,
-	 * thus we need to grab lock since stop unlocks */
-	read_lock_bh(&ip_conntrack_lock);
-
-	if (list_empty(e))
-		return NULL;
-
-	for (i = 0; i <= *pos; i++) {
-		e = e->next;
-		if (e == &ip_conntrack_expect_list)
-			return NULL;
-	}
-	return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	struct list_head *e = v;
-
-	++*pos;
-	e = e->next;
-
-	if (e == &ip_conntrack_expect_list)
-		return NULL;
-
-	return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
-	read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
-	struct ip_conntrack_expect *expect = v;
-
-	if (expect->timeout.function)
-		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
-			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
-	else
-		seq_printf(s, "- ");
-
-	seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
-
-	print_tuple(s, &expect->tuple,
-		    __ip_conntrack_proto_find(expect->tuple.dst.protonum));
-	return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
-	.start = exp_seq_start,
-	.next = exp_seq_next,
-	.stop = exp_seq_stop,
-	.show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &exp_seq_ops);
-}
-
-static const struct file_operations exp_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = exp_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	int cpu;
-
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return &per_cpu(ip_conntrack_stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	int cpu;
-
-	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return &per_cpu(ip_conntrack_stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
-	struct ip_conntrack_stat *st = v;
-
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
-		return 0;
-	}
-
-	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
-		   nr_conntracks,
-		   st->searched,
-		   st->found,
-		   st->new,
-		   st->invalid,
-		   st->ignore,
-		   st->delete,
-		   st->delete_list,
-		   st->insert,
-		   st->insert_failed,
-		   st->drop,
-		   st->early_drop,
-		   st->error,
-
-		   st->expect_new,
-		   st->expect_create,
-		   st->expect_delete
-		);
-	return 0;
-}
-
-static struct seq_operations ct_cpu_seq_ops = {
-	.start  = ct_cpu_seq_start,
-	.next   = ct_cpu_seq_next,
-	.stop   = ct_cpu_seq_stop,
-	.show   = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ct_cpu_seq_ops);
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_cpu_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_private,
-};
-#endif
-
-static unsigned int ip_confirm(unsigned int hooknum,
-			       struct sk_buff **pskb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       int (*okfn)(struct sk_buff *))
-{
-	/* We've seen it coming out the other side: confirm it */
-	return ip_conntrack_confirm(pskb);
-}
-
-static unsigned int ip_conntrack_help(unsigned int hooknum,
-				      struct sk_buff **pskb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
-		unsigned int ret;
-		ret = ct->helper->help(pskb, ct, ctinfo);
-		if (ret != NF_ACCEPT)
-			return ret;
-	}
-	return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_defrag(unsigned int hooknum,
-					struct sk_buff **pskb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
-	/* Previously seen (loopback)?  Ignore.  Do this before
-	   fragment check. */
-	if ((*pskb)->nfct)
-		return NF_ACCEPT;
-#endif
-
-	/* Gather fragments. */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb,
-					   hooknum == NF_IP_PRE_ROUTING ?
-					   IP_DEFRAG_CONNTRACK_IN :
-					   IP_DEFRAG_CONNTRACK_OUT);
-		if (!*pskb)
-			return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_local(unsigned int hooknum,
-				       struct sk_buff **pskb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
-{
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
-		if (net_ratelimit())
-			printk("ipt_hook: happy cracking.\n");
-		return NF_ACCEPT;
-	}
-	return ip_conntrack_in(hooknum, pskb, in, out, okfn);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
-   make it the first hook. */
-static struct nf_hook_ops ip_conntrack_ops[] = {
-	{
-		.hook		= ip_conntrack_defrag,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
-	{
-		.hook		= ip_conntrack_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ip_conntrack_defrag,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
-	{
-		.hook		= ip_conntrack_local,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ip_conntrack_help,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ip_conntrack_help,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ip_confirm,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-	{
-		.hook		= ip_confirm,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-};
-
-/* Sysctl support */
-
-int ip_conntrack_checksum __read_mostly = 1;
-
-#ifdef CONFIG_SYSCTL
-
-/* From ip_conntrack_core.c */
-extern int ip_conntrack_max;
-extern unsigned int ip_conntrack_htable_size;
-
-/* From ip_conntrack_proto_tcp.c */
-extern unsigned int ip_ct_tcp_timeout_syn_sent;
-extern unsigned int ip_ct_tcp_timeout_syn_recv;
-extern unsigned int ip_ct_tcp_timeout_established;
-extern unsigned int ip_ct_tcp_timeout_fin_wait;
-extern unsigned int ip_ct_tcp_timeout_close_wait;
-extern unsigned int ip_ct_tcp_timeout_last_ack;
-extern unsigned int ip_ct_tcp_timeout_time_wait;
-extern unsigned int ip_ct_tcp_timeout_close;
-extern unsigned int ip_ct_tcp_timeout_max_retrans;
-extern int ip_ct_tcp_loose;
-extern int ip_ct_tcp_be_liberal;
-extern int ip_ct_tcp_max_retrans;
-
-/* From ip_conntrack_proto_udp.c */
-extern unsigned int ip_ct_udp_timeout;
-extern unsigned int ip_ct_udp_timeout_stream;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_icmp_timeout;
-
-/* From ip_conntrack_proto_generic.c */
-extern unsigned int ip_ct_generic_timeout;
-
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-
-static ctl_table ip_ct_sysctl_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
-		.procname	= "ip_conntrack_max",
-		.data		= &ip_conntrack_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
-		.procname	= "ip_conntrack_count",
-		.data		= &ip_conntrack_count,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_BUCKETS,
-		.procname	= "ip_conntrack_buckets",
-		.data		= &ip_conntrack_htable_size,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_CHECKSUM,
-		.procname	= "ip_conntrack_checksum",
-		.data		= &ip_conntrack_checksum,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
-		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
-		.data		= &ip_ct_tcp_timeout_syn_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
-		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
-		.data		= &ip_ct_tcp_timeout_syn_recv,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
-		.procname	= "ip_conntrack_tcp_timeout_established",
-		.data		= &ip_ct_tcp_timeout_established,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
-		.data		= &ip_ct_tcp_timeout_fin_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_close_wait",
-		.data		= &ip_ct_tcp_timeout_close_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
-		.procname	= "ip_conntrack_tcp_timeout_last_ack",
-		.data		= &ip_ct_tcp_timeout_last_ack,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_time_wait",
-		.data		= &ip_ct_tcp_timeout_time_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
-		.procname	= "ip_conntrack_tcp_timeout_close",
-		.data		= &ip_ct_tcp_timeout_close,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
-		.procname	= "ip_conntrack_udp_timeout",
-		.data		= &ip_ct_udp_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
-		.procname	= "ip_conntrack_udp_timeout_stream",
-		.data		= &ip_ct_udp_timeout_stream,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
-		.procname	= "ip_conntrack_icmp_timeout",
-		.data		= &ip_ct_icmp_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
-		.procname	= "ip_conntrack_generic_timeout",
-		.data		= &ip_ct_generic_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_LOG_INVALID,
-		.procname	= "ip_conntrack_log_invalid",
-		.data		= &ip_ct_log_invalid,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &log_invalid_proto_min,
-		.extra2		= &log_invalid_proto_max,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
-		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
-		.data		= &ip_ct_tcp_timeout_max_retrans,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
-		.procname	= "ip_conntrack_tcp_loose",
-		.data		= &ip_ct_tcp_loose,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
-		.procname	= "ip_conntrack_tcp_be_liberal",
-		.data		= &ip_ct_tcp_be_liberal,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
-		.procname	= "ip_conntrack_tcp_max_retrans",
-		.data		= &ip_ct_tcp_max_retrans,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{ .ctl_name = 0 }
-};
-
-#define NET_IP_CONNTRACK_MAX 2089
-
-static ctl_table ip_ct_netfilter_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-		.child		= ip_ct_sysctl_table,
-	},
-	{
-		.ctl_name	= NET_IP_CONNTRACK_MAX,
-		.procname	= "ip_conntrack_max",
-		.data		= &ip_conntrack_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ip_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ip_ct_ipv4_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-EXPORT_SYMBOL(ip_ct_log_invalid);
-#endif /* CONFIG_SYSCTL */
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
-   them. --RR */
-int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
-{
-	int ret = 0;
-
-	write_lock_bh(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
-		ret = -EBUSY;
-		goto out;
-	}
-	rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
- out:
-	write_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	rcu_assign_pointer(ip_ct_protos[proto->proto],
-			   &ip_conntrack_generic_protocol);
-	write_unlock_bh(&ip_conntrack_lock);
-	synchronize_rcu();
-
-	/* Remove all contrack entries for this protocol */
-	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
-}
-
-static int __init ip_conntrack_standalone_init(void)
-{
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-#endif
-	int ret = 0;
-
-	ret = ip_conntrack_init();
-	if (ret < 0)
-		return ret;
-
-#ifdef CONFIG_PROC_FS
-	ret = -ENOMEM;
-	proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
-	if (!proc) goto cleanup_init;
-
-	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
-					&exp_file_ops);
-	if (!proc_exp) goto cleanup_proc;
-
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
-
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
-#endif
-
-	ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-	if (ret < 0) {
-		printk("ip_conntrack: can't register hooks.\n");
-		goto cleanup_proc_stat;
-	}
-#ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
-	if (ip_ct_sysctl_header == NULL) {
-		printk("ip_conntrack: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto cleanup_hooks;
-	}
-#endif
-	return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup_hooks:
-	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#endif
- cleanup_proc_stat:
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
-	proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
-	proc_net_remove("ip_conntrack");
- cleanup_init:
-#endif /* CONFIG_PROC_FS */
-	ip_conntrack_cleanup();
-	return ret;
-}
-
-static void __exit ip_conntrack_standalone_fini(void)
-{
-	synchronize_net();
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
-	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
-	proc_net_remove("ip_conntrack_expect");
-	proc_net_remove("ip_conntrack");
-#endif /* CONFIG_PROC_FS */
-	ip_conntrack_cleanup();
-}
-
-module_init(ip_conntrack_standalone_init);
-module_exit(ip_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
-   They should call this. */
-void need_conntrack(void)
-{
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(ip_conntrack_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
-#endif
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
-EXPORT_SYMBOL(ip_ct_get_tuple);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(ip_conntrack_hash);
-EXPORT_SYMBOL(ip_conntrack_untracked);
-EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(ip_conntrack_tcp_update);
-#endif
-
-EXPORT_SYMBOL_GPL(ip_conntrack_flush);
-EXPORT_SYMBOL_GPL(__ip_conntrack_find);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
-EXPORT_SYMBOL_GPL(ip_conntrack_free);
-EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
-
-EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
-#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index 76e175e7a972..000000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * 	- port to newnat API
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp connection tracking helper");
-MODULE_LICENSE("GPL");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
-				 enum ip_conntrack_info ctinfo,
-				 struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
-
-static int tftp_help(struct sk_buff **pskb,
-		     struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	struct tftphdr _tftph, *tfh;
-	struct ip_conntrack_expect *exp;
-	unsigned int ret = NF_ACCEPT;
-	typeof(ip_nat_tftp_hook) ip_nat_tftp;
-
-	tfh = skb_header_pointer(*pskb,
-				 (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
-				 sizeof(_tftph), &_tftph);
-	if (tfh == NULL)
-		return NF_ACCEPT;
-
-	switch (ntohs(tfh->opcode)) {
-	/* RRQ and WRQ works the same way */
-	case TFTP_OPCODE_READ:
-	case TFTP_OPCODE_WRITE:
-		DEBUGP("");
-		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-		exp = ip_conntrack_expect_alloc(ct);
-		if (exp == NULL)
-			return NF_DROP;
-
-		exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-		exp->mask.src.ip = htonl(0xffffffff);
-		exp->mask.src.u.udp.port = 0;
-		exp->mask.dst.ip = htonl(0xffffffff);
-		exp->mask.dst.u.udp.port = htons(0xffff);
-		exp->mask.dst.protonum = 0xff;
-		exp->expectfn = NULL;
-		exp->flags = 0;
-
-		DEBUGP("expect: ");
-		DUMP_TUPLE(&exp->tuple);
-		DUMP_TUPLE(&exp->mask);
-		ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
-		if (ip_nat_tftp)
-			ret = ip_nat_tftp(pskb, ctinfo, exp);
-		else if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		ip_conntrack_expect_put(exp);
-		break;
-	case TFTP_OPCODE_DATA:
-	case TFTP_OPCODE_ACK:
-		DEBUGP("Data/ACK opcode\n");
-		break;
-	case TFTP_OPCODE_ERROR:
-		DEBUGP("Error opcode\n");
-		break;
-	default:
-		DEBUGP("Unknown opcode\n");
-	}
-	return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
-
-static void ip_conntrack_tftp_fini(void)
-{
-	int i;
-
-	for (i = 0 ; i < ports_c; i++) {
-		DEBUGP("unregistering helper for port %d\n",
-			ports[i]);
-		ip_conntrack_helper_unregister(&tftp[i]);
-	}
-}
-
-static int __init ip_conntrack_tftp_init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	if (ports_c == 0)
-		ports[ports_c++] = TFTP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		/* Create helper structure */
-		memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
-
-		tftp[i].tuple.dst.protonum = IPPROTO_UDP;
-		tftp[i].tuple.src.u.udp.port = htons(ports[i]);
-		tftp[i].mask.dst.protonum = 0xFF;
-		tftp[i].mask.src.u.udp.port = htons(0xFFFF);
-		tftp[i].max_expected = 1;
-		tftp[i].timeout = 5 * 60; /* 5 minutes */
-		tftp[i].me = THIS_MODULE;
-		tftp[i].help = tftp_help;
-
-		tmpname = &tftp_names[i][0];
-		if (ports[i] == TFTP_PORT)
-			sprintf(tmpname, "tftp");
-		else
-			sprintf(tmpname, "tftp-%d", i);
-		tftp[i].name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret=ip_conntrack_helper_register(&tftp[i]);
-		if (ret) {
-			printk("ERROR registering helper for port %d\n",
-				ports[i]);
-			ip_conntrack_tftp_fini();
-			return(ret);
-		}
-	}
-	return(0);
-}
-
-module_init(ip_conntrack_tftp_init);
-module_exit(ip_conntrack_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 85df1a9aed33..000000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *	Module load syntax:
- * 	insmod ip_nat_amanda.o
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int matchoff,
-			 unsigned int matchlen,
-			 struct ip_conntrack_expect *exp)
-{
-	char buffer[sizeof("65535")];
-	u_int16_t port;
-	unsigned int ret;
-
-	/* Connection comes from client. */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = IP_CT_DIR_ORIGINAL;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one (ie. same IP: it will be TCP and master is UDP). */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	sprintf(buffer, "%u", port);
-	ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
-				       matchoff, matchlen,
-				       buffer, strlen(buffer));
-	if (ret != NF_ACCEPT)
-		ip_conntrack_unexpect_related(exp);
-	return ret;
-}
-
-static void __exit ip_nat_amanda_fini(void)
-{
-	rcu_assign_pointer(ip_nat_amanda_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_amanda_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_amanda_hook));
-	rcu_assign_pointer(ip_nat_amanda_hook, help);
-	return 0;
-}
-
-module_init(ip_nat_amanda_init);
-module_exit(ip_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index 40737fdbe9a7..000000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,634 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>  /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_nat_lock);
-
-/* Calculated at init based on memory size */
-static unsigned int ip_nat_htable_size;
-
-static struct list_head *bysource;
-
-#define MAX_IP_NAT_PROTO 256
-static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-
-static inline struct ip_nat_protocol *
-__ip_nat_proto_find(u_int8_t protonum)
-{
-	return rcu_dereference(ip_nat_protos[protonum]);
-}
-
-struct ip_nat_protocol *
-ip_nat_proto_find_get(u_int8_t protonum)
-{
-	struct ip_nat_protocol *p;
-
-	rcu_read_lock();
-	p = __ip_nat_proto_find(protonum);
-	if (!try_module_get(p->me))
-		p = &ip_nat_unknown_protocol;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-
-void
-ip_nat_proto_put(struct ip_nat_protocol *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct ip_conntrack_tuple *tuple)
-{
-	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
-			    tuple->dst.protonum, 0) % ip_nat_htable_size;
-}
-
-/* Noone using conntrack by the time this called. */
-static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
-{
-	if (!(conn->status & IPS_NAT_DONE_MASK))
-		return;
-
-	write_lock_bh(&ip_nat_lock);
-	list_del(&conn->nat.info.bysource);
-	write_unlock_bh(&ip_nat_lock);
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
-		  const struct ip_conntrack *ignored_conntrack)
-{
-	/* Conntrack tracking doesn't keep track of outgoing tuples; only
-	   incoming ones.  NAT means they don't have a fixed mapping,
-	   so we invert the tuple and look for the incoming reply.
-
-	   We could keep a separate hash if this proves too slow. */
-	struct ip_conntrack_tuple reply;
-
-	invert_tuplepr(&reply, tuple);
-	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(ip_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct ip_conntrack_tuple *tuple,
-	 const struct ip_nat_range *range)
-{
-	struct ip_nat_protocol *proto;
-	int ret = 0;
-
-	/* If we are supposed to map IPs, then we must be in the
-	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
-		if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
-		    || ntohl(tuple->src.ip) > ntohl(range->max_ip))
-			return 0;
-	}
-
-	rcu_read_lock();
-	proto = __ip_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
-	    || proto->in_range(tuple, IP_NAT_MANIP_SRC,
-			       &range->min, &range->max))
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static inline int
-same_src(const struct ip_conntrack *ct,
-	 const struct ip_conntrack_tuple *tuple)
-{
-	return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
-		== tuple->dst.protonum
-		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
-		== tuple->src.ip
-		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
-		== tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(const struct ip_conntrack_tuple *tuple,
-		     struct ip_conntrack_tuple *result,
-		     const struct ip_nat_range *range)
-{
-	unsigned int h = hash_by_src(tuple);
-	struct ip_conntrack *ct;
-
-	read_lock_bh(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
-		if (same_src(ct, tuple)) {
-			/* Copy source part from reply tuple. */
-			invert_tuplepr(result,
-				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-			result->dst = tuple->dst;
-
-			if (in_range(result, range)) {
-				read_unlock_bh(&ip_nat_lock);
-				return 1;
-			}
-		}
-	}
-	read_unlock_bh(&ip_nat_lock);
-	return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
-   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
-   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
-   1-65535, we don't do pro-rata allocation based on ports; we choose
-   the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct ip_conntrack_tuple *tuple,
-		    const struct ip_nat_range *range,
-		    const struct ip_conntrack *conntrack,
-		    enum ip_nat_manip_type maniptype)
-{
-	__be32 *var_ipp;
-	/* Host order */
-	u_int32_t minip, maxip, j;
-
-	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
-		return;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		var_ipp = &tuple->src.ip;
-	else
-		var_ipp = &tuple->dst.ip;
-
-	/* Fast path: only one choice. */
-	if (range->min_ip == range->max_ip) {
-		*var_ipp = range->min_ip;
-		return;
-	}
-
-	/* Hashing source and destination IPs gives a fairly even
-	 * spread in practice (if there are a small number of IPs
-	 * involved, there usually aren't that many connections
-	 * anyway).  The consistency means that servers see the same
-	 * client coming from the same IP (some Internet Banking sites
-	 * like this), even across reboots. */
-	minip = ntohl(range->min_ip);
-	maxip = ntohl(range->max_ip);
-	j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
-	*var_ipp = htonl(minip + j % (maxip - minip + 1));
-}
-
-/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
- * we change the source to map into the range.  For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
- * range.  It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_conntrack_tuple *orig_tuple,
-		 const struct ip_nat_range *range,
-		 struct ip_conntrack *conntrack,
-		 enum ip_nat_manip_type maniptype)
-{
-	struct ip_nat_protocol *proto;
-
-	/* 1) If this srcip/proto/src-proto-part is currently mapped,
-	   and that same mapping gives a unique tuple within the given
-	   range, use that.
-
-	   This is only required for source (ie. NAT/masq) mappings.
-	   So far, we don't do local source mappings, so multiple
-	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		if (find_appropriate_src(orig_tuple, tuple, range)) {
-			DEBUGP("get_unique_tuple: Found current src map\n");
-			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
-				if (!ip_nat_used_tuple(tuple, conntrack))
-					return;
-		}
-	}
-
-	/* 2) Select the least-used IP/proto combination in the given
-	   range. */
-	*tuple = *orig_tuple;
-	find_best_ips_proto(tuple, range, conntrack, maniptype);
-
-	/* 3) The per-protocol part of the manip is made to map into
-	   the range to make a unique tuple. */
-
-	rcu_read_lock();
-	proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
-
-	/* Change protocol info to have some randomization */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
-		proto->unique_tuple(tuple, range, maniptype, conntrack);
-		goto out;
-	}
-
-	/* Only bother mapping if it's not already in range and unique */
-	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
-	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
-	    && !ip_nat_used_tuple(tuple, conntrack))
-		goto out;
-
-	/* Last change: get protocol to try to obtain unique tuple. */
-	proto->unique_tuple(tuple, range, maniptype, conntrack);
-out:
-	rcu_read_unlock();
-}
-
-unsigned int
-ip_nat_setup_info(struct ip_conntrack *conntrack,
-		  const struct ip_nat_range *range,
-		  unsigned int hooknum)
-{
-	struct ip_conntrack_tuple curr_tuple, new_tuple;
-	struct ip_nat_info *info = &conntrack->nat.info;
-	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
-	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_POST_ROUTING
-		     || hooknum == NF_IP_LOCAL_IN
-		     || hooknum == NF_IP_LOCAL_OUT);
-	BUG_ON(ip_nat_initialized(conntrack, maniptype));
-
-	/* What we've got will look like inverse of reply. Normally
-	   this is what is in the conntrack, except for prior
-	   manipulations (future optimization: if num_manips == 0,
-	   orig_tp =
-	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
-	invert_tuplepr(&curr_tuple,
-		       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
-
-	if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
-		struct ip_conntrack_tuple reply;
-
-		/* Alter conntrack table so will recognize replies. */
-		invert_tuplepr(&reply, &new_tuple);
-		ip_conntrack_alter_reply(conntrack, &reply);
-
-		/* Non-atomic: we own this at the moment. */
-		if (maniptype == IP_NAT_MANIP_SRC)
-			conntrack->status |= IPS_SRC_NAT;
-		else
-			conntrack->status |= IPS_DST_NAT;
-	}
-
-	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
-		unsigned int srchash
-			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				      .tuple);
-		write_lock_bh(&ip_nat_lock);
-		list_add(&info->bysource, &bysource[srchash]);
-		write_unlock_bh(&ip_nat_lock);
-	}
-
-	/* It's done. */
-	if (maniptype == IP_NAT_MANIP_DST)
-		set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
-	else
-		set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL(ip_nat_setup_info);
-
-/* Returns true if succeeded. */
-static int
-manip_pkt(u_int16_t proto,
-	  struct sk_buff **pskb,
-	  unsigned int iphdroff,
-	  const struct ip_conntrack_tuple *target,
-	  enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph;
-	struct ip_nat_protocol *p;
-
-	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
-		return 0;
-
-	iph = (void *)(*pskb)->data + iphdroff;
-
-	/* Manipulate protcol part. */
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	p = __ip_nat_proto_find(proto);
-	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
-		return 0;
-
-	iph = (void *)(*pskb)->data + iphdroff;
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
-		iph->saddr = target->src.ip;
-	} else {
-		nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
-		iph->daddr = target->dst.ip;
-	}
-	return 1;
-}
-
-/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int ip_nat_packet(struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned int hooknum,
-			   struct sk_buff **pskb)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
-	if (mtype == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	/* Non-atomic: these bits don't change. */
-	if (ct->status & statusbit) {
-		struct ip_conntrack_tuple target;
-
-		/* We are aiming to look like inverse of other direction. */
-		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
-		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(ip_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum,
-				  struct sk_buff **pskb)
-{
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} *inside;
-	struct ip_conntrack_protocol *proto;
-	struct ip_conntrack_tuple inner, target;
-	int hdrlen = (*pskb)->nh.iph->ihl * 4;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
-
-	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
-		return 0;
-
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-
-	/* We're actually going to mangle it beyond trivial checksum
-	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
-		return 0;
-
-	/* Must be RELATED */
-	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
-		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
-	/* Redirects on non-null nats must be dropped, else they'll
-	   start talking to each other without our translation, and be
-	   confused... --RR */
-	if (inside->icmp.type == ICMP_REDIRECT) {
-		/* If NAT isn't finished, assume it and drop. */
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
-	}
-
-	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
-	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	proto = __ip_conntrack_proto_find(inside->ip.protocol);
-	if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
-			     sizeof(struct icmphdr) + inside->ip.ihl*4,
-			     &inner, proto))
-		return 0;
-
-	/* Change inner back to look like incoming packet.  We do the
-	   opposite manip on this hook to normal, because it might not
-	   pass all hooks (locally-generated ICMP).  Consider incoming
-	   packet: PREROUTING (DST manip), routing produces ICMP, goes
-	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, pskb,
-		       (*pskb)->nh.iph->ihl*4
-		       + sizeof(inside->icmp),
-		       &ct->tuplehash[!dir].tuple,
-		       !manip))
-		return 0;
-
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-		inside->icmp.checksum = 0;
-		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-							       (*pskb)->len - hdrlen,
-							       0));
-	}
-
-	/* Change outer to look the reply to an incoming packet
-	 * (proto 0 means don't invert per-proto part). */
-	if (manip == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, pskb, 0, &target, manip))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
-	int ret = 0;
-
-	write_lock_bh(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
-		ret = -EBUSY;
-		goto out;
-	}
-	rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
- out:
-	write_unlock_bh(&ip_nat_lock);
-	return ret;
-}
-EXPORT_SYMBOL(ip_nat_protocol_register);
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
-	write_lock_bh(&ip_nat_lock);
-	rcu_assign_pointer(ip_nat_protos[proto->protonum],
-			   &ip_nat_unknown_protocol);
-	write_unlock_bh(&ip_nat_lock);
-	synchronize_rcu();
-}
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-int
-ip_nat_port_range_to_nfattr(struct sk_buff *skb,
-			    const struct ip_nat_range *range)
-{
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
-		&range->min.tcp.port);
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
-		&range->max.tcp.port);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-int
-ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
-{
-	int ret = 0;
-
-	/* we have to return whether we actually parsed something or not */
-
-	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
-		ret = 1;
-		range->min.tcp.port =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
-	}
-
-	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
-		if (ret)
-			range->max.tcp.port = range->min.tcp.port;
-	} else {
-		ret = 1;
-		range->max.tcp.port =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
-EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
-#endif
-
-static int __init ip_nat_init(void)
-{
-	size_t i;
-
-	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
-
-	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
-
-	/* Sew in builtin protocols. */
-	write_lock_bh(&ip_nat_lock);
-	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
-	write_unlock_bh(&ip_nat_lock);
-
-	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
-	}
-
-	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
-	rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
-
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
-	return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct ip_conntrack *i, void *data)
-{
-	memset(&i->nat, 0, sizeof(i->nat));
-	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
-	return 0;
-}
-
-static void __exit ip_nat_cleanup(void)
-{
-	ip_ct_iterate_cleanup(&clean_nat, NULL);
-	rcu_assign_pointer(ip_conntrack_destroyed, NULL);
-	synchronize_rcu();
-	vfree(bysource);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index 32e01d8dffcb..000000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff **pskb,
-		     __be32 newip,
-		     u_int16_t port,
-		     unsigned int matchoff,
-		     unsigned int matchlen,
-		     struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo,
-		     u32 *seq)
-{
-	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
-	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
-		NIPQUAD(newip), port>>8, port&0xFF);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff **pskb,
-		   __be32 newip,
-		   u_int16_t port,
-		   unsigned int matchoff,
-		   unsigned int matchlen,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo,
-		   u32 *seq)
-{
-	char buffer[sizeof("|1|255.255.255.255|65535|")];
-
-	sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff **pskb,
-		   __be32 newip,
-		   u_int16_t port,
-		   unsigned int matchoff,
-		   unsigned int matchlen,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo,
-		   u32 *seq)
-{
-	char buffer[sizeof("|||65535|")];
-
-	sprintf(buffer, "|||%u|", port);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
-		     unsigned int,
-		     unsigned int,
-		     struct ip_conntrack *,
-		     enum ip_conntrack_info,
-		     u32 *seq)
-= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
-    [IP_CT_FTP_PASV] = mangle_rfc959_packet,
-    [IP_CT_FTP_EPRT] = mangle_eprt_packet,
-    [IP_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
-   Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_ftp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       enum ip_ct_ftp_type type,
-			       unsigned int matchoff,
-			       unsigned int matchlen,
-			       struct ip_conntrack_expect *exp,
-			       u32 *seq)
-{
-	__be32 newip;
-	u_int16_t port;
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_conntrack *ct = exp->master;
-
-	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
-	/* Connection will come from wherever this packet goes, hence !dir */
-	newip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = !dir;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
-			  seq)) {
-		ip_conntrack_unexpect_related(exp);
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-static void __exit ip_nat_ftp_fini(void)
-{
-	rcu_assign_pointer(ip_nat_ftp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_ftp_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_ftp_hook));
-	rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
-	return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
-	return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_ftp_init);
-module_exit(ip_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index dc778cfef58b..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 	14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
- *		- add support for SACK adjustment
- *	14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- *		- merge SACK support into newnat API
- *	16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
- *		- make ip_nat_resize_packet more generic (TCP and UDP)
- *		- add ip_nat_mangle_udp_packet
- */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x)	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
-
-static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
-		    int sizediff,
-		    struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	int dir;
-	struct ip_nat_seq *this_way, *other_way;
-
-	DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
-		(*skb)->len, new_size);
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &ct->nat.info.seq[dir];
-	other_way = &ct->nat.info.seq[!dir];
-
-	DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
-	DUMP_OFFSET(this_way);
-
-	spin_lock_bh(&ip_nat_seqofs_lock);
-
-	/* SYN adjust. If it's uninitialized, or this is after last
-	 * correction, record it: we don't handle more than one
-	 * adjustment in the window, but do deal with common case of a
-	 * retransmit */
-	if (this_way->offset_before == this_way->offset_after
-	    || before(this_way->correction_pos, seq)) {
-		    this_way->correction_pos = seq;
-		    this_way->offset_before = this_way->offset_after;
-		    this_way->offset_after += sizediff;
-	}
-	spin_unlock_bh(&ip_nat_seqofs_lock);
-
-	DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
-	DUMP_OFFSET(this_way);
-}
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
-			    unsigned int dataoff,
-			    unsigned int match_offset,
-			    unsigned int match_len,
-			    const char *rep_buffer,
-			    unsigned int rep_len)
-{
-	unsigned char *data;
-
-	BUG_ON(skb_is_nonlinear(skb));
-	data = (unsigned char *)skb->nh.iph + dataoff;
-
-	/* move post-replacement */
-	memmove(data + match_offset + rep_len,
-		data + match_offset + match_len,
-		skb->tail - (data + match_offset + match_len));
-
-	/* insert data from buffer */
-	memcpy(data + match_offset, rep_buffer, rep_len);
-
-	/* update skb info */
-	if (rep_len > match_len) {
-		DEBUGP("ip_nat_mangle_packet: Extending packet by "
-			"%u from %u bytes\n", rep_len - match_len,
-		       skb->len);
-		skb_put(skb, rep_len - match_len);
-	} else {
-		DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
-			"%u from %u bytes\n", match_len - rep_len,
-		       skb->len);
-		__skb_trim(skb, skb->len + rep_len - match_len);
-	}
-
-	/* fix IP hdr checksum information */
-	skb->nh.iph->tot_len = htons(skb->len);
-	ip_send_check(skb->nh.iph);
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
-{
-	struct sk_buff *nskb;
-
-	if ((*pskb)->len + extra > 65535)
-		return 0;
-
-	nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
-	if (!nskb)
-		return 0;
-
-	/* Transfer socket to new skb. */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
-	return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int
-ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
-			 struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int match_offset,
-			 unsigned int match_len,
-			 const char *rep_buffer,
-			 unsigned int rep_len)
-{
-	struct iphdr *iph;
-	struct tcphdr *tcph;
-	int oldlen, datalen;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return 0;
-
-	if (rep_len > match_len
-	    && rep_len - match_len > skb_tailroom(*pskb)
-	    && !enlarge_skb(pskb, rep_len - match_len))
-		return 0;
-
-	SKB_LINEAR_ASSERT(*pskb);
-
-	iph = (*pskb)->nh.iph;
-	tcph = (void *)iph + iph->ihl*4;
-
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
-			match_offset, match_len, rep_buffer, rep_len);
-
-	datalen = (*pskb)->len - iph->ihl*4;
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		tcph->check = 0;
-		tcph->check = tcp_v4_check(datalen,
-					   iph->saddr, iph->daddr,
-					   csum_partial((char *)tcph,
-							datalen, 0));
-	} else
-		nf_proto_csum_replace2(&tcph->check, *pskb,
-					htons(oldlen), htons(datalen), 1);
-
-	if (rep_len != match_len) {
-		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
-		adjust_tcp_sequence(ntohl(tcph->seq),
-				    (int)rep_len - (int)match_len,
-				    ct, ctinfo);
-		/* Tell TCP window tracking about seq change */
-		ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
-	}
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
- *       should be fairly easy to do.
- */
-int
-ip_nat_mangle_udp_packet(struct sk_buff **pskb,
-			 struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int match_offset,
-			 unsigned int match_len,
-			 const char *rep_buffer,
-			 unsigned int rep_len)
-{
-	struct iphdr *iph;
-	struct udphdr *udph;
-	int datalen, oldlen;
-
-	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = (*pskb)->nh.iph;
-	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
-			       match_offset + match_len)
-		return 0;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return 0;
-
-	if (rep_len > match_len
-	    && rep_len - match_len > skb_tailroom(*pskb)
-	    && !enlarge_skb(pskb, rep_len - match_len))
-		return 0;
-
-	iph = (*pskb)->nh.iph;
-	udph = (void *)iph + iph->ihl*4;
-
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
-			match_offset, match_len, rep_buffer, rep_len);
-
-	/* update the length of the UDP packet */
-	datalen = (*pskb)->len - iph->ihl*4;
-	udph->len = htons(datalen);
-
-	/* fix udp checksum if udp checksum was previously calculated */
-	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
-		return 1;
-
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		udph->check = 0;
-		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						datalen, IPPROTO_UDP,
-						csum_partial((char *)udph,
-							     datalen, 0));
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
-	} else
-		nf_proto_csum_replace2(&udph->check, *pskb,
-					htons(oldlen), htons(datalen), 1);
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
-	    struct tcphdr *tcph,
-	    unsigned int sackoff,
-	    unsigned int sackend,
-	    struct ip_nat_seq *natseq)
-{
-	while (sackoff < sackend) {
-		struct tcp_sack_block_wire *sack;
-		__be32 new_start_seq, new_end_seq;
-
-		sack = (void *)skb->data + sackoff;
-		if (after(ntohl(sack->start_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_after);
-		else
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_before);
-
-		if (after(ntohl(sack->end_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_after);
-		else
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_before);
-
-		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			ntohl(sack->start_seq), new_start_seq,
-			ntohl(sack->end_seq), new_end_seq);
-
-		nf_proto_csum_replace4(&tcph->check, skb,
-					sack->start_seq, new_start_seq, 0);
-		nf_proto_csum_replace4(&tcph->check, skb,
-					sack->end_seq, new_end_seq, 0);
-		sack->start_seq = new_start_seq;
-		sack->end_seq = new_end_seq;
-		sackoff += sizeof(*sack);
-	}
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-ip_nat_sack_adjust(struct sk_buff **pskb,
-		   struct tcphdr *tcph,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo)
-{
-	unsigned int dir, optoff, optend;
-
-	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
-	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
-
-	if (!skb_make_writable(pskb, optend))
-		return 0;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	while (optoff < optend) {
-		/* Usually: option, length. */
-		unsigned char *op = (*pskb)->data + optoff;
-
-		switch (op[0]) {
-		case TCPOPT_EOL:
-			return 1;
-		case TCPOPT_NOP:
-			optoff++;
-			continue;
-		default:
-			/* no partial options */
-			if (optoff + 1 == optend
-			    || optoff + op[1] > optend
-			    || op[1] < 2)
-				return 0;
-			if (op[0] == TCPOPT_SACK
-			    && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
-			    && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(*pskb, tcph, optoff+2,
-					    optoff+op[1],
-					    &ct->nat.info.seq[!dir]);
-			optoff += op[1];
-		}
-	}
-	return 1;
-}
-
-/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
-int
-ip_nat_seq_adjust(struct sk_buff **pskb,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo)
-{
-	struct tcphdr *tcph;
-	int dir;
-	__be32 newseq, newack;
-	struct ip_nat_seq *this_way, *other_way;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &ct->nat.info.seq[dir];
-	other_way = &ct->nat.info.seq[!dir];
-
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
-		return 0;
-
-	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
-	else
-		newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
-
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
-	else
-		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
-
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
-
-	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		ntohl(newack));
-
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
-
-	if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
-		return 0;
-
-	ip_conntrack_tcp_update(*pskb, ct, dir);
-
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void ip_nat_follow_master(struct ip_conntrack *ct,
-			  struct ip_conntrack_expect *exp)
-{
-	struct ip_nat_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = exp->saved_proto;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
deleted file mode 100644
index bdc99ef6159e..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * H.323 extension for NAT alteration.
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 NAT module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
-		    unsigned char **data, int dataoff,
-		    unsigned int addroff, __be32 ip, u_int16_t port)
-{
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
-	struct {
-		__be32 ip;
-		__be16 port;
-	} __attribute__ ((__packed__)) buf;
-	struct tcphdr _tcph, *th;
-
-	buf.ip = ip;
-	buf.port = htons(port);
-	addroff += dataoff;
-
-	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
-		if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-					      addroff, sizeof(buf),
-					      (char *) &buf, sizeof(buf))) {
-			if (net_ratelimit())
-				printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
-				       " error\n");
-			return -1;
-		}
-
-		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
-					sizeof(_tcph), &_tcph);
-		if (th == NULL)
-			return -1;
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-		    th->doff * 4 + dataoff;
-	} else {
-		if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-					      addroff, sizeof(buf),
-					      (char *) &buf, sizeof(buf))) {
-			if (net_ratelimit())
-				printk("ip_nat_h323: ip_nat_mangle_udp_packet"
-				       " error\n");
-			return -1;
-		}
-		/* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
-		 * or pull everything in a linear buffer, so we can safely
-		 * use the skb pointers now */
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-		    sizeof(struct udphdr);
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
-			 unsigned char **data, int dataoff,
-			 TransportAddress * addr,
-			 __be32 ip, u_int16_t port)
-{
-	return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
-}
-
-/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
-			 unsigned char **data, int dataoff,
-			 H245_TransportAddress * addr,
-			 __be32 ip, u_int16_t port)
-{
-	return set_addr(pskb, data, dataoff,
-			addr->unicastAddress.iPAddress.network, ip, port);
-}
-
-/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
-			TransportAddress * addr, int count)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	__be32 ip;
-	u_int16_t port;
-
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port)) {
-			if (ip == ct->tuplehash[dir].tuple.src.ip &&
-			    port == info->sig_port[dir]) {
-				/* GW->GK */
-
-				/* Fix for Gnomemeeting */
-				if (i > 0 &&
-				    get_h225_addr(*data, &addr[0],
-						  &ip, &port) &&
-				    (ntohl(ip) & 0xff000000) == 0x7f000000)
-					i = 0;
-
-				DEBUGP
-				    ("ip_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.dst.
-					     ip), info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &addr[i],
-						     ct->tuplehash[!dir].
-						     tuple.dst.ip,
-						     info->sig_port[!dir]);
-			} else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
-				   port == info->sig_port[dir]) {
-				/* GK->GW */
-				DEBUGP
-				    ("ip_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.src.
-					     ip), info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &addr[i],
-						     ct->tuplehash[!dir].
-						     tuple.src.ip,
-						     info->sig_port[!dir]);
-			}
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
-			TransportAddress * addr, int count)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	__be32 ip;
-	u_int16_t port;
-
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
-		    ip == ct->tuplehash[dir].tuple.src.ip &&
-		    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
-			DEBUGP("ip_nat_ras: set rasAddress "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(ip), port,
-			       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
-			       ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
-				     port));
-			return set_h225_addr(pskb, data, 0, &addr[i],
-					     ct->tuplehash[!dir].tuple.dst.ip,
-					     ntohs(ct->tuplehash[!dir].tuple.
-						   dst.u.udp.port));
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			H245_TransportAddress * addr,
-			u_int16_t port, u_int16_t rtp_port,
-			struct ip_conntrack_expect *rtp_exp,
-			struct ip_conntrack_expect *rtcp_exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	u_int16_t nated_port;
-
-	/* Set expectations for NAT */
-	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
-	rtp_exp->expectfn = ip_nat_follow_master;
-	rtp_exp->dir = !dir;
-	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
-	rtcp_exp->expectfn = ip_nat_follow_master;
-	rtcp_exp->dir = !dir;
-
-	/* Lookup existing expects */
-	for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
-		if (info->rtp_port[i][dir] == rtp_port) {
-			/* Expected */
-
-			/* Use allocated ports first. This will refresh
-			 * the expects */
-			rtp_exp->tuple.dst.u.udp.port =
-			    htons(info->rtp_port[i][dir]);
-			rtcp_exp->tuple.dst.u.udp.port =
-			    htons(info->rtp_port[i][dir] + 1);
-			break;
-		} else if (info->rtp_port[i][dir] == 0) {
-			/* Not expected */
-			break;
-		}
-	}
-
-	/* Run out of expectations */
-	if (i >= H323_RTP_CHANNEL_MAX) {
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of expectations\n");
-		return 0;
-	}
-
-	/* Try to get a pair of ports. */
-	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
-	     nated_port != 0; nated_port += 2) {
-		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(rtp_exp) == 0) {
-			rtcp_exp->tuple.dst.u.udp.port =
-			    htons(nated_port + 1);
-			if (ip_conntrack_expect_related(rtcp_exp) == 0)
-				break;
-			ip_conntrack_unexpect_related(rtp_exp);
-		}
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of RTP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  (port & 1) ? nated_port + 1 : nated_port) == 0) {
-		/* Save ports */
-		info->rtp_port[i][dir] = rtp_port;
-		info->rtp_port[i][!dir] = nated_port;
-	} else {
-		ip_conntrack_unexpect_related(rtp_exp);
-		ip_conntrack_unexpect_related(rtcp_exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtp_exp->tuple.src.ip),
-	       ntohs(rtp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtp_exp->tuple.dst.ip),
-	       ntohs(rtp_exp->tuple.dst.u.udp.port));
-	DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtcp_exp->tuple.src.ip),
-	       ntohs(rtcp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtcp_exp->tuple.dst.ip),
-	       ntohs(rtcp_exp->tuple.dst.u.udp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
-		    H245_TransportAddress * addr, u_int16_t port,
-		    struct ip_conntrack_expect *exp)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_follow_master;
-	exp->dir = !dir;
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_h245_expect()
- * which was set by ip_conntrack_helper_h323.c. It calls both
- * ip_nat_follow_master() and ip_conntrack_h245_expect()
- ****************************************************************************/
-static void ip_nat_h245_expect(struct ip_conntrack *new,
-			       struct ip_conntrack_expect *this)
-{
-	ip_nat_follow_master(new, this);
-	ip_conntrack_h245_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
-		    TransportAddress * addr, u_int16_t port,
-		    struct ip_conntrack_expect *exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_h245_expect;
-	exp->dir = !dir;
-
-	/* Check existing expects */
-	if (info->sig_port[dir] == port)
-		nated_port = info->sig_port[!dir];
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_q931: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h225_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  nated_port) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = nated_port;
-	} else {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_q931_expect()
- * which was set by ip_conntrack_helper_h323.c.
- ****************************************************************************/
-static void ip_nat_q931_expect(struct ip_conntrack *new,
-			       struct ip_conntrack_expect *this)
-{
-	struct ip_nat_range range;
-
-	if (this->tuple.src.ip != 0) {	/* Only accept calls from GK */
-		ip_nat_follow_master(new, this);
-		goto out;
-	}
-
-	/* This must be a fresh one. */
-	BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip =
-	    new->master->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
-      out:
-	ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, TransportAddress * addr, int idx,
-		    u_int16_t port, struct ip_conntrack_expect *exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-	__be32 ip;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_q931_expect;
-	exp->dir = !dir;
-
-	/* Check existing expects */
-	if (info->sig_port[dir] == port)
-		nated_port = info->sig_port[!dir];
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_ras: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h225_addr(pskb, data, 0, &addr[idx],
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  nated_port) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = nated_port;
-
-		/* Fix for Gnomemeeting */
-		if (idx > 0 &&
-		    get_h225_addr(*data, &addr[0], &ip, &port) &&
-		    (ntohl(ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr_hook(pskb, data, 0, &addr[0],
-					   ct->tuplehash[!dir].tuple.dst.ip,
-					   info->sig_port[!dir]);
-		}
-	} else {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static void ip_nat_callforwarding_expect(struct ip_conntrack *new,
-					 struct ip_conntrack_expect *this)
-{
-	struct ip_nat_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip = this->saved_ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
-	ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
-			      enum ip_conntrack_info ctinfo,
-			      unsigned char **data, int dataoff,
-			      TransportAddress * addr, u_int16_t port,
-			      struct ip_conntrack_expect *exp)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port;
-
-	/* Set expectations for NAT */
-	exp->saved_ip = exp->tuple.dst.ip;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_callforwarding_expect;
-	exp->dir = !dir;
-
-	/* Try to get same port: if not, try to change it. */
-	for (nated_port = port; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_q931: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (!set_h225_addr(pskb, data, dataoff, addr,
-			   ct->tuplehash[!dir].tuple.dst.ip,
-			   nated_port) == 0) {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_q931: expect Call Forwarding "
-	       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
-	BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
-
-	rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
-	rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
-	rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
-	rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
-	rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
-	rcu_assign_pointer(nat_t120_hook, nat_t120);
-	rcu_assign_pointer(nat_h245_hook, nat_h245);
-	rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
-	rcu_assign_pointer(nat_q931_hook, nat_q931);
-
-	DEBUGP("ip_nat_h323: init success\n");
-	return 0;
-}
-
-/****************************************************************************/
-static void __exit fini(void)
-{
-	rcu_assign_pointer(set_h245_addr_hook, NULL);
-	rcu_assign_pointer(set_h225_addr_hook, NULL);
-	rcu_assign_pointer(set_sig_addr_hook, NULL);
-	rcu_assign_pointer(set_ras_addr_hook, NULL);
-	rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
-	rcu_assign_pointer(nat_t120_hook, NULL);
-	rcu_assign_pointer(nat_h245_hook, NULL);
-	rcu_assign_pointer(nat_callforwarding_hook, NULL);
-	rcu_assign_pointer(nat_q931_hook, NULL);
-	synchronize_rcu();
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 NAT helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index 24ce4a5023d7..000000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * ip_nat_pptp.c	- Version 3.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft.  PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - NAT to a unique tuple, not to TCP source port
- * 	   (needs netfilter tuple reservation)
- *
- * Changes:
- *     2002-02-10 - Version 1.3
- *       - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- *	   in local connections (Philip Craig <philipc@snapgear.com>)
- *       - add checks for magicCookie and pptp version
- *       - make argument list of pptp_{out,in}bound_packet() shorter
- *       - move to C99 style initializers
- *       - print version number at module loadtime
- *     2003-09-22 - Version 1.5
- *       - use SNATed tcp sourceport as callid, since we get called before
- *	   TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- *     2004-10-22 - Version 2.0
- *       - kernel 2.6.x version
- *     2005-06-10 - Version 3.0
- *       - kernel >= 2.6.11 version,
- *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "3.0"
-
-#define REQ_CID(req, off)		(*(__be16 *)((char *)(req) + (off)))
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static void pptp_nat_expected(struct ip_conntrack *ct,
-			      struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *master = ct->master;
-	struct ip_conntrack_expect *other_exp;
-	struct ip_conntrack_tuple t;
-	struct ip_ct_pptp_master *ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info;
-	struct ip_nat_range range;
-
-	ct_pptp_info = &master->help.ct_pptp_info;
-	nat_pptp_info = &master->nat.help.nat_pptp_info;
-
-	/* And here goes the grand finale of corrosion... */
-
-	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		DEBUGP("we are PNS->PAC\n");
-		/* therefore, build tuple for PAC->PNS */
-		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-		t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
-		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-		t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
-		t.dst.protonum = IPPROTO_GRE;
-	} else {
-		DEBUGP("we are PAC->PNS\n");
-		/* build tuple for PNS->PAC */
-		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
-		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
-		t.dst.protonum = IPPROTO_GRE;
-	}
-
-	DEBUGP("trying to unexpect other dir: ");
-	DUMP_TUPLE(&t);
-	other_exp = ip_conntrack_expect_find_get(&t);
-	if (other_exp) {
-		ip_conntrack_unexpect_related(other_exp);
-		ip_conntrack_expect_put(other_exp);
-		DEBUGP("success\n");
-	} else {
-		DEBUGP("not found!\n");
-	}
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
-	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
-	}
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
-	if (exp->dir == IP_CT_DIR_REPLY) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
-	}
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-
-/* outbound packets == from PNS to PAC */
-static int
-pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo,
-		  struct PptpControlHeader *ctlh,
-		  union pptp_ctrl_union *pptpReq)
-
-{
-	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg;
-	__be16 new_callid;
-	unsigned int cid_off;
-
-	new_callid = ct_pptp_info->pns_call_id;
-
-	switch (msg = ntohs(ctlh->messageType)) {
-	case PPTP_OUT_CALL_REQUEST:
-		cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
-		/* FIXME: ideally we would want to reserve a call ID
-		 * here.  current netfilter NAT core is not able to do
-		 * this :( For now we use TCP source port. This breaks
-		 * multiple calls within one control session */
-
-		/* save original call ID in nat_info */
-		nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
-		/* don't use tcph->source since we are at a DSTmanip
-		 * hook (e.g. PREROUTING) and pkt is not mangled yet */
-		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
-		/* save new call ID in ct info */
-		ct_pptp_info->pns_call_id = new_callid;
-		break;
-	case PPTP_IN_CALL_REPLY:
-		cid_off = offsetof(union pptp_ctrl_union, icack.callID);
-		break;
-	case PPTP_CALL_CLEAR_REQUEST:
-		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
-		break;
-	default:
-		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
-		      (msg <= PPTP_MSG_MAX)?
-		      pptp_msg_name[msg]:pptp_msg_name[0]);
-		/* fall through */
-
-	case PPTP_SET_LINK_INFO:
-		/* only need to NAT in case PAC is behind NAT box */
-	case PPTP_START_SESSION_REQUEST:
-	case PPTP_START_SESSION_REPLY:
-	case PPTP_STOP_SESSION_REQUEST:
-	case PPTP_STOP_SESSION_REPLY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* no need to alter packet */
-		return NF_ACCEPT;
-	}
-
-	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
-	 * down to here */
-	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
-
-	/* mangle packet */
-	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-				     cid_off + sizeof(struct pptp_pkt_hdr) +
-				     sizeof(struct PptpControlHeader),
-				     sizeof(new_callid), (char *)&new_callid,
-				     sizeof(new_callid)) == 0)
-		return NF_DROP;
-
-	return NF_ACCEPT;
-}
-
-static void
-pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
-	     struct ip_conntrack_expect *expect_reply)
-{
-	struct ip_conntrack *ct = expect_orig->master;
-	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
-	/* save original PAC call ID in nat_info */
-	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
-	/* alter expectation for PNS->PAC direction */
-	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
-	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
-	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
-	expect_orig->dir = IP_CT_DIR_ORIGINAL;
-
-	/* alter expectation for PAC->PNS direction */
-	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
-	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
-	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
-	expect_reply->dir = IP_CT_DIR_REPLY;
-}
-
-/* inbound packets == from PAC to PNS */
-static int
-pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct ip_conntrack *ct,
-		 enum ip_conntrack_info ctinfo,
-		 struct PptpControlHeader *ctlh,
-		 union pptp_ctrl_union *pptpReq)
-{
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg;
-	__be16 new_pcid;
-	unsigned int pcid_off;
-
-	new_pcid = nat_pptp_info->pns_call_id;
-
-	switch (msg = ntohs(ctlh->messageType)) {
-	case PPTP_OUT_CALL_REPLY:
-		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
-		break;
-	case PPTP_IN_CALL_CONNECT:
-		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
-		break;
-	case PPTP_IN_CALL_REQUEST:
-		/* only need to nat in case PAC is behind NAT box */
-		return NF_ACCEPT;
-	case PPTP_WAN_ERROR_NOTIFY:
-		pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
-		break;
-	case PPTP_CALL_DISCONNECT_NOTIFY:
-		pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
-		break;
-	case PPTP_SET_LINK_INFO:
-		pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
-		break;
-
-	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
-			pptp_msg_name[msg]:pptp_msg_name[0]);
-		/* fall through */
-
-	case PPTP_START_SESSION_REQUEST:
-	case PPTP_START_SESSION_REPLY:
-	case PPTP_STOP_SESSION_REQUEST:
-	case PPTP_STOP_SESSION_REPLY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* no need to alter packet */
-		return NF_ACCEPT;
-	}
-
-	/* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
-	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
-
-	/* mangle packet */
-	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
-
-	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-				     pcid_off + sizeof(struct pptp_pkt_hdr) +
-				     sizeof(struct PptpControlHeader),
-				     sizeof(new_pcid), (char *)&new_pcid,
-				     sizeof(new_pcid)) == 0)
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-
-extern int __init ip_nat_proto_gre_init(void);
-extern void __exit ip_nat_proto_gre_fini(void);
-
-static int __init ip_nat_helper_pptp_init(void)
-{
-	int ret;
-
-	DEBUGP("%s: registering NAT helper\n", __FILE__);
-
-	ret = ip_nat_proto_gre_init();
-	if (ret < 0)
-		return ret;
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
-	rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
-	rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
-	rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
-	rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
-
-	printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
-	return 0;
-}
-
-static void __exit ip_nat_helper_pptp_fini(void)
-{
-	DEBUGP("cleanup_module\n" );
-
-	rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
-	synchronize_rcu();
-
-	ip_nat_proto_gre_fini();
-
-	printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(ip_nat_helper_pptp_init);
-module_exit(ip_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index cfaeea38314f..000000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/moduleparam.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int matchoff,
-			 unsigned int matchlen,
-			 struct ip_conntrack_expect *exp)
-{
-	u_int16_t port;
-	unsigned int ret;
-
-	/* "4294967296 65635 " */
-	char buffer[18];
-
-	DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
-	       expect->seq, exp_irc_info->len,
-	       ntohl(tcph->seq));
-
-	/* Reply comes from server. */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = IP_CT_DIR_REPLY;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	/*      strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
-	 *      strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
-	 *      strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
-	 *      strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
-	 *      strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
-	 *              AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
-	 *                      255.255.255.255==4294967296, 10 digits)
-	 *              P:         bound port (min 1 d, max 5d (65635))
-	 *              F:         filename   (min 1 d )
-	 *              S:         size       (min 1 d )
-	 *              0x01, \n:  terminators
-	 */
-
-	/* AAA = "us", ie. where server normally talks to. */
-	sprintf(buffer, "%u %u",
-		ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
-		port);
-	DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
-	       buffer, NIPQUAD(exp->tuple.src.ip), port);
-
-	ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
-				       matchoff, matchlen, buffer,
-				       strlen(buffer));
-	if (ret != NF_ACCEPT)
-		ip_conntrack_unexpect_related(exp);
-	return ret;
-}
-
-static void __exit ip_nat_irc_fini(void)
-{
-	rcu_assign_pointer(ip_nat_irc_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_irc_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_irc_hook));
-	rcu_assign_pointer(ip_nat_irc_hook, help);
-	return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
-	return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_irc_init);
-module_exit(ip_nat_irc_fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 95810202d849..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 key;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		key = tuple->src.u.gre.key;
-	else
-		key = tuple->dst.u.gre.key;
-
-	return ntohs(key) >= ntohs(min->gre.key)
-		&& ntohs(key) <= ntohs(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t key;
-	__be16 *keyptr;
-	unsigned int min, i, range_size;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		keyptr = &tuple->src.u.gre.key;
-	else
-		keyptr = &tuple->dst.u.gre.key;
-
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
-		min = 1;
-		range_size = 0xffff;
-	} else {
-		min = ntohs(range->min.gre.key);
-		range_size = ntohs(range->max.gre.key) - min + 1;
-	}
-
-	DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
-	for (i = 0; i < range_size; i++, key++) {
-		*keyptr = htons(min + key % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-
-	DEBUGP("%p: no NAT mapping\n", conntrack);
-
-	return 0;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct gre_hdr *greh;
-	struct gre_hdr_pptp *pgreh;
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-
-	/* pgreh includes two optional 32bit fields which are not required
-	 * to be there.  That's where the magic '8' comes from */
-	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
-		return 0;
-
-	greh = (void *)(*pskb)->data + hdroff;
-	pgreh = (struct gre_hdr_pptp *) greh;
-
-	/* we only have destination manip of a packet, since 'source key'
-	 * is not present in the packet itself */
-	if (maniptype == IP_NAT_MANIP_DST) {
-		/* key manipulation is always dest */
-		switch (greh->version) {
-		case 0:
-			if (!greh->key) {
-				DEBUGP("can't nat GRE w/o key\n");
-				break;
-			}
-			if (greh->csum) {
-				/* FIXME: Never tested this code... */
-				nf_proto_csum_replace4(gre_csum(greh), *pskb,
-							*(gre_key(greh)),
-							tuple->dst.u.gre.key, 0);
-			}
-			*(gre_key(greh)) = tuple->dst.u.gre.key;
-			break;
-		case GRE_VERSION_PPTP:
-			DEBUGP("call_id -> 0x%04x\n",
-				ntohs(tuple->dst.u.gre.key));
-			pgreh->call_id = tuple->dst.u.gre.key;
-			break;
-		default:
-			DEBUGP("can't nat unknown GRE version\n");
-			return 0;
-			break;
-		}
-	}
-	return 1;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
-	.name		= "GRE",
-	.protonum	= IPPROTO_GRE,
-	.manip_pkt	= gre_manip_pkt,
-	.in_range	= gre_in_range,
-	.unique_tuple	= gre_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
-
-int __init ip_nat_proto_gre_init(void)
-{
-	return ip_nat_protocol_register(&gre);
-}
-
-void __exit ip_nat_proto_gre_fini(void)
-{
-	ip_nat_protocol_unregister(&gre);
-}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 22a528ae0380..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-icmp_in_range(const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype,
-	      const union ip_conntrack_manip_proto *min,
-	      const union ip_conntrack_manip_proto *max)
-{
-	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
-	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static int
-icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		  const struct ip_nat_range *range,
-		  enum ip_nat_manip_type maniptype,
-		  const struct ip_conntrack *conntrack)
-{
-	static u_int16_t id;
-	unsigned int range_size;
-	unsigned int i;
-
-	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
-		range_size = 0xFFFF;
-
-	for (i = 0; i < range_size; i++, id++) {
-		tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
-					     (id % range_size));
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-	return 0;
-}
-
-static int
-icmp_manip_pkt(struct sk_buff **pskb,
-	       unsigned int iphdroff,
-	       const struct ip_conntrack_tuple *tuple,
-	       enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct icmphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
-		return 0;
-
-	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, *pskb,
-			       hdr->un.echo.id, tuple->src.u.icmp.id, 0);
-	hdr->un.echo.id = tuple->src.u.icmp.id;
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_icmp = {
-	.name			= "ICMP",
-	.protonum		= IPPROTO_ICMP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= icmp_manip_pkt,
-	.in_range		= icmp_in_range,
-	.unique_tuple		= icmp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index 14ff24f53a7a..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-
-static int
-tcp_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		port = tuple->src.u.tcp.port;
-	else
-		port = tuple->dst.u.tcp.port;
-
-	return ntohs(port) >= ntohs(min->tcp.port)
-		&& ntohs(port) <= ntohs(max->tcp.port);
-}
-
-static int
-tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t port;
-	__be16 *portptr;
-	unsigned int range_size, min, i;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.tcp.port;
-	else
-		portptr = &tuple->dst.u.tcp.port;
-
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
-			return 0;
-
-		/* Map privileged onto privileged. */
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr)<512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min.tcp.port);
-		range_size = ntohs(range->max.tcp.port) - min + 1;
-	}
-
-	/* Start from random port to avoid prediction */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
-		port =  net_random();
-
-	for (i = 0; i < range_size; i++, port++) {
-		*portptr = htons(min + port % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack)) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
-static int
-tcp_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct tcphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport, oldport;
-	int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
-	/* this could be a inner header returned in icmp packet; in such
-	   cases we cannot update the checksum field since it is outside of
-	   the 8 bytes of transport layer headers we are guaranteed */
-	if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
-		hdrsize = sizeof(struct tcphdr);
-
-	if (!skb_make_writable(pskb, hdroff + hdrsize))
-		return 0;
-
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct tcphdr *)((*pskb)->data + hdroff);
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.ip;
-		newport = tuple->src.u.tcp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.ip;
-		newport = tuple->dst.u.tcp.port;
-		portptr = &hdr->dest;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return 1;
-
-	nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_tcp = {
-	.name			= "TCP",
-	.protonum		= IPPROTO_TCP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= tcp_manip_pkt,
-	.in_range		= tcp_in_range,
-	.unique_tuple		= tcp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index dfd521672891..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-udp_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		port = tuple->src.u.udp.port;
-	else
-		port = tuple->dst.u.udp.port;
-
-	return ntohs(port) >= ntohs(min->udp.port)
-		&& ntohs(port) <= ntohs(max->udp.port);
-}
-
-static int
-udp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t port;
-	__be16 *portptr;
-	unsigned int range_size, min, i;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.udp.port;
-	else
-		portptr = &tuple->dst.u.udp.port;
-
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
-			return 0;
-
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr)<512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min.udp.port);
-		range_size = ntohs(range->max.udp.port) - min + 1;
-	}
-
-	/* Start from random port to avoid prediction */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
-		port = net_random();
-
-	for (i = 0; i < range_size; i++, port++) {
-		*portptr = htons(min + port % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-	return 0;
-}
-
-static int
-udp_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct udphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport;
-
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
-		return 0;
-
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct udphdr *)((*pskb)->data + hdroff);
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.ip;
-		newport = tuple->src.u.udp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.ip;
-		newport = tuple->dst.u.udp.port;
-		portptr = &hdr->dest;
-	}
-
-	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
-		if (!hdr->check)
-			hdr->check = CSUM_MANGLED_0;
-	}
-	*portptr = newport;
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_udp = {
-	.name			= "UDP",
-	.protonum		= IPPROTO_UDP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= udp_manip_pkt,
-	.in_range		= udp_in_range,
-	.unique_tuple		= udp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf049517246..000000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* The "unknown" protocol.  This is what is used for protocols we
- * don't understand.  It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
-			    enum ip_nat_manip_type manip_type,
-			    const union ip_conntrack_manip_proto *min,
-			    const union ip_conntrack_manip_proto *max)
-{
-	return 1;
-}
-
-static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
-				const struct ip_nat_range *range,
-				enum ip_nat_manip_type maniptype,
-				const struct ip_conntrack *conntrack)
-{
-	/* Sorry: we can't help you; if it's not unique, we can't frob
-	   anything. */
-	return 0;
-}
-
-static int
-unknown_manip_pkt(struct sk_buff **pskb,
-		  unsigned int iphdroff,
-		  const struct ip_conntrack_tuple *tuple,
-		  enum ip_nat_manip_type maniptype)
-{
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_unknown_protocol = {
-	.name			= "unknown",
-	/* .me isn't set: getting a ref to this cannot fail. */
-	.manip_pkt		= unknown_manip_pkt,
-	.in_range		= unknown_in_range,
-	.unique_tuple		= unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 080eb1d92200..000000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
-static struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[3];
-	struct ipt_error term;
-} nat_initial_table __initdata
-= { { "nat", NAT_VALID_HOOKS, 4,
-      sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
-      { [NF_IP_PRE_ROUTING] = 0,
-	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
-      { [NF_IP_PRE_ROUTING] = 0,
-	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
-      0, NULL, { } },
-    {
-	    /* PRE_ROUTING */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* POST_ROUTING */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* LOCAL_OUT */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } }
-    },
-    /* ERROR */
-    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-	0,
-	sizeof(struct ipt_entry),
-	sizeof(struct ipt_error),
-	0, { 0, 0 }, { } },
-      { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
-	  { } },
-	"ERROR"
-      }
-    }
-};
-
-static struct xt_table nat_table = {
-	.name		= "nat",
-	.valid_hooks	= NAT_VALID_HOOKS,
-	.lock		= RW_LOCK_UNLOCKED,
-	.me		= THIS_MODULE,
-	.af		= AF_INET,
-};
-
-/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-
-	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
-			    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
-	IP_NF_ASSERT(out);
-
-	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
-{
-	static int warned = 0;
-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
-	struct rtable *rt;
-
-	if (ip_route_output_key(&rt, &fl) != 0)
-		return;
-
-	if (rt->rt_src != srcip && !warned) {
-		printk("NAT: no longer support implicit source local NAT\n");
-		printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
-		       NIPQUAD(srcip), NIPQUAD(dstip));
-		warned = 1;
-	}
-	ip_rt_put(rt);
-}
-
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_LOCAL_OUT);
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
-	if (hooknum == NF_IP_LOCAL_OUT
-	    && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
-				     mr->range[0].min_ip);
-
-	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-static int ipt_snat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
-{
-	struct ip_nat_multi_range_compat *mr = targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		printk("SNAT: multiple ranges no longer supported\n");
-		return 0;
-	}
-	return 1;
-}
-
-static int ipt_dnat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
-{
-	struct ip_nat_multi_range_compat *mr = targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		printk("DNAT: multiple ranges no longer supported\n");
-		return 0;
-	}
-	if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
-		printk("DNAT: port randomization not supported\n");
-		return 0;
-	}
-	return 1;
-}
-
-inline unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
-		   struct ip_nat_info *info,
-		   unsigned int hooknum)
-{
-	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
-	   Use reply in case it's already been mangled (eg local packet).
-	*/
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-	struct ip_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
-	DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
-	       NIPQUAD(ip));
-	return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
-			     struct ip_nat_info *info,
-			     unsigned int hooknum)
-{
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-	u_int16_t all
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
-	struct ip_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
-	DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
-	       conntrack, NIPQUAD(ip));
-	return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-int ip_nat_rule_find(struct sk_buff **pskb,
-		     unsigned int hooknum,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     struct ip_conntrack *ct,
-		     struct ip_nat_info *info)
-{
-	int ret;
-
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
-
-	if (ret == NF_ACCEPT) {
-		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
-			/* NUL mapping */
-			ret = alloc_null_binding(ct, info, hooknum);
-	}
-	return ret;
-}
-
-static struct xt_target ipt_snat_reg = {
-	.name		= "SNAT",
-	.family		= AF_INET,
-	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
-	.table		= "nat",
-	.hooks		= 1 << NF_IP_POST_ROUTING,
-	.checkentry	= ipt_snat_checkentry,
-};
-
-static struct xt_target ipt_dnat_reg = {
-	.name		= "DNAT",
-	.family		= AF_INET,
-	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
-	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
-	.checkentry	= ipt_dnat_checkentry,
-};
-
-int __init ip_nat_rule_init(void)
-{
-	int ret;
-
-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-	if (ret != 0)
-		return ret;
-	ret = xt_register_target(&ipt_snat_reg);
-	if (ret != 0)
-		goto unregister_table;
-
-	ret = xt_register_target(&ipt_dnat_reg);
-	if (ret != 0)
-		goto unregister_snat;
-
-	return ret;
-
- unregister_snat:
-	xt_unregister_target(&ipt_snat_reg);
- unregister_table:
-	xt_unregister_table(&nat_table);
-
-	return ret;
-}
-
-void ip_nat_rule_cleanup(void)
-{
-	xt_unregister_target(&ipt_dnat_reg);
-	xt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
-}
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
deleted file mode 100644
index 325c5a9dc2ef..000000000000
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/* SIP extension for UDP NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-struct addr_map {
-	struct {
-		char		src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-		char		dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-		unsigned int	srclen, srciplen;
-		unsigned int	dstlen, dstiplen;
-	} addr[IP_CT_DIR_MAX];
-};
-
-static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
-{
-	struct ip_conntrack_tuple *t;
-	enum ip_conntrack_dir dir;
-	unsigned int n;
-
-	for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
-		t = &ct->tuplehash[dir].tuple;
-
-		n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
-			    NIPQUAD(t->src.ip));
-		map->addr[dir].srciplen = n;
-		n += sprintf(map->addr[dir].src + n, ":%u",
-			     ntohs(t->src.u.udp.port));
-		map->addr[dir].srclen = n;
-
-		n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
-			    NIPQUAD(t->dst.ip));
-		map->addr[dir].dstiplen = n;
-		n += sprintf(map->addr[dir].dst + n, ":%u",
-			     ntohs(t->dst.u.udp.port));
-		map->addr[dir].dstlen = n;
-	}
-}
-
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
-			struct ip_conntrack *ct, const char **dptr, size_t dlen,
-			enum sip_header_pos pos, struct addr_map *map)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned int matchlen, matchoff, addrlen;
-	char *addr;
-
-	if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
-		return 1;
-
-	if ((matchlen == map->addr[dir].srciplen ||
-	     matchlen == map->addr[dir].srclen) &&
-	    memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
-		addr    = map->addr[!dir].dst;
-		addrlen = map->addr[!dir].dstlen;
-	} else if ((matchlen == map->addr[dir].dstiplen ||
-		    matchlen == map->addr[dir].dstlen) &&
-		   memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
-		addr    = map->addr[!dir].src;
-		addrlen = map->addr[!dir].srclen;
-	} else
-		return 1;
-
-	if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-				      matchoff, matchlen, addr, addrlen))
-		return 0;
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-	return 1;
-
-}
-
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack *ct,
-			       const char **dptr)
-{
-	enum sip_header_pos pos;
-	struct addr_map map;
-	int dataoff, datalen;
-
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-	datalen = (*pskb)->len - dataoff;
-	if (datalen < sizeof("SIP/2.0") - 1)
-		return NF_DROP;
-
-	addr_map_init(ct, &map);
-
-	/* Basic rules: requests and responses. */
-	if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
-		/* 10.2: Constructing the REGISTER Request:
-		 *
-		 * The "userinfo" and "@" components of the SIP URI MUST NOT
-		 * be present.
-		 */
-		if (datalen >= sizeof("REGISTER") - 1 &&
-		    strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
-			pos = POS_REG_REQ_URI;
-		else
-			pos = POS_REQ_URI;
-
-		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
-			return NF_DROP;
-	}
-
-	if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
-				      enum ip_conntrack_info ctinfo,
-				      struct ip_conntrack *ct,
-				      const char **dptr, size_t dlen,
-				      char *buffer, int bufflen,
-				      enum sip_header_pos pos)
-{
-	unsigned int matchlen, matchoff;
-
-	if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
-		return 0;
-
-	if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-				      matchoff, matchlen, buffer, bufflen))
-		return 0;
-
-	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-	return 1;
-}
-
-static int mangle_content_len(struct sk_buff **pskb,
-			      enum ip_conntrack_info ctinfo,
-			      struct ip_conntrack *ct,
-			      const char *dptr)
-{
-	unsigned int dataoff, matchoff, matchlen;
-	char buffer[sizeof("65536")];
-	int bufflen;
-
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
-	/* Get actual SDP lenght */
-	if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
-			    &matchlen, POS_SDP_HEADER) > 0) {
-
-		/* since ct_sip_get_info() give us a pointer passing 'v='
-		   we need to add 2 bytes in this count. */
-		int c_len = (*pskb)->len - dataoff - matchoff + 2;
-
-		/* Now, update SDP lenght */
-		if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
-				    &matchlen, POS_CONTENT) > 0) {
-
-			bufflen = sprintf(buffer, "%u", c_len);
-
-			return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-							matchoff, matchlen,
-							buffer, bufflen);
-		}
-	}
-	return 0;
-}
-
-static unsigned int mangle_sdp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack *ct,
-			       __be32 newip, u_int16_t port,
-			       const char *dptr)
-{
-	char buffer[sizeof("nnn.nnn.nnn.nnn")];
-	unsigned int dataoff, bufflen;
-
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
-
-	/* Mangle owner and contact info. */
-	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_OWNER))
-		return 0;
-
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_CONNECTION))
-		return 0;
-
-	/* Mangle media port. */
-	bufflen = sprintf(buffer, "%u", port);
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_MEDIA))
-		return 0;
-
-	return mangle_content_len(pskb, ctinfo, ct, dptr);
-}
-
-/* So, this packet has hit the connection tracking matching code.
-   Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack_expect *exp,
-			       const char *dptr)
-{
-	struct ip_conntrack *ct = exp->master;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	__be32 newip;
-	u_int16_t port;
-
-	DEBUGP("ip_nat_sdp():\n");
-
-	/* Connection will come from reply */
-	newip = ct->tuplehash[!dir].tuple.dst.ip;
-
-	exp->tuple.dst.ip = newip;
-	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
-	exp->dir = !dir;
-
-	/* When you see the packet, we need to NAT it the same as the
-	   this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
-		exp->tuple.dst.u.udp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
-		ip_conntrack_unexpect_related(exp);
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-static void __exit fini(void)
-{
-	rcu_assign_pointer(ip_nat_sip_hook, NULL);
-	rcu_assign_pointer(ip_nat_sdp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_sip_hook));
-	BUG_ON(rcu_dereference(ip_nat_sdp_hook));
-	rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
-	rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
-	return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
deleted file mode 100644
index e41d0efae515..000000000000
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ /dev/null
@@ -1,1333 +0,0 @@
-/*
- * ip_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified.  The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
- */
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-#include <asm/uaccess.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
-	u_int8_t from;
-	u_int8_t to;
-};
-
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI	0	/* Universal */
-#define ASN1_APL	1	/* Application */
-#define ASN1_CTX	2	/* Context */
-#define ASN1_PRV	3	/* Private */
-
-/* Tag */
-#define ASN1_EOC	0	/* End Of Contents */
-#define ASN1_BOL	1	/* Boolean */
-#define ASN1_INT	2	/* Integer */
-#define ASN1_BTS	3	/* Bit String */
-#define ASN1_OTS	4	/* Octet String */
-#define ASN1_NUL	5	/* Null */
-#define ASN1_OJI	6	/* Object Identifier  */
-#define ASN1_OJD	7	/* Object Description */
-#define ASN1_EXT	8	/* External */
-#define ASN1_SEQ	16	/* Sequence */
-#define ASN1_SET	17	/* Set */
-#define ASN1_NUMSTR	18	/* Numerical String */
-#define ASN1_PRNSTR	19	/* Printable String */
-#define ASN1_TEXSTR	20	/* Teletext String */
-#define ASN1_VIDSTR	21	/* Video String */
-#define ASN1_IA5STR	22	/* IA5 String */
-#define ASN1_UNITIM	23	/* Universal Time */
-#define ASN1_GENTIM	24	/* General Time */
-#define ASN1_GRASTR	25	/* Graphical String */
-#define ASN1_VISSTR	26	/* Visible String */
-#define ASN1_GENSTR	27	/* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI	0	/* Primitive */
-#define ASN1_CON	1	/* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR		0
-#define ASN1_ERR_DEC_EMPTY		2
-#define ASN1_ERR_DEC_EOC_MISMATCH	3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH	4
-#define ASN1_ERR_DEC_BADVALUE		5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
-	int error;			/* Error condition */
-	unsigned char *pointer;		/* Octet just to be decoded */
-	unsigned char *begin;		/* First octet */
-	unsigned char *end;		/* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
-	unsigned char *data;
-	unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
-		      unsigned char *buf,
-		      unsigned int len)
-{
-	ctx->begin = buf;
-	ctx->end = buf + len;
-	ctx->pointer = buf;
-	ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
-	if (ctx->pointer >= ctx->end) {
-		ctx->error = ASN1_ERR_DEC_EMPTY;
-		return 0;
-	}
-	*ch = *(ctx->pointer)++;
-	return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
-	unsigned char ch;
-
-	*tag = 0;
-
-	do
-	{
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-		*tag <<= 7;
-		*tag |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
-				    unsigned int *cls,
-				    unsigned int *con,
-				    unsigned int *tag)
-{
-	unsigned char ch;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*cls = (ch & 0xC0) >> 6;
-	*con = (ch & 0x20) >> 5;
-	*tag = (ch & 0x1F);
-
-	if (*tag == 0x1F) {
-		if (!asn1_tag_decode(ctx, tag))
-			return 0;
-	}
-	return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
-					unsigned int *def,
-					unsigned int *len)
-{
-	unsigned char ch, cnt;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	if (ch == 0x80)
-		*def = 0;
-	else {
-		*def = 1;
-
-		if (ch < 0x80)
-			*len = ch;
-		else {
-			cnt = (unsigned char) (ch & 0x7F);
-			*len = 0;
-
-			while (cnt > 0) {
-				if (!asn1_octet_decode(ctx, &ch))
-					return 0;
-				*len <<= 8;
-				*len |= ch;
-				cnt--;
-			}
-		}
-	}
-	return 1;
-}
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
-					unsigned char **eoc,
-					unsigned int *cls,
-					unsigned int *con,
-					unsigned int *tag)
-{
-	unsigned int def, len;
-
-	if (!asn1_id_decode(ctx, cls, con, tag))
-		return 0;
-
-	def = len = 0;
-	if (!asn1_length_decode(ctx, &def, &len))
-		return 0;
-
-	if (def)
-		*eoc = ctx->pointer + len;
-	else
-		*eoc = NULL;
-	return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
-	unsigned char ch;
-
-	if (eoc == 0) {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-		return 1;
-	} else {
-		if (ctx->pointer != eoc) {
-			ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
-			return 0;
-		}
-		return 1;
-	}
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
-	ctx->pointer = eoc;
-	return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc,
-				      long *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = (signed char) ch;
-	len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc,
-				      unsigned int *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0) len = 0;
-	else len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (unsigned int)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
-				       unsigned char *eoc,
-				       unsigned long *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0) len = 0;
-	else len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (unsigned long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
-					unsigned char *eoc,
-					unsigned char **octets,
-					unsigned int *len)
-{
-	unsigned char *ptr;
-
-	*len = 0;
-
-	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
-	if (*octets == NULL) {
-		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
-		return 0;
-	}
-
-	ptr = *octets;
-	while (ctx->pointer < eoc) {
-		if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
-			kfree(*octets);
-			*octets = NULL;
-			return 0;
-		}
-		(*len)++;
-	}
-	return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
-				       unsigned long *subid)
-{
-	unsigned char ch;
-
-	*subid = 0;
-
-	do {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*subid <<= 7;
-		*subid |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
-				     unsigned char *eoc,
-				     unsigned long **oid,
-				     unsigned int *len)
-{
-	unsigned long subid;
-	unsigned int  size;
-	unsigned long *optr;
-
-	size = eoc - ctx->pointer + 1;
-	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
-	if (*oid == NULL) {
-		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
-		return 0;
-	}
-
-	optr = *oid;
-
-	if (!asn1_subid_decode(ctx, &subid)) {
-		kfree(*oid);
-		*oid = NULL;
-		return 0;
-	}
-
-	if (subid < 40) {
-		optr [0] = 0;
-		optr [1] = subid;
-	} else if (subid < 80) {
-		optr [0] = 1;
-		optr [1] = subid - 40;
-	} else {
-		optr [0] = 2;
-		optr [1] = subid - 80;
-	}
-
-	*len = 2;
-	optr += 2;
-
-	while (ctx->pointer < eoc) {
-		if (++(*len) > size) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-
-		if (!asn1_subid_decode(ctx, optr++)) {
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-	}
-	return 1;
-}
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1				0
-#define SNMP_V2C			1
-#define SNMP_V2				2
-#define SNMP_V3				3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM			256
-#define SNMP_SIZE_OBJECTID		128
-#define SNMP_SIZE_BUFCHR		256
-#define SNMP_SIZE_BUFINT		128
-#define SNMP_SIZE_SMALLOBJECTID		16
-
-/* Requests */
-#define SNMP_PDU_GET			0
-#define SNMP_PDU_NEXT			1
-#define SNMP_PDU_RESPONSE		2
-#define SNMP_PDU_SET			3
-#define SNMP_PDU_TRAP1			4
-#define SNMP_PDU_BULK			5
-#define SNMP_PDU_INFORM			6
-#define SNMP_PDU_TRAP2			7
-
-/* Errors */
-#define SNMP_NOERROR			0
-#define SNMP_TOOBIG			1
-#define SNMP_NOSUCHNAME			2
-#define SNMP_BADVALUE			3
-#define SNMP_READONLY			4
-#define SNMP_GENERROR			5
-#define SNMP_NOACCESS			6
-#define SNMP_WRONGTYPE			7
-#define SNMP_WRONGLENGTH		8
-#define SNMP_WRONGENCODING		9
-#define SNMP_WRONGVALUE			10
-#define SNMP_NOCREATION			11
-#define SNMP_INCONSISTENTVALUE		12
-#define SNMP_RESOURCEUNAVAILABLE	13
-#define SNMP_COMMITFAILED		14
-#define SNMP_UNDOFAILED			15
-#define SNMP_AUTHORIZATIONERROR		16
-#define SNMP_NOTWRITABLE		17
-#define SNMP_INCONSISTENTNAME		18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART		0
-#define SNMP_TRAP_WARMSTART		1
-#define SNMP_TRAP_LINKDOWN		2
-#define SNMP_TRAP_LINKUP		3
-#define SNMP_TRAP_AUTFAILURE		4
-#define SNMP_TRAP_EQPNEIGHBORLOSS	5
-#define SNMP_TRAP_ENTSPECIFIC		6
-
-/* SNMPv1 Types */
-#define SNMP_NULL                0
-#define SNMP_INTEGER             1    /* l  */
-#define SNMP_OCTETSTR            2    /* c  */
-#define SNMP_DISPLAYSTR          2    /* c  */
-#define SNMP_OBJECTID            3    /* ul */
-#define SNMP_IPADDR              4    /* uc */
-#define SNMP_COUNTER             5    /* ul */
-#define SNMP_GAUGE               6    /* ul */
-#define SNMP_TIMETICKS           7    /* ul */
-#define SNMP_OPAQUE              8    /* c  */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER            5    /* ul */
-#define SNMP_BITSTR              9    /* uc */
-#define SNMP_NSAP               10    /* uc */
-#define SNMP_COUNTER64          11    /* ul */
-#define SNMP_NOSUCHOBJECT       12
-#define SNMP_NOSUCHINSTANCE     13
-#define SNMP_ENDOFMIBVIEW       14
-
-union snmp_syntax
-{
-	unsigned char uc[0];	/* 8 bit unsigned */
-	char c[0];		/* 8 bit signed */
-	unsigned long ul[0];	/* 32 bit unsigned */
-	long l[0];		/* 32 bit signed */
-};
-
-struct snmp_object
-{
-	unsigned long *id;
-	unsigned int id_len;
-	unsigned short type;
-	unsigned int syntax_len;
-	union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
-	unsigned long id;
-	unsigned int error_status;
-	unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
-	unsigned long *id;
-	unsigned int id_len;
-	unsigned long ip_address;	/* pointer  */
-	unsigned int general;
-	unsigned int specific;
-	unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA    0
-#define SNMP_CNT    1
-#define SNMP_GGE    2
-#define SNMP_TIT    3
-#define SNMP_OPQ    4
-#define SNMP_C64    6
-
-/* SNMP errors */
-#define SERR_NSO    0
-#define SERR_NSI    1
-#define SERR_EOM    2
-
-static inline void mangle_address(unsigned char *begin,
-				  unsigned char *addr,
-				  const struct oct1_map *map,
-				  __sum16 *check);
-struct snmp_cnv
-{
-	unsigned int class;
-	unsigned int tag;
-	int syntax;
-};
-
-static struct snmp_cnv snmp_conv [] =
-{
-	{ASN1_UNI, ASN1_NUL, SNMP_NULL},
-	{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
-	{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
-	{ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
-	{ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
-	{ASN1_APL, SNMP_IPA, SNMP_IPADDR},
-	{ASN1_APL, SNMP_CNT, SNMP_COUNTER},	/* Counter32 */
-	{ASN1_APL, SNMP_GGE, SNMP_GAUGE},	/* Gauge32 == Unsigned32  */
-	{ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
-	{ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
-	/* SNMPv2 data types and errors */
-	{ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
-	{ASN1_APL, SNMP_C64, SNMP_COUNTER64},
-	{ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
-	{ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
-	{ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
-	{0,       0,       -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
-					 unsigned int cls,
-					 unsigned short *syntax)
-{
-	struct snmp_cnv *cnv;
-
-	cnv = snmp_conv;
-
-	while (cnv->syntax != -1) {
-		if (cnv->tag == tag && cnv->class == cls) {
-			*syntax = cnv->syntax;
-			return 1;
-		}
-		cnv++;
-	}
-	return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
-					struct snmp_object **obj)
-{
-	unsigned int cls, con, tag, len, idlen;
-	unsigned short type;
-	unsigned char *eoc, *end, *p;
-	unsigned long *lp, *id;
-	unsigned long ul;
-	long l;
-
-	*obj = NULL;
-	id = NULL;
-
-	if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
-		return 0;
-
-	if (!asn1_oid_decode(ctx, end, &id, &idlen))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
-		kfree(id);
-		return 0;
-	}
-
-	if (con != ASN1_PRI) {
-		kfree(id);
-		return 0;
-	}
-
-	type = 0;
-	if (!snmp_tag_cls2syntax(tag, cls, &type)) {
-		kfree(id);
-		return 0;
-	}
-
-	l = 0;
-	switch (type) {
-		case SNMP_INTEGER:
-			len = sizeof(long);
-			if (!asn1_long_decode(ctx, end, &l)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len,
-				       GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			(*obj)->syntax.l[0] = l;
-			break;
-		case SNMP_OCTETSTR:
-		case SNMP_OPAQUE:
-			if (!asn1_octets_decode(ctx, end, &p, &len)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len,
-				       GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.c, p, len);
-			kfree(p);
-			break;
-		case SNMP_NULL:
-		case SNMP_NOSUCHOBJECT:
-		case SNMP_NOSUCHINSTANCE:
-		case SNMP_ENDOFMIBVIEW:
-			len = 0;
-			*obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			if (!asn1_null_decode(ctx, end)) {
-				kfree(id);
-				kfree(*obj);
-				*obj = NULL;
-				return 0;
-			}
-			break;
-		case SNMP_OBJECTID:
-			if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
-				kfree(id);
-				return 0;
-			}
-			len *= sizeof(unsigned long);
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(lp);
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.ul, lp, len);
-			kfree(lp);
-			break;
-		case SNMP_IPADDR:
-			if (!asn1_octets_decode(ctx, end, &p, &len)) {
-				kfree(id);
-				return 0;
-			}
-			if (len != 4) {
-				kfree(p);
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(p);
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.uc, p, len);
-			kfree(p);
-			break;
-		case SNMP_COUNTER:
-		case SNMP_GAUGE:
-		case SNMP_TIMETICKS:
-			len = sizeof(unsigned long);
-			if (!asn1_ulong_decode(ctx, end, &ul)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			(*obj)->syntax.ul[0] = ul;
-			break;
-		default:
-			kfree(id);
-			return 0;
-	}
-
-	(*obj)->syntax_len = len;
-	(*obj)->type = type;
-	(*obj)->id = id;
-	(*obj)->id_len = idlen;
-
-	if (!asn1_eoc_decode(ctx, eoc)) {
-		kfree(id);
-		kfree(*obj);
-		*obj = NULL;
-		return 0;
-	}
-	return 1;
-}
-
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
-					 struct snmp_request *request)
-{
-	unsigned int cls, con, tag;
-	unsigned char *end;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_ulong_decode(ctx, end, &request->id))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_uint_decode(ctx, end, &request->error_status))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_uint_decode(ctx, end, &request->error_index))
-		return 0;
-
-	return 1;
-}
-
-/*
- * Fast checksum update for possibly oddly-aligned UDP byte, from the
- * code example in the draft.
- */
-static void fast_csum(__sum16 *csum,
-		      const unsigned char *optr,
-		      const unsigned char *nptr,
-		      int offset)
-{
-	unsigned char s[4];
-
-	if (offset & 1) {
-		s[0] = s[2] = 0;
-		s[1] = ~*optr;
-		s[3] = *nptr;
-	} else {
-		s[1] = s[3] = 0;
-		s[0] = ~*optr;
-		s[2] = *nptr;
-	}
-
-	*csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
-
-/*
- * Mangle IP address.
- * 	- begin points to the start of the snmp messgae
- *      - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
-				  unsigned char *addr,
-				  const struct oct1_map *map,
-				  __sum16 *check)
-{
-	if (map->from == NOCT1(addr)) {
-		u_int32_t old;
-
-		if (debug)
-			memcpy(&old, (unsigned char *)addr, sizeof(old));
-
-		*addr = map->to;
-
-		/* Update UDP checksum if being used */
-		if (*check) {
-			fast_csum(check,
-				  &map->from, &map->to, addr - begin);
-		}
-
-		if (debug)
-			printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
-			       "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
-	}
-}
-
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
-				      struct snmp_v1_trap *trap,
-				      const struct oct1_map *map,
-				      __sum16 *check)
-{
-	unsigned int cls, con, tag, len;
-	unsigned char *end;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
-		return 0;
-
-	if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_id_free;
-
-	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
-	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
-		goto err_id_free;
-
-	if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
-		goto err_id_free;
-
-	/* IPv4 only */
-	if (len != 4)
-		goto err_addr_free;
-
-	mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		goto err_addr_free;
-
-	if (!asn1_uint_decode(ctx, end, &trap->general))
-		goto err_addr_free;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		goto err_addr_free;
-
-	if (!asn1_uint_decode(ctx, end, &trap->specific))
-		goto err_addr_free;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
-	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
-		goto err_addr_free;
-
-	if (!asn1_ulong_decode(ctx, end, &trap->time))
-		goto err_addr_free;
-
-	return 1;
-
-err_addr_free:
-	kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
-	kfree(trap->id);
-
-	return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-static void hex_dump(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; i++) {
-		if (i && !(i % 16))
-			printk("\n");
-		printk("%02x ", *(buf + i));
-	}
-	printk("\n");
-}
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is the fucking 'basic' method).
- */
-static int snmp_parse_mangle(unsigned char *msg,
-			     u_int16_t len,
-			     const struct oct1_map *map,
-			     __sum16 *check)
-{
-	unsigned char *eoc, *end;
-	unsigned int cls, con, tag, vers, pdutype;
-	struct asn1_ctx ctx;
-	struct asn1_octstr comm;
-	struct snmp_object **obj;
-
-	if (debug > 1)
-		hex_dump(msg, len);
-
-	asn1_open(&ctx, msg, len);
-
-	/*
-	 * Start of SNMP message.
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	/*
-	 * Version 1 or 2 handled.
-	 */
-	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-	if (!asn1_uint_decode (&ctx, end, &vers))
-		return 0;
-	if (debug > 1)
-		printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
-	if (vers > 1)
-		return 1;
-
-	/*
-	 * Community.
-	 */
-	if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
-		return 0;
-	if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
-		return 0;
-	if (debug > 1) {
-		unsigned int i;
-
-		printk(KERN_DEBUG "bsalg: community: ");
-		for (i = 0; i < comm.len; i++)
-			printk("%c", comm.data[i]);
-		printk("\n");
-	}
-	kfree(comm.data);
-
-	/*
-	 * PDU type
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
-		return 0;
-	if (cls != ASN1_CTX || con != ASN1_CON)
-		return 0;
-	if (debug > 1) {
-		unsigned char *pdus[] = {
-			[SNMP_PDU_GET] = "get",
-			[SNMP_PDU_NEXT] = "get-next",
-			[SNMP_PDU_RESPONSE] = "response",
-			[SNMP_PDU_SET] = "set",
-			[SNMP_PDU_TRAP1] = "trapv1",
-			[SNMP_PDU_BULK] = "bulk",
-			[SNMP_PDU_INFORM] = "inform",
-			[SNMP_PDU_TRAP2] = "trapv2"
-		};
-
-		if (pdutype > SNMP_PDU_TRAP2)
-			printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
-		else
-			printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
-	}
-	if (pdutype != SNMP_PDU_RESPONSE &&
-	    pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
-		return 1;
-
-	/*
-	 * Request header or v1 trap
-	 */
-	if (pdutype == SNMP_PDU_TRAP1) {
-		struct snmp_v1_trap trap;
-		unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
-		if (ret) {
-			kfree(trap.id);
-			kfree((unsigned long *)trap.ip_address);
-		} else
-			return ret;
-
-	} else {
-		struct snmp_request req;
-
-		if (!snmp_request_decode(&ctx, &req))
-			return 0;
-
-		if (debug > 1)
-			printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
-			"error_index=%u\n", req.id, req.error_status,
-			req.error_index);
-	}
-
-	/*
-	 * Loop through objects, look for IP addresses to mangle.
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
-	if (obj == NULL) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
-		return 0;
-	}
-
-	while (!asn1_eoc_decode(&ctx, eoc)) {
-		unsigned int i;
-
-		if (!snmp_object_decode(&ctx, obj)) {
-			if (*obj) {
-				kfree((*obj)->id);
-				kfree(*obj);
-			}
-			kfree(obj);
-			return 0;
-		}
-
-		if (debug > 1) {
-			printk(KERN_DEBUG "bsalg: object: ");
-			for (i = 0; i < (*obj)->id_len; i++) {
-				if (i > 0)
-					printk(".");
-				printk("%lu", (*obj)->id[i]);
-			}
-			printk(": type=%u\n", (*obj)->type);
-
-		}
-
-		if ((*obj)->type == SNMP_IPADDR)
-			mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
-
-		kfree((*obj)->id);
-		kfree(*obj);
-	}
-	kfree(obj);
-
-	if (!asn1_eoc_decode(&ctx, eoc))
-		return 0;
-
-	return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct sk_buff **pskb)
-{
-	struct iphdr *iph = (*pskb)->nh.iph;
-	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
-	u_int16_t udplen = ntohs(udph->len);
-	u_int16_t paylen = udplen - sizeof(struct udphdr);
-	int dir = CTINFO2DIR(ctinfo);
-	struct oct1_map map;
-
-	/*
-	 * Determine mappping for application layer addresses based
-	 * on NAT manipulations for the packet.
-	 */
-	if (dir == IP_CT_DIR_ORIGINAL) {
-		/* SNAT traps */
-		map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
-		map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
-	} else {
-		/* DNAT replies */
-		map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-		map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
-	}
-
-	if (map.from == map.to)
-		return NF_ACCEPT;
-
-	if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
-			       paylen, &map, &udph->check)) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "bsalg: parser failed\n");
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct,
-		enum ip_conntrack_info ctinfo)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	unsigned int ret;
-	struct iphdr *iph = (*pskb)->nh.iph;
-	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
-
-	/* SNMP replies and originating SNMP traps get mangled */
-	if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
-		return NF_ACCEPT;
-	if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	/* No NAT? */
-	if (!(ct->status & IPS_NAT_MASK))
-		return NF_ACCEPT;
-
-	/*
-	 * Make sure the packet length is ok.  So far, we were only guaranteed
-	 * to have a valid length IP header plus 8 bytes, which means we have
-	 * enough room for a UDP header.  Just verify the UDP length field so we
-	 * can mess around with the payload.
-	 */
-	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
-		 if (net_ratelimit())
-			 printk(KERN_WARNING "SNMP: dropping malformed packet "
-				"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
-				NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
-		 return NF_DROP;
-	}
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return NF_DROP;
-
-	spin_lock_bh(&snmp_lock);
-	ret = snmp_translate(ct, ctinfo, pskb);
-	spin_unlock_bh(&snmp_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper snmp_helper = {
-	.max_expected = 0,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "snmp",
-
-	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP},
-	},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF},
-	},
-};
-
-static struct ip_conntrack_helper snmp_trap_helper = {
-	.max_expected = 0,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "snmp_trap",
-
-	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP},
-	},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF},
-	},
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init ip_nat_snmp_basic_init(void)
-{
-	int ret = 0;
-
-	ret = ip_conntrack_helper_register(&snmp_helper);
-	if (ret < 0)
-		return ret;
-	ret = ip_conntrack_helper_register(&snmp_trap_helper);
-	if (ret < 0) {
-		ip_conntrack_helper_unregister(&snmp_helper);
-		return ret;
-	}
-	return ret;
-}
-
-static void __exit ip_nat_snmp_basic_fini(void)
-{
-	ip_conntrack_helper_unregister(&snmp_helper);
-	ip_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(ip_nat_snmp_basic_init);
-module_exit(ip_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index 6bcfdf6dfcc9..000000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/* This file contains all the functions required for the standalone
-   ip_nat module.
-
-   These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * 	- new API and handling of conntrack/nat helpers
- * 	- now capable of multiple expectations for one master
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
-	struct ip_conntrack *ct;
-	struct ip_conntrack_tuple *t;
-	enum ip_conntrack_info ctinfo;
-	enum ip_conntrack_dir dir;
-	unsigned long statusbit;
-
-	ct = ip_conntrack_get(skb, &ctinfo);
-	if (ct == NULL)
-		return;
-	dir = CTINFO2DIR(ctinfo);
-	t = &ct->tuplehash[dir].tuple;
-
-	if (dir == IP_CT_DIR_ORIGINAL)
-		statusbit = IPS_DST_NAT;
-	else
-		statusbit = IPS_SRC_NAT;
-
-	if (ct->status & statusbit) {
-		fl->fl4_dst = t->dst.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP)
-			fl->fl_ip_dport = t->dst.u.tcp.port;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl->fl4_src = t->src.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP)
-			fl->fl_ip_sport = t->src.u.tcp.port;
-	}
-}
-#endif
-
-static unsigned int
-ip_nat_fn(unsigned int hooknum,
-	  struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	struct ip_nat_info *info;
-	/* maniptype == SRC for postrouting. */
-	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-	/* We never see fragments: conntrack defrags on pre-routing
-	   and local-out, and ip_nat_out protects post-routing. */
-	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
-		       & htons(IP_MF|IP_OFFSET)));
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	/* Can't track?  It's not due to stress, or conntrack would
-	   have dropped it.  Hence it's the user's responsibilty to
-	   packet filter it out, or implement conntrack/NAT for that
-	   protocol. 8) --RR */
-	if (!ct) {
-		/* Exception: ICMP redirect to new connection (not in
-		   hash table yet).  We must not let this through, in
-		   case we're doing NAT to the same network. */
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			struct icmphdr _hdr, *hp;
-
-			hp = skb_header_pointer(*pskb,
-						(*pskb)->nh.iph->ihl*4,
-						sizeof(_hdr), &_hdr);
-			if (hp != NULL &&
-			    hp->type == ICMP_REDIRECT)
-				return NF_DROP;
-		}
-		return NF_ACCEPT;
-	}
-
-	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &ip_conntrack_untracked)
-		return NF_ACCEPT;
-
-	switch (ctinfo) {
-	case IP_CT_RELATED:
-	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
-							   hooknum, pskb))
-				return NF_DROP;
-			else
-				return NF_ACCEPT;
-		}
-		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
-	case IP_CT_NEW:
-		info = &ct->nat.info;
-
-		/* Seen it before?  This can happen for loopback, retrans,
-		   or local packets.. */
-		if (!ip_nat_initialized(ct, maniptype)) {
-			unsigned int ret;
-
-			if (unlikely(is_confirmed(ct)))
-				/* NAT module was loaded late */
-				ret = alloc_null_binding_confirmed(ct, info,
-								   hooknum);
-			else if (hooknum == NF_IP_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, info, hooknum);
-			else
-				ret = ip_nat_rule_find(pskb, hooknum,
-						       in, out, ct,
-						       info);
-
-			if (ret != NF_ACCEPT) {
-				return ret;
-			}
-		} else
-			DEBUGP("Already setup manip %s for ct %p\n",
-			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
-			       ct);
-		break;
-
-	default:
-		/* ESTABLISHED */
-		IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
-			     || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
-		info = &ct->nat.info;
-	}
-
-	IP_NF_ASSERT(info);
-	return ip_nat_packet(ct, ctinfo, hooknum, pskb);
-}
-
-static unsigned int
-ip_nat_in(unsigned int hooknum,
-	  struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	unsigned int ret;
-	__be32 daddr = (*pskb)->nh.iph->daddr;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && daddr != (*pskb)->nh.iph->daddr) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
-	}
-	return ret;
-}
-
-static unsigned int
-ip_nat_out(unsigned int hooknum,
-	   struct sk_buff **pskb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-#endif
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-#ifdef CONFIG_XFRM
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.src.ip !=
-		    ct->tuplehash[!dir].tuple.dst.ip
-		    || ct->tuplehash[dir].tuple.src.u.all !=
-		       ct->tuplehash[!dir].tuple.dst.u.all
-		    )
-			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
-	}
-#endif
-	return ret;
-}
-
-static unsigned int
-ip_nat_local_fn(unsigned int hooknum,
-		struct sk_buff **pskb,
-		const struct net_device *in,
-		const struct net_device *out,
-		int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.dst.ip !=
-		    ct->tuplehash[!dir].tuple.src.ip) {
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
-				ret = NF_DROP;
-		}
-#ifdef CONFIG_XFRM
-		else if (ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (ip_xfrm_me_harder(pskb))
-				ret = NF_DROP;
-#endif
-
-	}
-	return ret;
-}
-
-static unsigned int
-ip_nat_adjust(unsigned int hooknum,
-	      struct sk_buff **pskb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
-		DEBUGP("ip_nat_standalone: adjusting sequence number\n");
-		if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops ip_nat_ops[] = {
-	/* Before packet filtering, change destination */
-	{
-		.hook		= ip_nat_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= ip_nat_out,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* After conntrack, adjust sequence number */
-	{
-		.hook		= ip_nat_adjust,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
-	},
-	/* Before packet filtering, change destination */
-	{
-		.hook		= ip_nat_local_fn,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= ip_nat_fn,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* After conntrack, adjust sequence number */
-	{
-		.hook		= ip_nat_adjust,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
-	},
-};
-
-static int __init ip_nat_standalone_init(void)
-{
-	int ret = 0;
-
-	need_conntrack();
-
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
-#endif
-	ret = ip_nat_rule_init();
-	if (ret < 0) {
-		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
-	}
-	ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
-	if (ret < 0) {
-		printk("ip_nat_init: can't register hooks.\n");
-		goto cleanup_rule_init;
-	}
-	return ret;
-
- cleanup_rule_init:
-	ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
-	return ret;
-}
-
-static void __exit ip_nat_standalone_fini(void)
-{
-	nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
-	ip_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
-}
-
-module_init(ip_nat_standalone_init);
-module_exit(ip_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 604793536fc1..000000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * 	- Port to newnat API
- *
- * This module currently supports DNAT:
- * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
- *
- * and SNAT:
- * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
- *
- * It has not been tested with
- * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
- * If you do test this please let me know if it works or not.
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *ct = exp->master;
-
-	exp->saved_proto.udp.port
-		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
-	exp->dir = IP_CT_DIR_REPLY;
-	exp->expectfn = ip_nat_follow_master;
-	if (ip_conntrack_expect_related(exp) != 0)
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-static void __exit ip_nat_tftp_fini(void)
-{
-	rcu_assign_pointer(ip_nat_tftp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_tftp_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_tftp_hook));
-	rcu_assign_pointer(ip_nat_tftp_hook, help);
-	return 0;
-}
-
-module_init(ip_nat_tftp_init);
-module_exit(ip_nat_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a14798a850d7..702d94db19b9 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -8,18 +8,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- *             Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -191,12 +179,13 @@ ipq_flush(int verdict)
 static struct sk_buff *
 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size = 0;
 	size_t data_len = 0;
 	struct sk_buff *skb;
 	struct ipq_packet_msg *pmsg;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	read_lock_bh(&queue_lock);
 
@@ -234,15 +223,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 	pmsg = NLMSG_DATA(nlh);
 	memset(pmsg, 0, sizeof(*pmsg));
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+	tv = ktime_to_timeval(entry->skb->tstamp);
+	pmsg->timestamp_sec   = tv.tv_sec;
+	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
 	pmsg->hook            = entry->info->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
@@ -378,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, v->data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, v->payload, v->data_len);
+	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 
 	return 0;
@@ -495,7 +485,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (skblen < sizeof(*nlh))
 		return;
 
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	nlmsglen = nlh->nlmsg_len;
 	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 		return;
@@ -678,7 +668,7 @@ static int __init ip_queue_init(void)
 
 	netlink_register_notifier(&ipq_nl_notifier);
 	ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
-				      THIS_MODULE);
+				      NULL, THIS_MODULE);
 	if (ipqnl == NULL) {
 		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
 		goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50cc4b92e284..e3f83bf160d9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -7,12 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
  */
 #include <linux/cache.h>
 #include <linux/capability.h>
@@ -198,7 +192,7 @@ int do_match(struct ipt_entry_match *m,
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
-				      offset, skb->nh.iph->ihl*4, hotdrop))
+				      offset, ip_hdrlen(skb), hotdrop))
 		return 1;
 	else
 		return 0;
@@ -231,7 +225,7 @@ ipt_do_table(struct sk_buff **pskb,
 	struct xt_table_info *private;
 
 	/* Initialization */
-	ip = (*pskb)->nh.iph;
+	ip = ip_hdr(*pskb);
 	datalen = (*pskb)->len - ip->ihl * 4;
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
@@ -320,7 +314,7 @@ ipt_do_table(struct sk_buff **pskb,
 					= 0x57acc001;
 #endif
 				/* Target might have changed stuff. */
-				ip = (*pskb)->nh.iph;
+				ip = ip_hdr(*pskb);
 				datalen = (*pskb)->len - ip->ihl * 4;
 
 				if (verdict == IPT_CONTINUE)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index e965b333c997..40e273421398 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -21,15 +21,12 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
 #include <linux/netfilter_arp.h>
-
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/checksum.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -240,7 +237,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 static inline u_int32_t
 clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	unsigned long hashval;
 	u_int16_t sport, dport;
 	u_int16_t *ports;
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	u_int32_t *mark, hash;
+	u_int32_t hash;
 
 	/* don't need to clusterip_config_get() here, since refcount
 	 * is only decremented by destroy() - and ip_tables guarantees
 	 * that the ->target() function isn't called after ->destroy() */
 
-	mark = nf_ct_get_mark((*pskb), &ctinfo);
-	if (mark == NULL) {
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct == NULL) {
 		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
 			 * to outgoing connections of other nodes will be
@@ -328,7 +326,7 @@ target(struct sk_buff **pskb,
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
-	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+	if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
 	    && (ctinfo == IP_CT_RELATED
 		|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
 		return XT_CONTINUE;
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb,
 
 	switch (ctinfo) {
 		case IP_CT_NEW:
-			*mark = hash;
+			ct->mark = hash;
 			break;
 		case IP_CT_RELATED:
 		case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
 	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-	DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+	DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
 	if (!clusterip_responsible(cipinfo->config, hash)) {
 		DEBUGP("not responsible\n");
 		return NF_DROP;
@@ -411,12 +409,10 @@ checkentry(const char *tablename,
 				       "has invalid config pointer!\n");
 				return 0;
 			}
-			clusterip_config_entry_get(cipinfo->config);
 		} else {
 			/* Case B: This is a new rule referring to an existing
 			 * clusterip config. */
 			cipinfo->config = config;
-			clusterip_config_entry_get(cipinfo->config);
 		}
 	} else {
 		/* Case C: This is a completely new clusterip config */
@@ -523,7 +519,7 @@ arp_mangle(unsigned int hook,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
-	struct arphdr *arp = (*pskb)->nh.arph;
+	struct arphdr *arp = arp_hdr(*pskb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
 
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4f565633631d..918ca92e534a 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -5,14 +5,13 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
 */
 
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
 
@@ -29,13 +28,13 @@ MODULE_DESCRIPTION("iptables ECN modification module");
 static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return 0;
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -52,7 +51,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	__be16 oldval;
 
 	/* Not enought header? */
-	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+	tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				  sizeof(_tcph), &_tcph);
 	if (!tcph)
 		return 0;
@@ -63,9 +62,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	     tcph->cwr == einfo->proto.tcp.cwr)))
 		return 1;
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
-	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
 
 	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -93,7 +92,7 @@ target(struct sk_buff **pskb,
 			return NF_DROP;
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
-	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+	    && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
 		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd94228..a42c5cd968b1 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && skb->dev->hard_header_len
-		    && skb->mac.raw != (void*)skb->nh.iph) {
+		    && skb->mac_header != skb->network_header) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
 				printk("%02x%c", *p,
 				       i==skb->dev->hard_header_len - 1
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void)
 	ret = xt_register_target(&ipt_log_reg);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
-		printk(KERN_WARNING "ipt_LOG: not logging via system console "
-		       "since somebody else already registered for PF_INET\n");
-		/* we cannot make module load fail here, since otherwise
-		 * iptables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_INET, &ipt_log_logger);
+	if (ret < 0 && ret != -EEXIST)
+		xt_unregister_target(&ipt_log_reg);
+	return ret;
 }
 
 static void __exit ipt_log_fini(void)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5955f3a3f8f..d4f2d7775330 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,12 +19,8 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
+#include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename,
 		 void *targinfo,
 		 unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("masquerade_check: bad MAP_IPS.\n");
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-#ifdef CONFIG_NF_NAT_NEEDED
+	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
-#endif
-	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
-	struct ip_nat_range newrange;
-	const struct ip_nat_multi_range_compat *mr;
+	struct nf_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr;
 	struct rtable *rt;
 	__be32 newsrc;
 
-	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-#ifdef CONFIG_NF_NAT_NEEDED
+	ct = nf_ct_get(*pskb, &ctinfo);
 	nat = nfct_nat(ct);
-#endif
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
 			    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
 
 	/* Source address is 0.0.0.0 - locally generated packet that is
 	 * probably not supposed to be masqueraded.
 	 */
-#ifdef CONFIG_NF_NAT_NEEDED
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-#else
-	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
-#endif
 		return NF_ACCEPT;
 
 	mr = targinfo;
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb,
 	}
 
 	write_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
 	nat->masq_index = out->ifindex;
-#else
-	ct->nat.masq_index = out->ifindex;
-#endif
 	write_unlock_bh(&masq_lock);
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  newsrc, newsrc,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+device_cmp(struct nf_conn *i, void *ifindex)
 {
-	int ret;
-#ifdef CONFIG_NF_NAT_NEEDED
 	struct nf_conn_nat *nat = nfct_nat(i);
+	int ret;
 
 	if (!nat)
 		return 0;
-#endif
 
 	read_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
 	ret = (nat->masq_index == (int)(long)ifindex);
-#else
-	ret = (i->nat.masq_index == (int)(long)ifindex);
-#endif
 	read_unlock_bh(&masq_lock);
 
 	return ret;
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this,
 		/* Device was downed.  Search entire table for
 		   conntracks which were associated with that device,
 		   and forget them. */
-		IP_NF_ASSERT(dev->ifindex != 0);
+		NF_CT_ASSERT(dev->ifindex != 0);
 
-		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+		nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
 	}
 
 	return NOTIFY_DONE;
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this,
 		/* IP address was deleted.  Search entire table for
 		   conntracks which were associated with that device,
 		   and forget them. */
-		IP_NF_ASSERT(dev->ifindex != 0);
+		NF_CT_ASSERT(dev->ifindex != 0);
 
-		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+		nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
 	}
 
 	return NOTIFY_DONE;
@@ -194,7 +173,7 @@ static struct xt_target masquerade = {
 	.name		= "MASQUERADE",
 	.family		= AF_INET,
 	.target		= masquerade_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_IP_POST_ROUTING,
 	.checkentry	= masquerade_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd7aaa347cd8..068c69bce30e 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,11 +16,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 #define MODULENAME "NETMAP"
 MODULE_LICENSE("GPL");
@@ -40,7 +36,7 @@ check(const char *tablename,
       void *targinfo,
       unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
 		DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
@@ -61,39 +57,39 @@ target(struct sk_buff **pskb,
        const struct xt_target *target,
        const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-	struct ip_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
+	struct nf_nat_range newrange;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_POST_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
-	ct = ip_conntrack_get(*pskb, &ctinfo);
+	ct = nf_ct_get(*pskb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
 	if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
-		new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+		new_ip = ip_hdr(*pskb)->daddr & ~netmask;
 	else
-		new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+		new_ip = ip_hdr(*pskb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  new_ip, new_ip,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target target_module = {
 	.name 		= MODULENAME,
 	.family		= AF_INET,
 	.target 	= target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
 			  (1 << NF_IP_LOCAL_OUT),
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c2b6b80670f8..68cc76a198eb 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,11 +19,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -43,7 +39,7 @@ redirect_check(const char *tablename,
 	       void *targinfo,
 	       unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("redirect_check: bad MAP_IPS.\n");
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb,
 		const struct xt_target *target,
 		const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-	struct ip_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
+	struct nf_nat_range newrange;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
 
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	ct = nf_ct_get(*pskb, &ctinfo);
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
 	if (hooknum == NF_IP_LOCAL_OUT)
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb,
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  newdst, newdst,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target redirect_reg = {
 	.name		= "REDIRECT",
 	.family		= AF_INET,
 	.target		= redirect_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
 	.checkentry	= redirect_check,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 80f739e21824..9041e0741f6f 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
 /*
  * This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
  */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
@@ -43,7 +41,7 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
-	struct iphdr *iph = oldskb->nh.iph;
+	struct iphdr *niph;
 	struct tcphdr _otcph, *oth, *tcph;
 	__be16 tmp_port;
 	__be32 tmp_addr;
@@ -51,10 +49,10 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	unsigned int addr_type;
 
 	/* IP header checks: fragment. */
-	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
 		return;
 
-	oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
 				 sizeof(_otcph), &_otcph);
 	if (oth == NULL)
 		return;
@@ -64,7 +62,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		return;
 
 	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
 		return;
 
 	/* We need a linear, writeable skb.  We also need to expand
@@ -84,20 +82,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	skb_shinfo(nskb)->gso_segs = 0;
 	skb_shinfo(nskb)->gso_type = 0;
 
-	tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+	tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
 
 	/* Swap source and dest */
-	tmp_addr = nskb->nh.iph->saddr;
-	nskb->nh.iph->saddr = nskb->nh.iph->daddr;
-	nskb->nh.iph->daddr = tmp_addr;
+	niph = ip_hdr(nskb);
+	tmp_addr = niph->saddr;
+	niph->saddr = niph->daddr;
+	niph->daddr = tmp_addr;
 	tmp_port = tcph->source;
 	tcph->source = tcph->dest;
 	tcph->dest = tmp_port;
 
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
-	skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
-	nskb->nh.iph->tot_len = htons(nskb->len);
+	skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
+	niph->tot_len = htons(nskb->len);
 
 	if (tcph->ack) {
 		needs_ack = 0;
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		tcph->ack_seq = 0;
 	} else {
 		needs_ack = 1;
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
-				      + oldskb->len - oldskb->nh.iph->ihl*4
-				      - (oth->doff<<2));
+		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+				      oldskb->len - ip_hdrlen(oldskb) -
+				      (oth->doff << 2));
 		tcph->seq = 0;
 	}
 
@@ -122,14 +121,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Adjust TCP checksum */
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
-				   nskb->nh.iph->saddr,
-				   nskb->nh.iph->daddr,
+				   niph->saddr, niph->daddr,
 				   csum_partial((char *)tcph,
 						sizeof(struct tcphdr), 0));
 
 	/* Set DF, id = 0 */
-	nskb->nh.iph->frag_off = htons(IP_DF);
-	nskb->nh.iph->id = 0;
+	niph->frag_off = htons(IP_DF);
+	niph->id = 0;
 
 	addr_type = RTN_UNSPEC;
 	if (hook != NF_IP_FORWARD
@@ -145,12 +143,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	nskb->ip_summed = CHECKSUM_NONE;
 
 	/* Adjust IP TTL */
-	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+	niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 
 	/* Adjust IP checksum */
-	nskb->nh.iph->check = 0;
-	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
-					   nskb->nh.iph->ihl);
+	niph->check = 0;
+	niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(nskb->dst))
@@ -182,7 +179,7 @@ static unsigned int reject(struct sk_buff **pskb,
 
 	/* Our naive response construction doesn't deal with IP
 	   options, and probably shouldn't try. */
-	if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+	if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
 		return NF_DROP;
 
 	/* WARNING: This code causes reentry within iptables.
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index bd4404e5c688..511e5ff84938 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -7,21 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* added --nodst to not include destination-ip in new source
- * 	  calculations.
- *	* added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* fixed a buggy if-statement in same_check(), should have
- * 	  used ntohl() but didn't.
- * 	* added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * 	  defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * 	  and is currently set to 10.
- * 	* added support for 1-address range, nice to have now that
- * 	  we have multiple ranges.
  */
 #include <linux/types.h>
 #include <linux/ip.h>
@@ -35,11 +20,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 #include <linux/netfilter_ipv4/ipt_SAME.h>
 
 MODULE_LICENSE("GPL");
@@ -138,17 +119,17 @@ same_target(struct sk_buff **pskb,
 		const struct xt_target *target,
 		const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int32_t tmpip, aindex;
 	__be32 new_ip;
 	const struct ipt_same_info *same = targinfo;
-	struct ip_nat_range newrange;
-	const struct ip_conntrack_tuple *t;
+	struct nf_nat_range newrange;
+	const struct nf_conntrack_tuple *t;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 			hooknum == NF_IP_POST_ROUTING);
-	ct = ip_conntrack_get(*pskb, &ctinfo);
+	ct = nf_ct_get(*pskb, &ctinfo);
 
 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 
@@ -157,17 +138,10 @@ same_target(struct sk_buff **pskb,
 	   Here we calculate the index in same->iparray which
 	   holds the ipaddress we should use */
 
-#ifdef CONFIG_NF_NAT_NEEDED
 	tmpip = ntohl(t->src.u3.ip);
 
 	if (!(same->info & IPT_SAME_NODST))
 		tmpip += ntohl(t->dst.u3.ip);
-#else
-	tmpip = ntohl(t->src.ip);
-
-	if (!(same->info & IPT_SAME_NODST))
-		tmpip += ntohl(t->dst.ip);
-#endif
 	aindex = tmpip % same->ipnum;
 
 	new_ip = htonl(same->iparray[aindex]);
@@ -178,13 +152,13 @@ same_target(struct sk_buff **pskb,
 			NIPQUAD(new_ip));
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ same->range[0].flags, new_ip, new_ip,
 		  /* FIXME: Use ports from correct range! */
 		  same->range[0].min, same->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target same_reg = {
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index cedf9f7d9d6e..0ad02f249837 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 
 	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		__u8 oldtos;
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 64be31c22ba9..a991ec7bd4e7 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 
 	switch (info->mode) {
 		case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index a26404dbe212..23b607b33b32 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,20 +2,6 @@
  * netfilter module for userspace packet logging daemons
  *
  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- * 						<zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- *            nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- * 	      module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- *	      resulting in bogus 'error during NLMSG_PUT' messages.
- *
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
@@ -42,8 +28,6 @@
  * flushtimeout:
  *   Specify, after how many hundredths of a second the queue should be
  *   flushed even if it is not full yet.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
  */
 
 #include <linux/module.h>
@@ -61,6 +45,7 @@
 #include <linux/netfilter_ipv4/ipt_ULOG.h>
 #include <net/sock.h>
 #include <linux/bitops.h>
+#include <asm/unaligned.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
@@ -186,6 +171,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	ulog_packet_msg_t *pm;
 	size_t size, copy_len;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	/* ffs == find first bit set, necessary because userspace
 	 * is already shifting groupnumber, but we need unshifted.
@@ -231,14 +217,15 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	pm = NLMSG_DATA(nlh);
 
 	/* We might not have a timestamp, get one */
-	if (skb->tstamp.off_sec == 0)
+	if (skb->tstamp.tv64 == 0)
 		__net_timestamp((struct sk_buff *)skb);
 
 	/* copy hook, prefix, timestamp, payload, etc. */
 	pm->data_len = copy_len;
-	pm->timestamp_sec = skb->tstamp.off_sec;
-	pm->timestamp_usec = skb->tstamp.off_usec;
-	pm->mark = skb->mark;
+	tv = ktime_to_timeval(skb->tstamp);
+	put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+	put_unaligned(tv.tv_usec, &pm->timestamp_usec);
+	put_unaligned(skb->mark, &pm->mark);
 	pm->hook = hooknum;
 	if (prefix != NULL)
 		strncpy(pm->prefix, prefix, sizeof(pm->prefix));
@@ -248,9 +235,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		*(pm->prefix) = '\0';
 
 	if (in && in->hard_header_len > 0
-	    && skb->mac.raw != (void *) skb->nh.iph
+	    && skb->mac_header != skb->network_header
 	    && in->hard_header_len <= ULOG_MAC_LEN) {
-		memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
 		pm->mac_len = in->hard_header_len;
 	} else
 		pm->mac_len = 0;
@@ -362,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+	compat_uint_t	nl_group;
+	compat_size_t	copy_range;
+	compat_size_t	qthreshold;
+	char		prefix[ULOG_PREFIX_LEN];
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_ipt_ulog_info *cl = src;
+	struct ipt_ulog_info l = {
+		.nl_group	= cl->nl_group,
+		.copy_range	= cl->copy_range,
+		.qthreshold	= cl->qthreshold,
+	};
+
+	memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+	memcpy(dst, &l, sizeof(l));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct ipt_ulog_info *l = src;
+	struct compat_ipt_ulog_info cl = {
+		.nl_group	= l->nl_group,
+		.copy_range	= l->copy_range,
+		.qthreshold	= l->qthreshold,
+	};
+
+	memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+	return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target ipt_ulog_reg = {
 	.name		= "ULOG",
 	.family		= AF_INET,
 	.target		= ipt_ulog_target,
 	.targetsize	= sizeof(struct ipt_ulog_info),
 	.checkentry	= ipt_ulog_checkentry,
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(struct compat_ipt_ulog_info),
+	.compat_from_user = compat_from_user,
+	.compat_to_user	= compat_to_user,
+#endif
 	.me		= THIS_MODULE,
 };
 
@@ -389,14 +416,11 @@ static int __init ipt_ulog_init(void)
 	}
 
 	/* initialize ulog_buffers */
-	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-		init_timer(&ulog_buffers[i].timer);
-		ulog_buffers[i].timer.function = ulog_timer;
-		ulog_buffers[i].timer.data = i;
-	}
+	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 
 	nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
-					THIS_MODULE);
+					NULL, THIS_MODULE);
 	if (!nflognl)
 		return -ENOMEM;
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index cfa0472617f6..a652a1451552 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	int ret = 1;
 
 	if (info->source)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 37508b2cfea6..26218122f865 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the value of the IPv4 and TCP ECN bits
  *
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
- *
  * (C) 2002 by Harald Welte <laforge@gnumonks.org>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -11,6 +9,7 @@
 
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/tcp.h>
@@ -26,7 +25,7 @@ MODULE_LICENSE("GPL");
 static inline int match_ip(const struct sk_buff *skb,
 			   const struct ipt_ecn_info *einfo)
 {
-	return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+	return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
 }
 
 static inline int match_tcp(const struct sk_buff *skb,
@@ -38,8 +37,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 	/* In practice, TCP match does this, so can't fail.  But let's
 	 * be good citizens.
 	 */
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*hotdrop = 0;
 		return 0;
@@ -80,7 +78,7 @@ static int match(const struct sk_buff *skb,
 			return 0;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (skb->nh.iph->protocol != IPPROTO_TCP)
+		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return 0;
 		if (!match_tcp(skb, info, hotdrop))
 			return 0;
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index bc5d5e6091e4..33af9e940887 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
       int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
 		if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index aecb9c48e152..15a9e8bbb7cc 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb,
 	int ret = info->invert;
 
 	if (info->side == IPT_RECENT_DEST)
-		addr = skb->nh.iph->daddr;
+		addr = ip_hdr(skb)->daddr;
 	else
-		addr = skb->nh.iph->saddr;
+		addr = ip_hdr(skb)->saddr;
 
-	ttl = skb->nh.iph->ttl;
+	ttl = ip_hdr(skb)->ttl;
 	/* use TTL as seen before forwarding */
 	if (out && !skb->sk)
 		ttl++;
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5d33b51d49d8..d314844af12b 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb,
 {
 	const struct ipt_tos_info *info = matchinfo;
 
-	return (skb->nh.iph->tos == info->tos) ^ info->invert;
+	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
 static struct xt_match tos_match = {
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 1eca9f400374..ab02d9e3139c 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the value of the TTL
  *
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
- *
  * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,19 +24,20 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
+	const u8 ttl = ip_hdr(skb)->ttl;
 
 	switch (info->mode) {
 		case IPT_TTL_EQ:
-			return (skb->nh.iph->ttl == info->ttl);
+			return (ttl == info->ttl);
 			break;
 		case IPT_TTL_NE:
-			return (!(skb->nh.iph->ttl == info->ttl));
+			return (!(ttl == info->ttl));
 			break;
 		case IPT_TTL_LT:
-			return (skb->nh.iph->ttl < info->ttl);
+			return (ttl < info->ttl);
 			break;
 		case IPT_TTL_GT:
-			return (skb->nh.iph->ttl > info->ttl);
+			return (ttl > info->ttl);
 			break;
 		default:
 			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index d1d61e97b976..42728909eba0 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98b66ef0c714..9278802f2742 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,8 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -17,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -130,13 +129,14 @@ ipt_local_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
+	const struct iphdr *iph;
 	u_int8_t tos;
 	__be32 saddr, daddr;
 	u_int32_t mark;
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -144,19 +144,23 @@ ipt_local_hook(unsigned int hook,
 
 	/* Save things which could affect route */
 	mark = (*pskb)->mark;
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-	tos = (*pskb)->nh.iph->tos;
+	iph = ip_hdr(*pskb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
 
 	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
-	    && ((*pskb)->nh.iph->saddr != saddr
-		|| (*pskb)->nh.iph->daddr != daddr
-		|| (*pskb)->mark != mark
-		|| (*pskb)->nh.iph->tos != tos))
-		if (ip_route_me_harder(pskb, RTN_UNSPEC))
-			ret = NF_DROP;
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+		iph = ip_hdr(*pskb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    (*pskb)->mark != mark ||
+		    iph->tos != tos)
+			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+				ret = NF_DROP;
+	}
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df8..0654eaae70c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -4,14 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add get_features() to support various size of conntrack
- *	  structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
  */
 
 #include <linux/types.h>
@@ -87,7 +79,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 	local_bh_enable();
 
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 
 	return skb;
 }
@@ -97,16 +89,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	     u_int8_t *protonum)
 {
 	/* Never happen */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
 		if (net_ratelimit()) {
 			printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
-			(*pskb)->nh.iph->protocol, hooknum);
+			ip_hdr(*pskb)->protocol, hooknum);
 		}
 		return -NF_DROP;
 	}
 
-	*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
-	*protonum = (*pskb)->nh.iph->protocol;
+	*dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
+	*protonum = ip_hdr(*pskb)->protocol;
 
 	return NF_ACCEPT;
 }
@@ -152,9 +144,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	return help->helper->help(pskb,
-			       (*pskb)->nh.raw - (*pskb)->data
-					       + (*pskb)->nh.iph->ihl*4,
-			       ct, ctinfo);
+				  skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+				  ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
@@ -171,7 +162,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 
 	/* Gather fragments. */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		*pskb = nf_ct_ipv4_gather_frags(*pskb,
 						hooknum == NF_IP_PRE_ROUTING ?
 						IP_DEFRAG_CONNTRACK_IN :
@@ -199,7 +190,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5fd1e5363c1a..f4fc657c1983 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
  */
 
 #include <linux/types.h>
@@ -158,7 +153,7 @@ icmp_error_message(struct sk_buff *skb,
 	NF_CT_ASSERT(skb->nfct == NULL);
 
 	/* Not enough header? */
-	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
 	if (inside == NULL)
 		return -NF_ACCEPT;
 
@@ -172,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
 	/* rcu_read_lock()ed by nf_hook_slow */
 	innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
-	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+	dataoff = ip_hdrlen(skb) + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
 			     inside->ip.protocol, &origtuple,
@@ -227,7 +222,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 	struct icmphdr _ih, *icmph;
 
 	/* Not enough header? */
-	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 452e9d326684..ea02f00d2dac 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	} *inside;
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple inner, target;
-	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	int hdrlen = ip_hdrlen(*pskb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
 	if (!nf_ct_get_tuple(*pskb,
-			     (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
-			     (*pskb)->nh.iph->ihl*4 +
-			     sizeof(struct icmphdr) + inside->ip.ihl*4,
+			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
+			     (ip_hdrlen(*pskb) +
+			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
 			     (u_int16_t)AF_INET,
 			     inside->ip.protocol,
 			     &inner, l3proto, l4proto))
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
 	if (!manip_pkt(inside->ip.protocol, pskb,
-		       (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
 			csum_fold(skb_checksum(*pskb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9cbf3f9be13b..fcebc968d37f 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
 	struct {
 		__be32 ip;
 		__be16 port;
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb,
 	buf.port = port;
 	addroff += dataoff;
 
-	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
 		if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb,
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+		*data = (*pskb)->data + ip_hdrlen(*pskb) +
 		    th->doff * 4 + dataoff;
 	} else {
 		if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb,
 		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-		    sizeof(struct udphdr);
+		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
+			 sizeof(struct udphdr));
 	}
 
 	return 0;
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 49a90c39ffce..15b6e5ce3a04 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -87,12 +87,13 @@ static void mangle_contents(struct sk_buff *skb,
 	unsigned char *data;
 
 	BUG_ON(skb_is_nonlinear(skb));
-	data = (unsigned char *)skb->nh.iph + dataoff;
+	data = skb_network_header(skb) + dataoff;
 
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
 		data + match_offset + match_len,
-		skb->tail - (data + match_offset + match_len));
+		skb->tail - (skb->network_header + dataoff +
+			     match_offset + match_len));
 
 	/* insert data from buffer */
 	memcpy(data + match_offset, rep_buffer, rep_len);
@@ -111,8 +112,8 @@ static void mangle_contents(struct sk_buff *skb,
 	}
 
 	/* fix IP hdr checksum information */
-	skb->nh.iph->tot_len = htons(skb->len);
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
 }
 
 /* Unusual, but possible case. */
@@ -152,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -166,7 +168,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	SKB_LINEAR_ASSERT(*pskb);
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	tcph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -175,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		tcph->check = 0;
-		tcph->check = tcp_v4_check(datalen,
-					   iph->saddr, iph->daddr,
-					   csum_partial((char *)tcph,
-							datalen, 0));
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct tcphdr, check);
+			tcph->check = ~tcp_v4_check(datalen,
+						    iph->saddr, iph->daddr, 0);
+		} else {
+			tcph->check = 0;
+			tcph->check = tcp_v4_check(datalen,
+						   iph->saddr, iph->daddr,
+						   csum_partial((char *)tcph,
+								datalen, 0));
+		}
 	} else
 		nf_proto_csum_replace2(&tcph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
@@ -190,7 +203,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
 		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+		nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
 					ct, CTINFO2DIR(ctinfo));
 	}
 	return 1;
@@ -216,12 +229,13 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
 			       match_offset + match_len)
 		return 0;
@@ -234,7 +248,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 	    !enlarge_skb(pskb, rep_len - match_len))
 		return 0;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	udph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -250,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 		return 1;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		udph->check = 0;
-		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						datalen, IPPROTO_UDP,
-						csum_partial((char *)udph,
-							     datalen, 0));
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct udphdr, check);
+			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 datalen, IPPROTO_UDP,
+							 0);
+		} else {
+			udph->check = 0;
+			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							datalen, IPPROTO_UDP,
+							csum_partial((char *)udph,
+								     datalen, 0));
+			if (!udph->check)
+				udph->check = CSUM_MANGLED_0;
+		}
 	} else
 		nf_proto_csum_replace2(&udph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
@@ -318,8 +344,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 	unsigned int dir, optoff, optend;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
-	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
 
 	if (!skb_make_writable(pskb, optend))
 		return 0;
@@ -371,10 +397,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	this_way = &nat->info.seq[dir];
 	other_way = &nat->info.seq[!dir];
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
@@ -399,7 +425,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
 		return 0;
 
-	nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+	nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
 
 	return 1;
 }
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 7ba341c22eaa..a66888749ceb 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	struct nf_ct_pptp_master *ct_pptp_info;
 	struct nf_nat_pptp *nat_pptp_info;
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 147a4370cf03..2a283397a8b6 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 
 	if (hooknum == NF_IP_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b12cd7c314ca..bfd88e4e0685 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/udp.h>
 
 #include <net/netfilter/nf_nat.h>
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
 	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 
 }
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	datalen = (*pskb)->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_DROP;
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 }
 
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
 	if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ce5c4939a6ee..6e88505d6162 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,10 +38,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
  * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
  */
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -1194,7 +1190,7 @@ static int snmp_translate(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  struct sk_buff **pskb)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1235,7 +1231,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 15aa3db8cb33..64bbed2ba780 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum,
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
-		       & htons(IP_MF|IP_OFFSET)));
+	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
 	ct = nf_ct_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
@@ -98,11 +97,10 @@ nf_nat_fn(unsigned int hooknum,
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet).  We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb,
-						(*pskb)->nh.iph->ihl*4,
+			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -122,7 +120,7 @@ nf_nat_fn(unsigned int hooknum,
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
 							   hooknum, pskb))
 				return NF_DROP;
@@ -177,11 +175,11 @@ nf_nat_in(unsigned int hooknum,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = (*pskb)->nh.iph->daddr;
+	__be32 daddr = ip_hdr(*pskb)->daddr;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != (*pskb)->nh.iph->daddr) {
+	    daddr != ip_hdr(*pskb)->daddr) {
 		dst_release((*pskb)->dst);
 		(*pskb)->dst = NULL;
 	}
@@ -203,7 +201,7 @@ nf_nat_out(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -236,7 +234,7 @@ nf_nat_local_fn(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae68a691e8cd..37ab5802ca08 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -87,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = {
 	.release = single_release,
 };
 
-static unsigned long
-fold_field(void *mib[], int offt)
-{
-	unsigned long res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
-	}
-	return res;
-}
-
 /* snmp items */
 static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
@@ -266,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics,
-				      snmp4_ipstats_list[i].entry));
+			   snmp_fold_field((void **)ip_statistics,
+					   snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
@@ -276,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics,
-				      snmp4_icmp_list[i].entry));
+			   snmp_fold_field((void **)icmp_statistics,
+					   snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
 	for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
@@ -288,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 	}
 
 	seq_puts(seq, "\nUdp:");
@@ -303,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics,
-				      snmp4_udp_list[i].entry));
+			   snmp_fold_field((void **)udp_statistics,
+					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
 	seq_puts(seq, "\nUdpLite:");
@@ -314,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udplite_statistics,
-				      snmp4_udp_list[i].entry)     );
+			   snmp_fold_field((void **)udplite_statistics,
+					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
@@ -348,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics,
-				      snmp4_net_list[i].entry));
+			   snmp_fold_field((void **)net_statistics,
+					   snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index da70fef82c93..971ab9356e51 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -45,7 +45,7 @@
 #include <net/ipip.h>
 #include <linux/igmp.h>
 
-struct net_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
 static DEFINE_SPINLOCK(inet_proto_lock);
 
 /*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 87e9c1618100..24d7c9f31918 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
 		return 1;
 
-	type = skb->h.icmph->type;
+	type = icmp_hdr(skb)->type;
 	if (type < 32) {
 		__u32 data = raw_sk(sk)->filter.data;
 
@@ -184,8 +184,8 @@ out:
 void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int err = 0;
 	int harderr = 0;
 
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset(skb);
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	raw_rcv_skb(sk, skb);
 	return 0;
@@ -291,11 +291,13 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
 
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	skb_put(skb, length);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
@@ -613,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	/* Copy the address. */
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
@@ -887,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations raw_seq_ops = {
+static const struct seq_operations raw_seq_ops = {
 	.start = raw_seq_start,
 	.next  = raw_seq_next,
 	.stop  = raw_seq_stop,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf94..cb76e3c725a0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -82,7 +82,6 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/pkt_sched.h>
@@ -104,6 +103,7 @@
 #include <net/xfrm.h>
 #include <net/ip_mp_alg.h>
 #include <net/netevent.h>
+#include <net/rtnetlink.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cache_seq_ops = {
+static const struct seq_operations rt_cache_seq_ops = {
 	.start  = rt_cache_seq_start,
 	.next   = rt_cache_seq_next,
 	.stop   = rt_cache_seq_stop,
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cpu_seq_ops = {
+static const struct seq_operations rt_cpu_seq_ops = {
 	.start  = rt_cpu_seq_start,
 	.next   = rt_cpu_seq_next,
 	.stop   = rt_cpu_seq_stop,
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 static int ip_rt_bug(struct sk_buff *skb)
 {
 	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
-		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+		NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
 		skb->dev ? skb->dev->name : "?");
 	kfree_skb(skb);
 	return 0;
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
 		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
 			"%u.%u.%u.%u, on dev %s\n",
 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
-		if (dev->hard_header_len && skb->mac.raw) {
+		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			printk(KERN_WARNING "ll header: ");
 			for (i = 0; i < dev->hard_header_len; i++, p++) {
 				printk("%02x", *p);
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		rcu_read_lock();
 		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
 			int our = ip_check_mc(in_dev, daddr, saddr,
-				skb->nh.iph->protocol);
+				ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
 			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
 		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
+		if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		      of another iface. --ANK
 		 */
 
-		if (oldflp->oif == 0
+		if (dev_out && oldflp->oif == 0
 		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		id = rt->peer->ip_id_count;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
-			tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
 		}
 	}
 
@@ -2721,7 +2721,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
@@ -2747,10 +2747,11 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-	skb->nh.iph->protocol = IPPROTO_ICMP;
+	ip_hdr(skb)->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
@@ -3193,6 +3194,8 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init();
 #endif
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+
 	return rc;
 }
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33016cc90f0b..2da1be0589a9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
 __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-
 	tp->last_synq_overflow = jiffies;
 
 	/* XXX sort msstab[] by probability?  Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
 
-	return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
-				     skb->h.th->source, skb->h.th->dest,
-				     ntohl(skb->h.th->seq),
+	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
+				     th->source, th->dest, ntohl(th->seq),
 				     jiffies / (HZ * 60), mssind);
 }
 
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  */
 static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 {
-	__u32 seq;
-	__u32 mssind;
-
-	seq = ntohl(skb->h.th->seq)-1;
-	mssind = check_tcp_syn_cookie(cookie,
-				      skb->nh.iph->saddr, skb->nh.iph->daddr,
-				      skb->h.th->source, skb->h.th->dest,
-				      seq, jiffies / (HZ * 60), COUNTER_TRIES);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 seq = ntohl(th->seq) - 1;
+	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+					    th->source, th->dest, seq,
+					    jiffies / (HZ * 60),
+					    COUNTER_TRIES);
 
 	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
 }
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 cookie = ntohl(th->ack_seq) - 1;
 	struct sock *ret = sk;
 	struct request_sock *req;
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
 
-	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
 
 	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,12 +220,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->rcv_isn		= ntohl(skb->h.th->seq) - 1;
+	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->rmt_port		= skb->h.th->source;
-	ireq->loc_addr		= skb->nh.iph->daddr;
-	ireq->rmt_addr		= skb->nh.iph->saddr;
+	ireq->rmt_port		= th->source;
+	ireq->loc_addr		= ip_hdr(skb)->daddr;
+	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->opt		= NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 						.tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } } };
+					       { .sport = th->dest,
+						 .dport = th->source } } };
 		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0aa304711a96..6817d6485df5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -647,6 +647,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_dointvec
 	},
 	{
+		.ctl_name	= NET_TCP_FRTO_RESPONSE,
+		.procname	= "tcp_frto_response",
+		.data		= &sysctl_tcp_frto_response,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.ctl_name	= NET_TCP_LOW_LATENCY,
 		.procname	= "tcp_low_latency",
 		.data		= &sysctl_tcp_low_latency,
@@ -803,6 +811,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler   = &proc_allowed_congestion_control,
 		.strategy	= &strategy_allowed_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_MAX_SSTHRESH,
+		.procname	= "tcp_max_ssthresh",
+		.data		= &sysctl_tcp_max_ssthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10b5115..d6e488668171 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -297,7 +297,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
  * All the sk_stream_mem_schedule() is of this nature: accounting
  * is strict, actions are advisory and have some latency.
  */
-int tcp_memory_pressure;
+int tcp_memory_pressure __read_mostly;
 
 EXPORT_SYMBOL(tcp_memory_pressure);
 
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			/* Subtract 1, if FIN is in queue. */
 			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
 				answ -=
-		       ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+		       tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
 		} else
 			answ = tp->urg_seq - tp->copied_seq;
 		release_sock(sk);
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		break;
 	default:
 		return -ENOIOCTLCMD;
-	};
+	}
 
 	return put_user(answ, (int __user *)arg);
 }
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp)
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
 
-static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
-			      struct sk_buff *skb)
+static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	skb->csum    = 0;
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
 	tcb->flags   = TCPCB_FLAG_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-	if (!sk->sk_send_head)
-		sk->sk_send_head = skb;
 	if (tp->nonagle & TCP_NAGLE_PUSH)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
@@ -488,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 	}
 }
 
-static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
-			    int mss_now, int nonagle)
+static inline void tcp_push(struct sock *sk, int flags, int mss_now,
+			    int nonagle)
 {
-	if (sk->sk_send_head) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (tcp_send_head(sk)) {
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		if (!(flags & MSG_MORE) || forced_push(tp))
 			tcp_mark_push(tp, skb);
 		tcp_mark_urg(tp, flags, skb);
-		__tcp_push_pending_frames(sk, tp, mss_now,
+		__tcp_push_pending_frames(sk, mss_now,
 					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
 	}
 }
@@ -526,13 +526,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		goto do_error;
 
 	while (psize > 0) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		struct page *page = pages[poffset / PAGE_SIZE];
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
 
-		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
@@ -542,7 +542,7 @@ new_segment:
 			if (!skb)
 				goto wait_for_memory;
 
-			skb_entail(sk, tp, skb);
+			skb_entail(sk, skb);
 			copy = size_goal;
 		}
 
@@ -588,8 +588,8 @@ new_segment:
 
 		if (forced_push(tp)) {
 			tcp_mark_push(tp, skb);
-			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-		} else if (skb == sk->sk_send_head)
+			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+		} else if (skb == tcp_send_head(sk))
 			tcp_push_one(sk, mss_now);
 		continue;
 
@@ -597,7 +597,7 @@ wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 		if (copied)
-			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -608,7 +608,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	return copied;
 
 do_error:
@@ -639,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+static inline int select_size(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sk->sk_route_caps & NETIF_F_SG) {
@@ -704,9 +705,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		while (seglen > 0) {
 			int copy;
 
-			skb = sk->sk_write_queue.prev;
+			skb = tcp_write_queue_tail(sk);
 
-			if (!sk->sk_send_head ||
+			if (!tcp_send_head(sk) ||
 			    (copy = size_goal - skb->len) <= 0) {
 
 new_segment:
@@ -716,7 +717,7 @@ new_segment:
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+				skb = sk_stream_alloc_pskb(sk, select_size(sk),
 							   0, sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
@@ -727,7 +728,7 @@ new_segment:
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
 					skb->ip_summed = CHECKSUM_PARTIAL;
 
-				skb_entail(sk, tp, skb);
+				skb_entail(sk, skb);
 				copy = size_goal;
 			}
 
@@ -832,8 +833,8 @@ new_segment:
 
 			if (forced_push(tp)) {
 				tcp_mark_push(tp, skb);
-				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-			} else if (skb == sk->sk_send_head)
+				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
+			} else if (skb == tcp_send_head(sk))
 				tcp_push_one(sk, mss_now);
 			continue;
 
@@ -841,7 +842,7 @@ wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 			if (copied)
-				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
@@ -853,16 +854,18 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return copied;
 
 do_fault:
 	if (!skb->len) {
-		if (sk->sk_send_head == skb)
-			sk->sk_send_head = NULL;
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
+		/* It is the one place in all of TCP, except connection
+		 * reset, where we can be unlinking the send_head.
+		 */
+		tcp_check_send_head(sk, skb);
 		sk_stream_free_skb(sk, skb);
 	}
 
@@ -1016,9 +1019,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 
 	skb_queue_walk(&sk->sk_receive_queue, skb) {
 		offset = seq - TCP_SKB_CB(skb)->seq;
-		if (skb->h.th->syn)
+		if (tcp_hdr(skb)->syn)
 			offset--;
-		if (offset < skb->len || skb->h.th->fin) {
+		if (offset < skb->len || tcp_hdr(skb)->fin) {
 			*off = offset;
 			return skb;
 		}
@@ -1070,7 +1073,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			if (offset != skb->len)
 				break;
 		}
-		if (skb->h.th->fin) {
+		if (tcp_hdr(skb)->fin) {
 			sk_eat_skb(sk, skb, 0);
 			++seq;
 			break;
@@ -1174,11 +1177,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				break;
 			}
 			offset = *seq - TCP_SKB_CB(skb)->seq;
-			if (skb->h.th->syn)
+			if (tcp_hdr(skb)->syn)
 				offset--;
 			if (offset < skb->len)
 				goto found_ok_skb;
-			if (skb->h.th->fin)
+			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
 			BUG_TRAP(flags & MSG_PEEK);
 			skb = skb->next;
@@ -1389,12 +1392,12 @@ do_prequeue:
 skip_copy:
 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
 			tp->urg_data = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 		}
 		if (used + offset < skb->len)
 			continue;
 
-		if (skb->h.th->fin)
+		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, copied_early);
@@ -1563,21 +1566,19 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
-			  skb->h.th->fin;
+			  tcp_hdr(skb)->fin;
 		data_was_unread += len;
 		__kfree_skb(skb);
 	}
 
 	sk_stream_mem_reclaim(sk);
 
-	/* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
-	 * 3.10, we send a RST here because data was lost.  To
-	 * witness the awful effects of the old behavior of always
-	 * doing a FIN, run an older 2.1.x kernel or 2.0.x, start
-	 * a bulk GET in an FTP client, suspend the process, wait
-	 * for the client to advertise a zero window, then kill -9
-	 * the FTP client, wheee...  Note: timeout is always zero
-	 * in such a case.
+	/* As outlined in RFC 2525, section 2.17, we send a RST here because
+	 * data was lost. To witness the awful effects of the old behavior of
+	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
+	 * GET in an FTP client, suspend the process, wait for the client to
+	 * advertise a zero window, then kill -9 the FTP client, wheee...
+	 * Note: timeout is always zero in such a case.
 	 */
 	if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
@@ -1732,7 +1733,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
 #ifdef CONFIG_NET_DMA
 	__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,7 +1759,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
-	sk->sk_send_head = NULL;
+	tcp_init_send_head(sk);
 	tp->rx_opt.saw_tstamp = 0;
 	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
@@ -1830,7 +1831,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			 * for currently queued segments.
 			 */
 			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		} else {
 			tp->nonagle &= ~TCP_NAGLE_OFF;
 		}
@@ -1854,7 +1855,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->nonagle &= ~TCP_NAGLE_CORK;
 			if (tp->nonagle&TCP_NAGLE_OFF)
 				tp->nonagle |= TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		}
 		break;
 
@@ -1954,7 +1955,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	default:
 		err = -ENOPROTOOPT;
 		break;
-	};
+	}
+
 	release_sock(sk);
 	return err;
 }
@@ -2124,7 +2126,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
@@ -2170,7 +2172,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		goto out;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	thlen = th->doff * 4;
 	if (thlen < sizeof(*th))
 		goto out;
@@ -2210,7 +2212,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	delta = htonl(oldlen + (thlen + len));
 
 	skb = segs;
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	seq = ntohl(th->seq);
 
 	do {
@@ -2219,23 +2221,25 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				       (__force u32)delta));
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-							   skb->csum));
+			th->check =
+			     csum_fold(csum_partial(skb_transport_header(skb),
+						    thlen, skb->csum));
 
 		seq += len;
 		skb = skb->next;
-		th = skb->h.th;
+		th = tcp_hdr(skb);
 
 		th->seq = htonl(seq);
 		th->cwr = 0;
 	} while (skb->next);
 
-	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
+		      skb->data_len);
 	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				(__force u32)delta));
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-						   skb->csum));
+		th->check = csum_fold(csum_partial(skb_transport_header(skb),
+						   thlen, skb->csum));
 
 out:
 	return segs;
@@ -2372,6 +2376,23 @@ void __tcp_put_md5sig_pool(void)
 EXPORT_SYMBOL(__tcp_put_md5sig_pool);
 #endif
 
+void tcp_done(struct sock *sk)
+{
+	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
+		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+
+	tcp_set_state(sk, TCP_CLOSE);
+	tcp_clear_xmit_timers(sk);
+
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_state_change(sk);
+	else
+		inet_csk_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_done);
+
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0ac..281c9f913257 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 5c8caf4a1244..86b26539e54b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -12,6 +12,8 @@
 #include <linux/list.h>
 #include <net/tcp.h>
 
+int sysctl_tcp_max_ssthresh = 0;
+
 static DEFINE_SPINLOCK(tcp_cong_list_lock);
 static LIST_HEAD(tcp_cong_list);
 
@@ -77,18 +79,19 @@ void tcp_init_congestion_control(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_congestion_ops *ca;
 
-	if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
-		return;
+	/* if no choice made yet assign the current value set as default */
+	if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+			if (try_module_get(ca->owner)) {
+				icsk->icsk_ca_ops = ca;
+				break;
+			}
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (try_module_get(ca->owner)) {
-			icsk->icsk_ca_ops = ca;
-			break;
+			/* fallback to next available */
 		}
-
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
 
 	if (icsk->icsk_ca_ops->init)
 		icsk->icsk_ca_ops->init(sk);
@@ -123,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
 #endif
 
 	if (ca) {
-		ca->non_restricted = 1;	/* default is always allowed */
+		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
 		list_move(&ca->list, &tcp_cong_list);
 		ret = 0;
 	}
@@ -178,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
 	*buf = '\0';
 	rcu_read_lock();
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (!ca->non_restricted)
+		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
 			continue;
 		offs += snprintf(buf + offs, maxlen - offs,
 				 "%s%s",
@@ -209,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
 		}
 	}
 
-	/* pass 2 clear */
+	/* pass 2 clear old values */
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
-		ca->non_restricted = 0;
+		ca->flags &= ~TCP_CONG_NON_RESTRICTED;
 
 	/* pass 3 mark as allowed */
 	while ((name = strsep(&val, " ")) && *name) {
 		ca = tcp_ca_find(name);
 		WARN_ON(!ca);
 		if (ca)
-			ca->non_restricted = 1;
+			ca->flags |= TCP_CONG_NON_RESTRICTED;
 	}
 out:
 	spin_unlock(&tcp_cong_list_lock);
@@ -236,6 +239,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 
 	rcu_read_lock();
 	ca = tcp_ca_find(name);
+
 	/* no change asking for existing value */
 	if (ca == icsk->icsk_ca_ops)
 		goto out;
@@ -252,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	if (!ca)
 		err = -ENOENT;
 
-	else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
 		err = -EPERM;
 
 	else if (!try_module_get(ca->owner))
@@ -261,7 +265,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	else {
 		tcp_cleanup_congestion_control(sk);
 		icsk->icsk_ca_ops = ca;
-		if (icsk->icsk_ca_ops->init)
+
+		if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
 			icsk->icsk_ca_ops->init(sk);
 	}
  out:
@@ -271,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 
 
 /*
- * Linear increase during slow start
+ * Slow start (exponential increase) with
+ * RFC3742 Limited Slow Start (fast linear increase) support.
  */
 void tcp_slow_start(struct tcp_sock *tp)
 {
+	int cnt = 0;
+
 	if (sysctl_tcp_abc) {
 		/* RFC3465: Slow Start
 		 * TCP sender SHOULD increase cwnd by the number of
@@ -283,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp)
 		 */
 		if (tp->bytes_acked < tp->mss_cache)
 			return;
-
-		/* We MAY increase by 2 if discovered delayed ack */
-		if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
 	}
+
+	if (sysctl_tcp_max_ssthresh > 0 &&
+	    tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+		cnt += sysctl_tcp_max_ssthresh>>1;
+	else
+		cnt += tp->snd_cwnd;
+
+	/* RFC3465: We MAY increase by 2 if discovered delayed ack */
+	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
+		cnt <<= 1;
 	tp->bytes_acked = 0;
 
-	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-		tp->snd_cwnd++;
+	tp->snd_cwnd_cnt += cnt;
+	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+		tp->snd_cwnd_cnt -= tp->snd_cwnd;
+		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_slow_start);
 
@@ -355,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
 
 struct tcp_congestion_ops tcp_reno = {
+	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
-	.non_restricted = 1,
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 9a582fb4ef9f..14224487b16b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,5 +1,5 @@
 /*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
  *
  * This is from the implementation of CUBIC TCP in
  * Injong Rhee, Lisong Xu.
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
 
-#include <asm/div64.h>
-
 /* BIC TCP Parameters */
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
@@ -93,50 +91,51 @@ static void bictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
-	u_int32_t d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	/* avoid 64 bit division if possible */
-	if (dividend >> 32)
-		do_div(dividend, d);
-	else
-		dividend = (uint32_t) dividend / d;
-
-	return dividend;
-}
-
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
  */
 static u32 cubic_root(u64 a)
 {
-	u32 x, x1;
-
-	/* Initial estimate is based on:
-	 * cbrt(x) = exp(log(x) / 3)
+	u32 x, b, shift;
+	/*
+	 * cbrt(x) MSB values for x MSB values in [0..63].
+	 * Precomputed then refined by hand - Willy Tarreau
+	 *
+	 * For x in [0..63],
+	 *   v = cbrt(x << 18) - 1
+	 *   cbrt(x) = (v[x] + 10) >> 6
 	 */
-	x = 1u << (fls64(a)/3);
+	static const u8 v[] = {
+		/* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
+		/* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
+		/* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
+		/* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
+		/* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
+		/* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
+		/* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
+		/* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
+	};
+
+	b = fls64(a);
+	if (b < 7) {
+		/* a in [0..63] */
+		return ((u32)v[(u32)a] + 35) >> 6;
+	}
+
+	b = ((b * 84) >> 8) - 1;
+	shift = (a >> (b * 3));
+
+	x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
 
 	/*
-	 * Iteration based on:
+	 * Newton-Raphson iteration
 	 *                         2
 	 * x    = ( 2 * x  +  a / x  ) / 3
 	 *  k+1          k         k
 	 */
-	do {
-		x1 = x;
-		x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
-	} while (abs(x1 - x) > 1);
-
+	x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+	x = ((x * 341) >> 10);
 	return x;
 }
 
@@ -215,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	if (ca->delay_min > 0) {
 		/* max increment = Smax * rtt / 0.1  */
 		min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
-		if (ca->cnt < min_cnt)
+
+		/* use concave growth when the target is above the origin */
+		if (ca->cnt < min_cnt && t >= ca->bic_K)
 			ca->cnt = min_cnt;
 	}
 
@@ -333,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -401,4 +402,4 @@ module_exit(cubictcp_unregister);
 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.0");
+MODULE_VERSION("2.1");
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d1..4ba4a7ae0a85 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
 	}
 }
 
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f64..e5be35117223 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	ca->snd_cwnd_cents += odd;
 
 	/* check when fractions goes >=128 and increase cwnd by 1. */
-	while(ca->snd_cwnd_cents >= 128) {
+	while (ca->snd_cwnd_cents >= 128) {
 		tp->snd_cwnd++;
 		ca->snd_cwnd_cents -= 128;
 		tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
new file mode 100644
index 000000000000..4adc47c55351
--- /dev/null
+++ b/net/ipv4/tcp_illinois.c
@@ -0,0 +1,356 @@
+/*
+ * TCP Illinois congestion control.
+ * Home page:
+ *	http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+ *
+ * The algorithm is described in:
+ * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
+ *  for High-Speed Networks"
+ * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
+ *
+ * Implemented from description in paper and ns-2 simulation.
+ * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+#include <asm/div64.h>
+#include <net/tcp.h>
+
+#define ALPHA_SHIFT	7
+#define ALPHA_SCALE	(1u<<ALPHA_SHIFT)
+#define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
+#define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
+#define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */
+#define U32_MAX		((u32)~0U)
+#define RTT_MAX		(U32_MAX / ALPHA_MAX)	/* 3.3 secs */
+
+#define BETA_SHIFT	6
+#define BETA_SCALE	(1u<<BETA_SHIFT)
+#define BETA_MIN	(BETA_SCALE/8)		/* 0.125 */
+#define BETA_MAX	(BETA_SCALE/2)		/* 0.5 */
+#define BETA_BASE	BETA_MAX
+
+static int win_thresh __read_mostly = 15;
+module_param(win_thresh, int, 0);
+MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
+
+static int theta __read_mostly = 5;
+module_param(theta, int, 0);
+MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
+
+/* TCP Illinois per-connection state, stored in the inet_csk_ca() area */
+struct illinois {
+	u64	sum_rtt;	/* sum of rtt samples (usec) since last rtt_reset() */
+	u16	cnt_rtt;	/* # of rtt samples since last rtt_reset() */
+	u32	base_rtt;	/* min of all rtt in usec */
+	u32	max_rtt;	/* max of all rtt in usec */
+	u32	end_seq;	/* right edge of current RTT (snd_nxt at reset) */
+	u32	alpha;		/* Additive increase, scaled by ALPHA_SCALE */
+	u32	beta;		/* Multiplicative decrease, scaled by BETA_SCALE */
+	u16	acked;		/* # packets acked by current ACK */
+	u8	rtt_above;	/* average delay has gone above low threshold */
+	u8	rtt_low;	/* # of rtt measurements below threshold */
+};
+
+static void rtt_reset(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	ca->end_seq = tp->snd_nxt;
+	ca->cnt_rtt = 0;
+	ca->sum_rtt = 0;
+
+	/* TODO: age max_rtt? */
+}
+
+static void tcp_illinois_init(struct sock *sk)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+
+	ca->alpha = ALPHA_MAX;
+	ca->beta = BETA_BASE;
+	ca->base_rtt = 0x7fffffff;
+	ca->max_rtt = 0;
+
+	ca->acked = 0;
+	ca->rtt_low = 0;
+	ca->rtt_above = 0;
+
+	rtt_reset(sk);
+}
+
+/* Measure RTT for each ack. */
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+	u32 rtt;
+
+	ca->acked = pkts_acked;
+
+	rtt = ktime_to_us(net_timedelta(last));
+
+	/* ignore bogus values, this prevents wraparound in alpha math */
+	if (rtt > RTT_MAX)
+		rtt = RTT_MAX;
+
+	/* keep track of minimum RTT seen so far */
+	if (ca->base_rtt > rtt)
+		ca->base_rtt = rtt;
+
+	/* and max */
+	if (ca->max_rtt < rtt)
+		ca->max_rtt = rtt;
+
+	++ca->cnt_rtt;
+	ca->sum_rtt += rtt;
+}
+
+/* Maximum queuing delay: largest RTT seen minus smallest RTT seen (usec) */
+static inline u32 max_delay(const struct illinois *ca)
+{
+	return ca->max_rtt - ca->base_rtt;
+}
+
+/* Average queuing delay: mean sampled RTT minus base_rtt; needs cnt_rtt != 0 */
+static inline u32 avg_delay(const struct illinois *ca)
+{
+	u64 t = ca->sum_rtt;
+
+	do_div(t, ca->cnt_rtt);
+	return t - ca->base_rtt;
+}
+
+/*
+ * Compute value of alpha used for additive increase.
+ * If small window then use 1.0, equivalent to Reno.
+ *
+ * For larger windows, adjust based on average delay.
+ * A. If average delay is at minimum (we are uncongested),
+ *    then use large alpha (10.0) to increase faster.
+ * B. If average delay is at maximum (getting congested)
+ *    then use small alpha (0.3)
+ *
+ * The result is a convex window growth curve.
+ */
+static u32 alpha(struct illinois *ca, u32 da, u32 dm)
+{
+	u32 d1 = dm / 100;	/* Low threshold */
+
+	if (da <= d1) {
+		/* If never got out of low delay zone, then use max */
+		if (!ca->rtt_above)
+			return ALPHA_MAX;
+
+		/* Wait for theta (default 5) good RTT's before letting alpha go to
+		 * ALPHA_MAX; this keeps one good RTT from causing a sudden increase.
+		 */
+		if (++ca->rtt_low < theta)
+			return ca->alpha;
+
+		ca->rtt_low = 0;
+		ca->rtt_above = 0;
+		return ALPHA_MAX;
+	}
+
+	ca->rtt_above = 1;
+
+	/*
+	 * Based on:
+	 *
+	 *      (dm - d1) amin amax
+	 * k1 = -------------------
+	 *         amax - amin
+	 *
+	 *       (dm - d1) amin
+	 * k2 = ----------------  - d1
+	 *        amax - amin
+	 *
+	 *             k1
+	 * alpha = ----------
+	 *          k2 + da
+	 */
+
+	dm -= d1;
+	da -= d1;
+	return (dm * ALPHA_MAX) /
+		(dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
+}
+
+/*
+ * Beta used for multiplicative decrease.
+ * For small window sizes returns same value as Reno (0.5)
+ *
+ * If delay is small (at most 10% of max) then beta = 1/8
+ * If delay is at least 80% of max then beta = 1/2
+ * In between is a linear function
+ */
+static u32 beta(u32 da, u32 dm)
+{
+	u32 d2, d3;
+
+	d2 = dm / 10;
+	if (da <= d2)
+		return BETA_MIN;
+
+	d3 = (8 * dm) / 10;
+	if (da >= d3 || d3 <= d2)
+		return BETA_MAX;
+
+	/*
+	 * Based on:
+	 *
+	 *       bmin d3 - bmax d2
+	 * k3 = -------------------
+	 *         d3 - d2
+	 *
+	 *       bmax - bmin
+	 * k4 = -------------
+	 *         d3 - d2
+	 *
+	 * b = k3 + k4 da
+	 */
+	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
+		/ (d3 - d2);
+}
+
+/* Update alpha and beta values once per RTT */
+static void update_params(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (tp->snd_cwnd < win_thresh) {
+		ca->alpha = ALPHA_BASE;
+		ca->beta = BETA_BASE;
+	} else if (ca->cnt_rtt > 0) {
+		u32 dm = max_delay(ca);
+		u32 da = avg_delay(ca);
+
+		ca->alpha = alpha(ca, da, dm);
+		ca->beta = beta(da, dm);
+	}
+
+	rtt_reset(sk);
+}
+
+/*
+ * In case of loss, reset to default values
+ */
+static void tcp_illinois_state(struct sock *sk, u8 new_state)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (new_state == TCP_CA_Loss) {
+		ca->alpha = ALPHA_BASE;
+		ca->beta = BETA_BASE;
+		ca->rtt_low = 0;
+		ca->rtt_above = 0;
+		rtt_reset(sk);
+	}
+}
+
+/*
+ * Increase window in response to successful acknowledgment.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+				    u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (after(ack, ca->end_seq))
+		update_params(sk);
+
+	/* RFC2861 only increase cwnd if fully utilized */
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	/* In slow start */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+	else {
+		u32 delta;
+
+		/* snd_cwnd_cnt is # of packets since last cwnd increment */
+		tp->snd_cwnd_cnt += ca->acked;
+		ca->acked = 1;
+
+		/* This is close approximation of:
+		 * tp->snd_cwnd += alpha/tp->snd_cwnd
+		*/
+		delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+		if (delta >= tp->snd_cwnd) {
+			tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
+					   (u32) tp->snd_cwnd_clamp);
+			tp->snd_cwnd_cnt = 0;
+		}
+	}
+}
+
+static u32 tcp_illinois_ssthresh(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	/* Multiplicative decrease: shrink cwnd *by* beta/BETA_SCALE, floor 2 */
+	return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
+}
+
+
+/* Export RTT info via netlink; tcpv_rtt stays 0 if no RTT was sampled. */
+static void tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+{
+	const struct illinois *ca = inet_csk_ca(sk);
+
+	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcpvegas_info info = {
+			.tcpv_enabled = 1,
+			.tcpv_rttcnt = ca->cnt_rtt,
+			.tcpv_minrtt = ca->base_rtt,
+		};
+		u64 t = ca->sum_rtt;
+
+		if (info.tcpv_rttcnt > 0) {	/* do_div() by 0 would trap */
+			do_div(t, info.tcpv_rttcnt);
+			info.tcpv_rtt = t;
+		}
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+	}
+}
+
+static struct tcp_congestion_ops tcp_illinois = {
+	.flags		= TCP_CONG_RTT_STAMP,
+	.init		= tcp_illinois_init,
+	.ssthresh	= tcp_illinois_ssthresh,
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.cong_avoid	= tcp_illinois_cong_avoid,
+	.set_state	= tcp_illinois_state,
+	.get_info	= tcp_illinois_info,
+	.pkts_acked	= tcp_illinois_acked,
+
+	.owner		= THIS_MODULE,
+	.name		= "illinois",
+};
+
+static int __init tcp_illinois_register(void)
+{
+	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&tcp_illinois);
+}
+
+static void __exit tcp_illinois_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_illinois);
+}
+
+module_init(tcp_illinois_register);
+module_exit(tcp_illinois_unregister);
+
+MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP Illinois");
+MODULE_VERSION("1.0");
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687ac..7641b2761a14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -100,6 +101,7 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +112,8 @@ int sysctl_tcp_abc __read_mostly;
 #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
 #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
 
+#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
 /* Adapt the MSS value used to make delayed ack decision to the
@@ -136,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		 *
 		 * "len" is invariant segment length, including TCP header.
 		 */
-		len += skb->data - skb->h.raw;
+		len += skb->data - skb_transport_header(skb);
 		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
 		    /* If PSH is not set, packet should be
 		     * full sized, provided peer TCP is not badly broken.
@@ -144,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		     * to handle super-low mtu links fairly.
 		     */
 		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
-		     !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
 			/* Subtract also invariant (if peer is RFC compliant),
 			 * tcp header plus fixed timestamp option length.
 			 * Resulting "len" is MSS free of SACK jitter.
@@ -231,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  */
 
 /* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
-			     const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
 	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -248,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+static void tcp_grow_window(struct sock *sk,
 			    struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -263,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
 		if (tcp_win_from_space(skb->truesize) <= skb->len)
 			incr = 2*tp->advmss;
 		else
-			incr = __tcp_grow_window(sk, tp, skb);
+			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -326,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
-static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+static void tcp_clamp_window(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	icsk->icsk_ack.quick = 0;
@@ -499,8 +506,9 @@ new_measure:
  * each ACK we send, he increments snd_cwnd and transmits more of his
  * queue.  -DaveM
  */
-static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now;
 
@@ -541,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 	TCP_ECN_check_ce(tp, skb);
 
 	if (skb->len >= 128)
-		tcp_grow_window(sk, tp, skb);
+		tcp_grow_window(sk, skb);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -574,7 +582,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if(m == 0)
+	if (m == 0)
 		m = 1;
 	if (tp->srtt != 0) {
 		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
@@ -759,15 +767,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 }
 
 /* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+	if (icsk->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+		if (set_ssthresh)
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tp->snd_cwnd = min(tp->snd_cwnd,
 				   tcp_packets_in_flight(tp) + 1U);
 		tp->snd_cwnd_cnt = 0;
@@ -934,7 +944,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+	unsigned char *ptr = (skb_transport_header(ack_skb) +
+			      TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
 	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -1038,7 +1049,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	cached_skb = tp->fastpath_skb_hint;
 	cached_fack_count = tp->fastpath_cnt_hint;
 	if (!cached_skb) {
-		cached_skb = sk->sk_write_queue.next;
+		cached_skb = tcp_write_queue_head(sk);
 		cached_fack_count = 0;
 	}
 
@@ -1055,10 +1066,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
+
 			cached_skb = skb;
 			cached_fack_count = fack_count;
 			if (i == first_sack_index) {
@@ -1159,6 +1173,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						/* clear lost hint */
 						tp->retransmit_skb_hint = NULL;
 					}
+					/* SACK enhanced F-RTO detection.
+					 * Set flag if and only if non-rexmitted
+					 * segments below frto_highmark are
+					 * SACKed (RFC4138; Appendix B).
+					 * Clearing correct due to in-order walk
+					 */
+					if (after(end_seq, tp->frto_highmark)) {
+						flag &= ~FLAG_ONLY_ORIG_SACKED;
+					} else {
+						if (!(sacked & TCPCB_RETRANS))
+							flag |= FLAG_ONLY_ORIG_SACKED;
+					}
 				}
 
 				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1195,7 +1221,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
 				break;
 			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1224,7 +1252,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 	tp->left_out = tp->sacked_out + tp->lost_out;
 
-	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
@@ -1236,9 +1265,49 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/* F-RTO can only be used if TCP has never retransmitted anything other than
+ * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
+ */
+int tcp_use_frto(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (!sysctl_tcp_frto)
+		return 0;
+
+	if (IsSackFrto())
+		return 1;
+
+	/* Avoid expensive walking of rexmit queue if possible */
+	if (tp->retrans_out > 1)
+		return 0;
+
+	skb = tcp_write_queue_head(sk);
+	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+			return 0;
+		/* Short-circuit when first non-SACKed skb has been checked */
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+			break;
+	}
+	return 1;
+}
+
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
+ * keep retrans_out counting accurate (with SACK F-RTO, other than head
+ * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
+ * bits are handled if the Loss state is really to be entered (in
+ * tcp_enter_frto_loss).
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ *  "Reduce ssthresh if it has not yet been made inside this window."
  */
 void tcp_enter_frto(struct sock *sk)
 {
@@ -1246,39 +1315,69 @@ void tcp_enter_frto(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	tp->frto_counter = 1;
-
-	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
 	    tp->snd_una == tp->high_seq ||
-	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+	     !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		/* Our state is too optimistic in ssthresh() call because cwnd
+		 * is not reduced until tcp_enter_frto_loss() when previous FRTO
+		 * recovery has not yet completed. Pattern would be this: RTO,
+		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
+		 * up here twice).
+		 * RFC4138 should be more specific on what to do, even though
+		 * RTO is quite unlikely to occur after the first Cumulative ACK
+		 * due to back-off and complexity of triggering events ...
+		 */
+		if (tp->frto_counter) {
+			u32 stored_cwnd;
+			stored_cwnd = tp->snd_cwnd;
+			tp->snd_cwnd = 2;
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+			tp->snd_cwnd = stored_cwnd;
+		} else {
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		}
+		/* ... in theory, cong.control module could do "any tricks" in
+		 * ssthresh(), which means that ca_state, lost bits and lost_out
+		 * counter would have to be faked before the call occurs. We
+		 * consider that too expensive, unlikely and hacky, so modules
+		 * using these in ssthresh() must handle these incompatibility
+		 * issues if they receive CA_EVENT_FRTO and frto_counter != 0
+		 */
 		tcp_ca_event(sk, CA_EVENT_FRTO);
 	}
 
-	/* Have to clear retransmission markers here to keep the bookkeeping
-	 * in shape, even though we are not yet in Loss state.
-	 * If something was really lost, it is eventually caught up
-	 * in tcp_enter_frto_loss.
-	 */
-	tp->retrans_out = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+	skb = tcp_write_queue_head(sk);
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= tcp_skb_pcount(skb);
 	}
 	tcp_sync_left_out(tp);
 
-	tcp_set_ca_state(sk, TCP_CA_Open);
-	tp->frto_highmark = tp->snd_nxt;
+	/* Earlier loss recovery underway (see RFC4138; Appendix B).
+	 * The last condition is necessary at least in tp->frto_counter case.
+	 */
+	if (IsSackFrto() && (tp->frto_counter ||
+	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+	    after(tp->high_seq, tp->snd_una)) {
+		tp->frto_highmark = tp->high_seq;
+	} else {
+		tp->frto_highmark = tp->snd_nxt;
+	}
+	tcp_set_ca_state(sk, TCP_CA_Disorder);
+	tp->high_seq = tp->snd_nxt;
+	tp->frto_counter = 1;
 }
 
 /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
  * which indicates that we should follow the traditional RTO recovery,
  * i.e. mark everything lost and do go-back-N retransmission.
  */
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1287,10 +1386,23 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	tp->sacked_out = 0;
 	tp->lost_out = 0;
 	tp->fackets_out = 0;
+	tp->retrans_out = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		/*
+		 * Count the retransmission made on RTO correctly (only when
+		 * waiting for the first ACK and did not get it)...
+		 */
+		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+			tp->retrans_out += tcp_skb_pcount(skb);
+			/* ...enter this if branch just for the first segment */
+			flag |= FLAG_DATA_ACKED;
+		} else {
+			TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+		}
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 
 			/* Do not mark those segments lost that were
@@ -1308,7 +1420,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
-	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->undo_marker = 0;
@@ -1366,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how)
 	if (!how)
 		tp->undo_marker = tp->snd_una;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			tp->undo_marker = 0;
@@ -1401,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
 	 * receiver _host_ is heavily congested (or buggy).
 	 * Do processing similar to RTO timeout.
 	 */
-	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+	if ((skb = tcp_write_queue_head(sk)) != NULL &&
 	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
 
 		tcp_enter_loss(sk, 1);
 		icsk->icsk_retransmits++;
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  icsk->icsk_rto, TCP_RTO_MAX);
 		return 1;
@@ -1426,10 +1540,12 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
 }
 
-static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+static inline int tcp_head_timedout(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	return tp->packets_out &&
-	       tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
+	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
 }
 
 /* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1525,10 +1641,15 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
  * Main question: may we further continue forward transmission
  * with the same cwnd?
  */
-static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+static int tcp_time_to_recover(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out;
 
+	/* Do not perform any recovery during FRTO algorithm */
+	if (tp->frto_counter)
+		return 0;
+
 	/* Trick#1: The loss is proven. */
 	if (tp->lost_out)
 		return 1;
@@ -1540,7 +1661,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	/* Trick#3 : when we use RFC2988 timer restart, fast
 	 * retransmit can be triggered by timeout of queue head.
 	 */
-	if (tcp_head_timedout(sk, tp))
+	if (tcp_head_timedout(sk))
 		return 1;
 
 	/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1549,7 +1670,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	packets_out = tp->packets_out;
 	if (packets_out <= tp->reordering &&
 	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
-	    !tcp_may_send_now(sk, tp)) {
+	    !tcp_may_send_now(sk)) {
 		/* We have nothing to send. This connection is limited
 		 * either by receiver window or by application.
 		 */
@@ -1589,8 +1710,10 @@ static void tcp_add_reno_sack(struct sock *sk)
 
 /* Account for ACK, ACKing some data in Reno Recovery phase. */
 
-static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
 		if (acked-1 >= tp->sacked_out)
@@ -1609,9 +1732,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 }
 
 /* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+static void tcp_mark_head_lost(struct sock *sk,
 			       int packets, u32 high_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int cnt;
 
@@ -1620,11 +1744,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -1638,12 +1764,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 			/* clear xmit_retransmit_queue hints
 			 *  if this is beyond hint */
-			if(tp->retransmit_skb_hint != NULL &&
-			   before(TCP_SKB_CB(skb)->seq,
-				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
-
+			if (tp->retransmit_skb_hint != NULL &&
+			    before(TCP_SKB_CB(skb)->seq,
+				   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 				tp->retransmit_skb_hint = NULL;
-			}
+
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -1651,15 +1776,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 /* Account newly detected lost packet(s) */
 
-static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+static void tcp_update_scoreboard(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (IsFack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+		tcp_mark_head_lost(sk, lost, tp->high_seq);
 	} else {
-		tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+		tcp_mark_head_lost(sk, 1, tp->high_seq);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -1667,13 +1794,15 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
+	if (!IsReno(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
-			: sk->sk_write_queue.next;
+			: tcp_write_queue_head(sk);
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (!tcp_skb_timedout(sk, skb))
 				break;
 
@@ -1745,9 +1874,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
 /* Undo procedures. */
 
 #if FASTRETRANS_DEBUG > 1
-static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+static void DBGUNDO(struct sock *sk, const char *msg)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+
 	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
 	       msg,
 	       NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1793,13 +1924,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
 }
 
 /* People celebrate: "We love our President!" */
-static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_recovery(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		/* Happy end! We did not retransmit anything
 		 * or our original transmission succeeded.
 		 */
-		DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
 		tcp_undo_cwr(sk, 1);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1819,10 +1952,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
 }
 
 /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+static void tcp_try_undo_dsack(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->undo_marker && !tp->undo_retrans) {
-		DBGUNDO(sk, tp, "D-SACK");
+		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwr(sk, 1);
 		tp->undo_marker = 0;
 		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1831,9 +1966,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
 
 /* Undo during fast recovery after partial ACK. */
 
-static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
-				int acked)
+static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
 	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
 
@@ -1846,7 +1981,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
-		DBGUNDO(sk, tp, "Hoe");
+		DBGUNDO(sk, "Hoe");
 		tcp_undo_cwr(sk, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
 
@@ -1860,17 +1995,21 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 }
 
 /* Undo during loss recovery after partial ACK. */
-static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_loss(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 
 		clear_all_retrans_hints(tp);
 
-		DBGUNDO(sk, tp, "partial loss");
+		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
 		tcp_undo_cwr(sk, 1);
@@ -1892,15 +2031,17 @@ static inline void tcp_complete_cwr(struct sock *sk)
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tp->left_out = tp->sacked_out;
 
 	if (tp->retrans_out == 0)
 		tp->retrans_stamp = 0;
 
 	if (flag&FLAG_ECE)
-		tcp_enter_cwr(sk);
+		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		int state = TCP_CA_Open;
@@ -1987,7 +2128,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -1997,14 +2138,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	/* E. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		if (!sysctl_tcp_frto)
-			BUG_TRAP(tp->retrans_out == 0);
+		BUG_TRAP(tp->retrans_out == 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
 		case TCP_CA_Loss:
 			icsk->icsk_retransmits = 0;
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			break;
 
@@ -2018,7 +2158,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			break;
 
 		case TCP_CA_Disorder:
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 			if (!tp->undo_marker ||
 			    /* For SACK case do not Open to allow to undo
 			     * catching for all duplicate ACKs. */
@@ -2031,7 +2171,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		case TCP_CA_Recovery:
 			if (IsReno(tp))
 				tcp_reset_reno_sack(tp);
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			tcp_complete_cwr(sk);
 			break;
@@ -2047,14 +2187,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		} else {
 			int acked = prior_packets - tp->packets_out;
 			if (IsReno(tp))
-				tcp_remove_reno_sacks(sk, tp, acked);
-			is_dupack = tcp_try_undo_partial(sk, tp, acked);
+				tcp_remove_reno_sacks(sk, acked);
+			is_dupack = tcp_try_undo_partial(sk, acked);
 		}
 		break;
 	case TCP_CA_Loss:
 		if (flag&FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
-		if (!tcp_try_undo_loss(sk, tp)) {
+		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
 			tcp_xmit_retransmit_queue(sk);
 			return;
@@ -2071,10 +2211,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		}
 
 		if (icsk->icsk_ca_state == TCP_CA_Disorder)
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 
-		if (!tcp_time_to_recover(sk, tp)) {
-			tcp_try_to_open(sk, tp, flag);
+		if (!tcp_time_to_recover(sk)) {
+			tcp_try_to_open(sk, flag);
 			return;
 		}
 
@@ -2113,8 +2253,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
 
-	if (is_dupack || tcp_head_timedout(sk, tp))
-		tcp_update_scoreboard(sk, tp);
+	if (is_dupack || tcp_head_timedout(sk))
+		tcp_update_scoreboard(sk);
 	tcp_cwnd_down(sk);
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -2190,8 +2330,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * RFC2988 recommends to restart timer to now+rto.
  */
 
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
@@ -2255,14 +2397,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
-static u32 tcp_usrtt(struct timeval *tv)
-{
-	struct timeval now;
-
-	do_gettimeofday(&now);
-	return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
 /* Remove acknowledged frames from the retransmission queue. */
 static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
@@ -2273,12 +2407,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	int acked = 0;
 	__s32 seq_rtt = -1;
 	u32 pkts_acked = 0;
-	void (*rtt_sample)(struct sock *sk, u32 usrtt)
-		= icsk->icsk_ca_ops->rtt_sample;
-	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+	ktime_t last_ackt = ktime_set(0,0);
 
-	while ((skb = skb_peek(&sk->sk_write_queue)) &&
-	       skb != sk->sk_send_head) {
+	while ((skb = tcp_write_queue_head(sk)) &&
+	       skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
@@ -2318,13 +2450,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 		if (sacked) {
 			if (sacked & TCPCB_RETRANS) {
-				if(sacked & TCPCB_SACKED_RETRANS)
+				if (sacked & TCPCB_SACKED_RETRANS)
 					tp->retrans_out -= tcp_skb_pcount(skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
 			} else if (seq_rtt < 0) {
 				seq_rtt = now - scb->when;
-				skb_get_timestamp(skb, &tv);
+				last_ackt = skb->tstamp;
 			}
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2337,23 +2469,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 			}
 		} else if (seq_rtt < 0) {
 			seq_rtt = now - scb->when;
-			skb_get_timestamp(skb, &tv);
+			last_ackt = skb->tstamp;
 		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
 		sk_stream_free_skb(sk, skb);
 		clear_all_retrans_hints(tp);
 	}
 
 	if (acked&FLAG_ACKED) {
+		const struct tcp_congestion_ops *ca_ops
+			= inet_csk(sk)->icsk_ca_ops;
+
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
-		tcp_ack_packets_out(sk, tp);
-		if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
-			(*rtt_sample)(sk, tcp_usrtt(&tv));
+		tcp_ack_packets_out(sk);
 
-		if (icsk->icsk_ca_ops->pkts_acked)
-			icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
 	}
 
 #if FASTRETRANS_DEBUG > 0
@@ -2390,7 +2523,7 @@ static void tcp_ack_probe(struct sock *sk)
 
 	/* Was it a usable window open? */
 
-	if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 		   tp->snd_una + tp->snd_wnd)) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2433,13 +2566,14 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
-				 struct sk_buff *skb, u32 ack, u32 ack_seq)
+static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+				 u32 ack_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int flag = 0;
-	u32 nwin = ntohs(skb->h.th->window);
+	u32 nwin = ntohs(tcp_hdr(skb)->window);
 
-	if (likely(!skb->h.th->syn))
+	if (likely(!tcp_hdr(skb)->syn))
 		nwin <<= tp->rx_opt.snd_wscale;
 
 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2453,7 +2587,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			 * fast path is recovered for sending TCP.
 			 */
 			tp->pred_flags = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
@@ -2467,39 +2601,139 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 	return flag;
 }
 
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+/* A very conservative spurious RTO response algorithm: reduce cwnd and
+ * continue in congestion avoidance.
+ */
+static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
+{
+	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tp->snd_cwnd_cnt = 0;
+	tcp_moderate_cwnd(tp);
+}
+
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+	tcp_enter_cwr(sk, 0);
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk, int flag)
+{
+	if (flag&FLAG_ECE)
+		tcp_ratehalving_spur_to_response(sk);
+	else
+		tcp_undo_cwr(sk, 1);
+}
+
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO is active during two new ACKs following RTO (well, almost, see inline
+ * comments). State (ACK number) is kept in frto_counter. When ACK advances
+ * window (but not to or beyond highest sequence sent before RTO):
+ *   On First ACK,  send two new segments out.
+ *   On Second ACK, RTO was likely spurious. Do spurious response (response
+ *                  algorithm is not part of the F-RTO detection algorithm
+ *                  given in RFC4138 but can be selected separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle; this is done using frto_counter states 2 and 3: when a new data
+ * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * SACK version:
+ *   on first step, wait until first cumulative ACK arrives, then move to
+ *   the second step. In second step, the next ACK decides.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ *   - tcp_use_frto() is used to determine if TCP can use F-RTO
+ *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
+ *     called when tcp_use_frto() showed green light
+ *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
+ *   - tcp_enter_frto_loss() is called if there is not enough evidence
+ *     to prove that the RTO is indeed spurious. It transfers the control
+ *     from F-RTO to the conventional RTO recovery
+ */
+static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_sync_left_out(tp);
 
-	if (tp->snd_una == prior_snd_una ||
-	    !before(tp->snd_una, tp->frto_highmark)) {
-		/* RTO was caused by loss, start retransmitting in
-		 * go-back-N slow start
+	/* Duplicate the behavior from Loss state (fastretrans_alert) */
+	if (flag&FLAG_DATA_ACKED)
+		inet_csk(sk)->icsk_retransmits = 0;
+
+	if (!before(tp->snd_una, tp->frto_highmark)) {
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
+		return 1;
+	}
+
+	if (!IsSackFrto() || IsReno(tp)) {
+		/* RFC4138 shortcoming in step 2; should also have case c):
+		 * ACK isn't duplicate nor advances window, e.g., opposite dir
+		 * data, winupdate
 		 */
-		tcp_enter_frto_loss(sk);
-		return;
+		if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+		    !(flag&FLAG_FORWARD_PROGRESS))
+			return 1;
+
+		if (!(flag&FLAG_DATA_ACKED)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
+					    flag);
+			return 1;
+		}
+	} else {
+		if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+			/* Prevent sending of new data. */
+			tp->snd_cwnd = min(tp->snd_cwnd,
+					   tcp_packets_in_flight(tp));
+			return 1;
+		}
+
+		if ((tp->frto_counter >= 2) &&
+		    (!(flag&FLAG_FORWARD_PROGRESS) ||
+		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+			/* RFC4138 shortcoming (see comment above) */
+			if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+				return 1;
+
+			tcp_enter_frto_loss(sk, 3, flag);
+			return 1;
+		}
 	}
 
 	if (tp->frto_counter == 1) {
-		/* First ACK after RTO advances the window: allow two new
-		 * segments out.
-		 */
+		/* Sending of the next skb must be allowed or no FRTO */
+		if (!tcp_send_head(sk) ||
+		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+				     tp->snd_una + tp->snd_wnd)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+					    flag);
+			return 1;
+		}
+
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+		tp->frto_counter = 2;
+		return 1;
 	} else {
-		/* Also the second ACK after RTO advances the window.
-		 * The RTO was likely spurious. Reduce cwnd and continue
-		 * in congestion avoidance
-		 */
-		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-		tcp_moderate_cwnd(tp);
+		switch (sysctl_tcp_frto_response) {
+		case 2:
+			tcp_undo_spur_to_response(sk, flag);
+			break;
+		case 1:
+			tcp_conservative_spur_to_response(tp);
+			break;
+		default:
+			tcp_ratehalving_spur_to_response(sk);
+			break;
+		}
+		tp->frto_counter = 0;
 	}
-
-	/* F-RTO affects on two new ACKs following RTO.
-	 * At latest on third ACK the TCP behavior is back to normal.
-	 */
-	tp->frto_counter = (tp->frto_counter + 1) % 3;
+	return 0;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
@@ -2513,6 +2747,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	s32 seq_rtt;
 	int prior_packets;
+	int frto_cwnd = 0;
 
 	/* If the ack is newer than sent or older than previous acks
 	 * then we can probably ignore it.
@@ -2549,12 +2784,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
 
-		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
 
 		tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2575,15 +2810,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
-		tcp_process_frto(sk, prior_snd_una);
+		frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
+		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
 	} else {
-		if ((flag & FLAG_DATA_ACKED))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
 	}
 
@@ -2599,7 +2835,7 @@ no_queue:
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (sk->sk_send_head)
+	if (tcp_send_head(sk))
 		tcp_ack_probe(sk);
 	return 1;
 
@@ -2620,13 +2856,13 @@ uninteresting_ack:
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
 {
 	unsigned char *ptr;
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length=(th->doff*4)-sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
-	while(length>0) {
+	while (length > 0) {
 		int opcode=*ptr++;
 		int opsize;
 
@@ -2642,9 +2878,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					return;
 				if (opsize > length)
 					return;	/* don't parse partial options */
-				switch(opcode) {
+				switch (opcode) {
 				case TCPOPT_MSS:
-					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
 						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
 						if (in_mss) {
 							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2654,12 +2890,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_WINDOW:
-					if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
 						if (sysctl_tcp_window_scaling) {
 							__u8 snd_wscale = *(__u8 *) ptr;
 							opt_rx->wscale_ok = 1;
 							if (snd_wscale > 14) {
-								if(net_ratelimit())
+								if (net_ratelimit())
 									printk(KERN_INFO "tcp_parse_options: Illegal window "
 									       "scaling value %d >14 received.\n",
 									       snd_wscale);
@@ -2669,7 +2905,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 						}
 					break;
 				case TCPOPT_TIMESTAMP:
-					if(opsize==TCPOLEN_TIMESTAMP) {
+					if (opsize==TCPOLEN_TIMESTAMP) {
 						if ((estab && opt_rx->tstamp_ok) ||
 						    (!estab && sysctl_tcp_timestamps)) {
 							opt_rx->saw_tstamp = 1;
@@ -2679,7 +2915,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_SACK_PERM:
-					if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
 						if (sysctl_tcp_sack) {
 							opt_rx->sack_ok = 1;
 							tcp_sack_reset(opt_rx);
@@ -2688,7 +2924,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					break;
 
 				case TCPOPT_SACK:
-					if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
 					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
 					   opt_rx->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2701,10 +2937,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					 */
 					break;
 #endif
-				};
+				}
+
 				ptr+=opsize-2;
 				length-=opsize;
-		};
+		}
 	}
 }
 
@@ -2737,7 +2974,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+	tp->rx_opt.ts_recent_stamp = get_seconds();
 }
 
 static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2750,8 +2987,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 * Not only, also it occurs for expired timestamps.
 		 */
 
-		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
-		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+		if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
 }
@@ -2782,7 +3019,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -2803,7 +3040,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
-		xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+		get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
 		!tcp_disordered_ack(sk, skb));
 }
 
@@ -2910,7 +3147,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
 			       __FUNCTION__, sk->sk_state);
 			break;
-	};
+	}
 
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
 	 * Probably, we should reset in this case. For now drop them.
@@ -3009,7 +3246,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 */
 			tp->rx_opt.num_sacks--;
 			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
-			for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+			for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i+1];
 			continue;
 		}
@@ -3062,7 +3299,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.num_sacks--;
 		sp--;
 	}
-	for(; this_sack > 0; this_sack--, sp--)
+	for (; this_sack > 0; this_sack--, sp--)
 		*sp = *(sp-1);
 
 new_sack:
@@ -3088,7 +3325,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		return;
 	}
 
-	for(this_sack = 0; this_sack < num_sacks; ) {
+	for (this_sack = 0; this_sack < num_sacks; ) {
 		/* Check if the start of the sack is covered by RCV.NXT. */
 		if (!before(tp->rcv_nxt, sp->start_seq)) {
 			int i;
@@ -3144,8 +3381,8 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->h.th->fin)
-			tcp_fin(skb, sk, skb->h.th);
+		if (tcp_hdr(skb)->fin)
+			tcp_fin(skb, sk, tcp_hdr(skb));
 	}
 }
 
@@ -3153,7 +3390,7 @@ static int tcp_prune_queue(struct sock *sk);
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -3210,9 +3447,9 @@ queue_and_out:
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->len)
-			tcp_event_data_recv(sk, tp, skb);
-		if(th->fin)
+		if (skb->len)
+			tcp_event_data_recv(sk, skb);
+		if (th->fin)
 			tcp_fin(skb, sk, th);
 
 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -3228,7 +3465,7 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk, tp);
+		tcp_fast_path_check(sk);
 
 		if (eaten > 0)
 			__kfree_skb(skb);
@@ -3392,7 +3629,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		 * - bloated or contains data before "start" or
 		 *   overlaps to the next one.
 		 */
-		if (!skb->h.th->syn && !skb->h.th->fin &&
+		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
 		    (tcp_win_from_space(skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start) ||
 		     (skb->next != tail &&
@@ -3403,7 +3640,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		start = TCP_SKB_CB(skb)->end_seq;
 		skb = skb->next;
 	}
-	if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+	if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
 		return;
 
 	while (before(start, end)) {
@@ -3419,11 +3656,14 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+
+		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
+		skb_set_network_header(nskb, (skb_network_header(skb) -
+					      skb->head));
+		skb_set_transport_header(nskb, (skb_transport_header(skb) -
+						skb->head));
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
-		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
-		nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
-		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_insert(nskb, skb->prev, skb, list);
@@ -3449,7 +3689,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 				__kfree_skb(skb);
 				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
 				skb = next;
-				if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+				if (skb == tail ||
+				    tcp_hdr(skb)->syn ||
+				    tcp_hdr(skb)->fin)
 					return;
 			}
 		}
@@ -3514,7 +3756,7 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		tcp_clamp_window(sk, tp);
+		tcp_clamp_window(sk);
 	else if (tcp_memory_pressure)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
@@ -3583,8 +3825,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
 	 */
@@ -3616,7 +3860,7 @@ static void tcp_new_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_should_expand_sndbuf(sk, tp)) {
+	if (tcp_should_expand_sndbuf(sk)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3640,9 +3884,9 @@ static void tcp_check_space(struct sock *sk)
 	}
 }
 
-static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk)
 {
-	tcp_push_pending_frames(sk, tp);
+	tcp_push_pending_frames(sk);
 	tcp_check_space(sk);
 }
 
@@ -3790,7 +4034,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	int err;
 
 	local_bh_enable();
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
 	else
 		err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -3822,7 +4066,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
 
 static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__tcp_checksum_complete_user(sk, skb);
 }
 
@@ -3840,7 +4084,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 		tp->ucopy.dma_chan = get_softnet_dma();
 
-	if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
 
 		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
 			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
@@ -3856,7 +4100,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 		tcp_rcv_space_adjust(sk);
 
 		if ((tp->ucopy.len == 0) ||
-		    (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+		    (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
 		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
 			tp->ucopy.wakeup = 1;
 			sk->sk_data_ready(sk, 0);
@@ -3976,7 +4220,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				 */
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				return 0;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4047,12 +4291,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			}
 
-			tcp_event_data_recv(sk, tp, skb);
+			tcp_event_data_recv(sk, skb);
 
 			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
 				/* Well, only one small jumplet in fast path... */
 				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				if (!inet_csk_ack_scheduled(sk))
 					goto no_ack;
 			}
@@ -4109,7 +4353,7 @@ slow_path:
 		goto discard;
 	}
 
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4124,7 +4368,7 @@ slow_path:
 	}
 
 step5:
-	if(th->ack)
+	if (th->ack)
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4135,7 +4379,7 @@ step5:
 	/* step 7: process the segment text */
 	tcp_data_queue(sk, skb);
 
-	tcp_data_snd_check(sk, tp);
+	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
 	return 0;
 
@@ -4412,13 +4656,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	case TCP_LISTEN:
-		if(th->ack)
+		if (th->ack)
 			return 1;
 
-		if(th->rst)
+		if (th->rst)
 			goto discard;
 
-		if(th->syn) {
+		if (th->syn) {
 			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
@@ -4452,7 +4696,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Do step6 onward by hand. */
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		return 0;
 	}
 
@@ -4474,7 +4718,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* step 2: check RST bit */
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4497,7 +4741,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	if (th->ack) {
 		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
 
-		switch(sk->sk_state) {
+		switch (sk->sk_state) {
 		case TCP_SYN_RECV:
 			if (acceptable) {
 				tp->copied_seq = tp->rcv_nxt;
@@ -4644,7 +4888,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 	/* tcp_data could move socket to TIME-WAIT */
 	if (sk->sk_state != TCP_CLOSE) {
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		tcp_ack_snd_check(sk);
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bbe7d30..5a3e7f839fc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly;
 #define ICMP_MIN_LENGTH 8
 
 /* Socket used for sending RSTs */
-static struct socket *tcp_socket;
+static struct socket *tcp_socket __read_mostly;
 
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
@@ -125,10 +125,10 @@ void tcp_unhash(struct sock *sk)
 
 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
-	return secure_tcp_sequence_number(skb->nh.iph->daddr,
-					  skb->nh.iph->saddr,
-					  skb->h.th->dest,
-					  skb->h.th->source);
+	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
+					  ip_hdr(skb)->saddr,
+					  tcp_hdr(skb)->dest,
+					  tcp_hdr(skb)->source);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
-			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * when trying new connection.
 		 */
 		if (peer != NULL &&
-		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 			tp->rx_opt.ts_recent = peer->tcp_ts;
 		}
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
 	struct tcp_sock *tp;
 	struct inet_sock *inet;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	__u32 seq;
 	int err;
@@ -499,11 +499,12 @@ out:
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(len, inet->saddr,
 					  inet->daddr, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
@@ -515,17 +516,18 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 
 int tcp_v4_gso_send_check(struct sk_buff *skb)
 {
-	struct iphdr *iph;
+	const struct iphdr *iph;
 	struct tcphdr *th;
 
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return -EINVAL;
 
-	iph = skb->nh.iph;
-	th = skb->h.th;
+	iph = ip_hdr(skb);
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
@@ -546,7 +548,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 #ifdef CONFIG_TCP_MD5SIG
@@ -585,7 +587,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
+	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -597,14 +599,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
 					key,
-					skb->nh.iph->daddr,
-					skb->nh.iph->saddr,
+					ip_hdr(skb)->daddr,
+					ip_hdr(skb)->saddr,
 					&rep.th, IPPROTO_TCP,
 					arg.iov[0].iov_len);
 	}
 #endif
-	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
-				      skb->nh.iph->saddr, /* XXX */
+	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+				      ip_hdr(skb)->saddr, /* XXX */
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
@@ -622,7 +624,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -670,7 +672,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 	 * skb->sk) holds true, but we program defensively.
 	 */
 	if (!twsk && skb->sk) {
-		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
+		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
 	} else if (twsk && twsk->tw_md5_keylen) {
 		tw_key.key = twsk->tw_md5_key;
 		tw_key.keylen = twsk->tw_md5_keylen;
@@ -690,14 +692,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 
 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
 					key,
-					skb->nh.iph->daddr,
-					skb->nh.iph->saddr,
+					ip_hdr(skb)->daddr,
+					ip_hdr(skb)->saddr,
 					&rep.th, IPPROTO_TCP,
 					arg.iov[0].iov_len);
 	}
 #endif
-	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
-				      skb->nh.iph->saddr, /* XXX */
+	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
@@ -745,7 +747,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	skb = tcp_make_synack(sk, dst, req);
 
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
@@ -781,7 +783,7 @@ static void syn_flood_warning(struct sk_buff *skb)
 		warntime = jiffies;
 		printk(KERN_INFO
 		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(skb->h.th->dest));
+		       ntohs(tcp_hdr(skb)->dest));
 	}
 }
 #endif
@@ -1133,8 +1135,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	 */
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th = skb->h.th;
+	const struct iphdr *iph = ip_hdr(skb);
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof(struct tcphdr);
 	int genhash;
 	unsigned char *ptr;
@@ -1251,8 +1253,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct inet_request_sock *ireq;
 	struct tcp_options_received tmp_opt;
 	struct request_sock *req;
-	__be32 saddr = skb->nh.iph->saddr;
-	__be32 daddr = skb->nh.iph->daddr;
+	__be32 saddr = ip_hdr(skb)->saddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
 #ifdef CONFIG_SYN_COOKIES
@@ -1327,7 +1329,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->rmt_addr = saddr;
 	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
-		TCP_ECN_create_request(req, skb->h.th);
+		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
 #ifdef CONFIG_SYN_COOKIES
@@ -1351,7 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
-			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1375,7 +1377,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
 				       "request from %u.%u.%u.%u/%u\n",
 				       NIPQUAD(saddr),
-				       ntohs(skb->h.th->source));
+				       ntohs(tcp_hdr(skb)->source));
 			dst_release(dst);
 			goto drop_and_free;
 		}
@@ -1439,7 +1441,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->opt	      = ireq->opt;
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
-	newinet->mc_ttl	      = skb->nh.iph->ttl;
+	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
@@ -1481,8 +1483,8 @@ exit:
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
-	struct iphdr *iph = skb->nh.iph;
+	struct tcphdr *th = tcp_hdr(skb);
+	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -1491,9 +1493,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-				      th->source, skb->nh.iph->daddr,
-				      th->dest, inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+				      iph->daddr, th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1513,15 +1514,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
 {
+	const struct iphdr *iph = ip_hdr(skb);
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
-				  skb->nh.iph->daddr, skb->csum)) {
+		if (!tcp_v4_check(skb->len, iph->saddr,
+				  iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 				       skb->len, IPPROTO_TCP, 0);
 
 	if (skb->len <= 76) {
@@ -1555,7 +1558,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
 		}
@@ -1563,7 +1566,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
 	if (sk->sk_state == TCP_LISTEN) {
@@ -1581,7 +1584,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
 		rsk = sk;
 		goto reset;
 	}
@@ -1610,6 +1613,7 @@ csum_err:
 
 int tcp_v4_rcv(struct sk_buff *skb)
 {
+	const struct iphdr *iph;
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
@@ -1623,7 +1627,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
 		goto bad_packet;
@@ -1634,23 +1638,21 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v4_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
+	iph = ip_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff * 4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when	 = 0;
-	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
+	TCP_SKB_CB(skb)->flags	 = iph->tos;
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
-	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, th->dest,
-			   inet_iif(skb));
-
+	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+			   iph->daddr, th->dest, inet_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1724,8 +1726,7 @@ do_time_wait:
 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
-							skb->nh.iph->daddr,
-							th->dest,
+							iph->daddr, th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
@@ -1770,7 +1771,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
 
 	if (peer) {
 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
 			peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1792,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 
 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
 			peer->tcp_ts	   = tcptw->tw_ts_recent;
@@ -1890,7 +1891,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	tcp_cleanup_congestion_control(sk);
 
 	/* Cleanup up the write buffer. */
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
 	__skb_queue_purge(&tp->out_of_order_queue);
@@ -2293,13 +2294,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
 		req);
 }
 
-static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
+static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
 {
 	int timer_active;
 	unsigned long timer_expires;
-	struct tcp_sock *tp = tcp_sk(sp);
-	const struct inet_connection_sock *icsk = inet_csk(sp);
-	struct inet_sock *inet = inet_sk(sp);
+	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet = inet_sk(sk);
 	__be32 dest = inet->daddr;
 	__be32 src = inet->rcv_saddr;
 	__u16 destp = ntohs(inet->dport);
@@ -2311,9 +2312,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 		timer_active	= 4;
 		timer_expires	= icsk->icsk_timeout;
-	} else if (timer_pending(&sp->sk_timer)) {
+	} else if (timer_pending(&sk->sk_timer)) {
 		timer_active	= 2;
-		timer_expires	= sp->sk_timer.expires;
+		timer_expires	= sk->sk_timer.expires;
 	} else {
 		timer_active	= 0;
 		timer_expires = jiffies;
@@ -2321,17 +2322,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
-		i, src, srcp, dest, destp, sp->sk_state,
+		i, src, srcp, dest, destp, sk->sk_state,
 		tp->write_seq - tp->snd_una,
-		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
+		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
 					     (tp->rcv_nxt - tp->copied_seq),
 		timer_active,
 		jiffies_to_clock_t(timer_expires - jiffies),
 		icsk->icsk_retransmits,
-		sock_i_uid(sp),
+		sock_i_uid(sk),
 		icsk->icsk_probes_out,
-		sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp,
+		sock_i_ino(sk),
+		atomic_read(&sk->sk_refcnt), sk,
 		icsk->icsk_rto,
 		icsk->icsk_ack.ato,
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21cb..43294ad9f63e 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
  *   3. calc smoothed OWD (SOWD).
  * Most ideas come from the original TCP-LP implementation.
  */
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
 {
 	struct lp *lp = inet_csk_ca(sk);
 	s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
  * newReno in increase case.
  * We work it out by following the idea from TCP-LP's paper directly
  */
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
 
+	tcp_lp_rtt_sample(sk,  ktime_to_us(net_timedelta(last)));
+
 	/* calc inference */
 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
 		lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
 }
 
 static struct tcp_congestion_ops tcp_lp = {
+	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
 	.min_cwnd = tcp_reno_min_cwnd,
-	.rtt_sample = tcp_lp_rtt_sample,
 	.pkts_acked = tcp_lp_pkts_acked,
 
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6b5c64f3c925..a12b08fca5ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -208,7 +208,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -246,7 +246,7 @@ kill:
 	if (paws_reject)
 		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
 
-	if(!th->rst) {
+	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
 		 *
 		 * If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 				if (tcp_alloc_md5sig_pool() == NULL)
 					BUG();
 			}
-		} while(0);
+		} while (0);
 #endif
 
 		/* Linkage updates. */
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		/* Now setup tcp_sock */
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
+		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->rcv_wup = treq->rcv_isn + 1;
 		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
-		newtp->copied_seq = treq->rcv_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -440,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 						       keepalive_time_when(newtp));
 
 		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-		if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
+		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
 				newtp->rx_opt.sack_ok |= 2;
 		}
@@ -455,12 +453,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
 			newtp->window_clamp = min(newtp->window_clamp, 65535U);
 		}
-		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
+				  newtp->rx_opt.snd_wscale);
 		newtp->max_window = newtp->snd_wnd;
 
 		if (newtp->rx_opt.tstamp_ok) {
 			newtp->rx_opt.ts_recent = req->ts_recent;
-			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+			newtp->rx_opt.ts_recent_stamp = get_seconds();
 			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 		} else {
 			newtp->rx_opt.ts_recent_stamp = 0;
@@ -490,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock *req,
 			   struct request_sock **prev)
 {
-	struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -506,7 +505,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
 			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
 		}
 	}
@@ -712,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int state = child->sk_state;
 
 	if (!sock_owned_by_user(child)) {
-		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
+		ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
+					    skb->len);
 		/* Wakeup parent, send SIGIO */
 		if (state == TCP_SYN_RECV && child->sk_state != state)
 			parent->sk_data_ready(parent, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dc151139b5af..0faacf9c419d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,14 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-static void update_send_head(struct sock *sk, struct tcp_sock *tp,
-			     struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct sk_buff *skb)
 {
-	sk->sk_send_head = skb->next;
-	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
-		sk->sk_send_head = NULL;
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	tcp_packets_out_inc(sk, tp, skb);
+	tcp_packets_out_inc(sk, skb);
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -78,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
  * invalid. OK, let's make this for now:
  */
-static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+static inline __u32 tcp_acceptable_seq(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
 		return tp->snd_nxt;
 	else
@@ -238,7 +239,7 @@ static u16 tcp_select_window(struct sock *sk)
 	u32 new_win = __tcp_select_window(sk);
 
 	/* Never shrink the offered window */
-	if(new_win < cur_win) {
+	if (new_win < cur_win) {
 		/* Danger Will Robinson!
 		 * Don't update rcv_wup/rcv_wnd here or else
 		 * we will not be able to advertise a zero
@@ -289,10 +290,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 			       (TCPOPT_SACK <<  8) |
 			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
 						     TCPOLEN_SACK_PERBLOCK)));
-		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+
+		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
+
 		if (tp->rx_opt.dsack) {
 			tp->rx_opt.dsack = 0;
 			tp->rx_opt.eff_sacks--;
@@ -337,7 +340,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 	 */
 	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
 	if (ts) {
-		if(sack)
+		if (sack)
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
@@ -349,7 +352,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 				       TCPOLEN_TIMESTAMP);
 		*ptr++ = htonl(tstamp);		/* TSVAL */
 		*ptr++ = htonl(ts_recent);	/* TSECR */
-	} else if(sack)
+	} else if (sack)
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
@@ -406,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* If congestion control is doing timestamping, we must
 	 * take such a timestamp before we potentially clone/copy.
 	 */
-	if (icsk->icsk_ca_ops->rtt_sample)
+	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
 		__net_timestamp(skb);
 
 	if (likely(clone_it)) {
@@ -430,7 +433,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	sysctl_flags = 0;
 	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
 		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-		if(sysctl_tcp_timestamps) {
+		if (sysctl_tcp_timestamps) {
 			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
 			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
@@ -465,11 +468,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
-	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-	skb->h.th = th;
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 	skb_set_owner_w(skb, sk);
 
 	/* Build TCP header and checksum it. */
+	th = tcp_hdr(skb);
 	th->source		= inet->sport;
 	th->dest		= inet->dport;
 	th->seq			= htonl(tcb->seq);
@@ -515,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 					     md5 ? &md5_hash_location :
 #endif
 					     NULL);
-		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+		TCP_ECN_send(sk, skb, tcp_header_size);
 	}
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -524,7 +528,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       sk, NULL, NULL,
-					       skb->h.th,
+					       tcp_hdr(skb),
 					       sk->sk_protocol,
 					       skb->len);
 	}
@@ -545,7 +549,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (likely(err <= 0))
 		return err;
 
-	tcp_enter_cwr(sk);
+	tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);
 
@@ -567,12 +571,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-
-	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL)
-		sk->sk_send_head = skb;
 }
 
 static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -705,7 +705,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -736,7 +736,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 	}
 	skb_shinfo(skb)->nr_frags = k;
 
-	skb->tail = skb->data;
+	skb_reset_tail_pointer(skb);
 	skb->data_len -= len;
 	skb->len = skb->data_len;
 }
@@ -930,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 /* Congestion window validation. (RFC2861) */
 
-static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out = tp->packets_out;
 
 	if (packets_out >= tp->snd_cwnd) {
@@ -943,7 +944,8 @@ static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
 		if (tp->packets_out > tp->snd_cwnd_used)
 			tp->snd_cwnd_used = tp->packets_out;
 
-		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
+		if (sysctl_tcp_slow_start_after_idle &&
+		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
 	}
 }
@@ -1033,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	if (nonagle & TCP_NAGLE_PUSH)
 		return 1;
 
-	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
-	if (tp->urg_mode ||
+	/* Don't use the nagle rule for urgent data (or for the final FIN).
+	 * Nagle can be ignored during F-RTO too (see RFC4138).
+	 */
+	if (tp->urg_mode || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
 
@@ -1055,7 +1059,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
 	return !after(end_seq, tp->snd_una + tp->snd_wnd);
 }
 
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
  * should be put on the wire right now.  If so, it returns the number of
  * packets allowed by the congestion window.
  */
@@ -1078,15 +1082,10 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	return cwnd_quota;
 }
 
-static inline int tcp_skb_is_last(const struct sock *sk,
-				  const struct sk_buff *skb)
+int tcp_may_send_now(struct sock *sk)
 {
-	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
-{
-	struct sk_buff *skb = sk->sk_send_head;
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1142,7 +1141,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -1152,8 +1151,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
@@ -1248,10 +1248,10 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	/* Have enough data in the send queue to probe? */
 	len = 0;
-	if ((skb = sk->sk_send_head) == NULL)
+	if ((skb = tcp_send_head(sk)) == NULL)
 		return -1;
 	while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
-		skb = skb->next;
+		skb = tcp_write_queue_next(sk, skb);
 	if (len < probe_size)
 		return -1;
 
@@ -1278,9 +1278,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 	sk_charge_skb(sk, nskb);
 
-	skb = sk->sk_send_head;
-	__skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
+	skb = tcp_send_head(sk);
+	tcp_insert_write_queue_before(nskb, skb, sk);
 	sk->sk_send_head = nskb;
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1291,7 +1291,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	len = 0;
 	while (len < probe_size) {
-		next = skb->next;
+		next = tcp_write_queue_next(sk, skb);
 
 		copy = min_t(int, skb->len, probe_size - len);
 		if (nskb->ip_summed)
@@ -1304,7 +1304,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			sk_stream_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1332,7 +1332,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		/* Decrement cwnd here because we are sending
 		* effectively two packets. */
 		tp->snd_cwnd--;
-		update_send_head(sk, tp, nskb);
+		update_send_head(sk, nskb);
 
 		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
 		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1376,7 +1376,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		sent_pkts = 1;
 	}
 
-	while ((skb = sk->sk_send_head)) {
+	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1395,7 +1395,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (tcp_tso_should_defer(sk, tp, skb))
+			if (tcp_tso_should_defer(sk, skb))
 				break;
 		}
 
@@ -1424,31 +1424,31 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		/* Advance the send_head.  This one is sent out.
 		 * This call will increment packets_out.
 		 */
-		update_send_head(sk, tp, skb);
+		update_send_head(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
 	}
 
 	if (likely(sent_pkts)) {
-		tcp_cwnd_validate(sk, tp);
+		tcp_cwnd_validate(sk);
 		return 0;
 	}
-	return !tp->packets_out && sk->sk_send_head;
+	return !tp->packets_out && tcp_send_head(sk);
 }
 
 /* Push out any pending frames which were held back due to
  * TCP_CORK or attempt at coalescing tiny packets.
  * The socket must be locked by the caller.
  */
-void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
-			       unsigned int cur_mss, int nonagle)
+void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+			       int nonagle)
 {
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	if (skb) {
 		if (tcp_write_xmit(sk, cur_mss, nonagle))
-			tcp_check_probe_timer(sk, tp);
+			tcp_check_probe_timer(sk);
 	}
 }
 
@@ -1458,7 +1458,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
 	BUG_ON(!skb || skb->len < mss_now);
@@ -1492,8 +1492,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			update_send_head(sk, tp, skb);
-			tcp_cwnd_validate(sk, tp);
+			update_send_head(sk, skb);
+			tcp_cwnd_validate(sk);
 			return;
 		}
 	}
@@ -1607,6 +1607,9 @@ u32 __tcp_select_window(struct sock *sk)
 		 */
 		if (window <= free_space - mss || window > free_space)
 			window = (free_space/mss)*mss;
+		else if (mss == full_space &&
+		         free_space > window + full_space/2)
+			window = free_space;
 	}
 
 	return window;
@@ -1616,7 +1619,7 @@ u32 __tcp_select_window(struct sock *sk)
 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = skb->next;
+	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
@@ -1626,7 +1629,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		u16 flags = TCP_SKB_CB(skb)->flags;
 
 		/* Also punt if next skb has been SACK'd. */
-		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
 			return;
 
 		/* Next skb is out of window. */
@@ -1648,9 +1651,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		clear_all_retrans_hints(tp);
 
 		/* Ok.	We will be able to collapse the packet. */
-		__skb_unlink(next_skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(next_skb, sk);
 
-		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
+		skb_copy_from_linear_data(next_skb,
+					  skb_put(skb, next_skb_size),
+					  next_skb_size);
 
 		if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 			skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1702,7 +1707,9 @@ void tcp_simple_retransmit(struct sock *sk)
 	unsigned int mss = tcp_current_mss(sk, 0);
 	int lost = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (skb->len > mss &&
 		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1784,13 +1791,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
-	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
-	   (skb->len < (cur_mss >> 1)) &&
-	   (skb->next != sk->sk_send_head) &&
-	   (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
-	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
-	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
-	   (sysctl_tcp_retrans_collapse != 0))
+	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+	    (skb->len < (cur_mss >> 1)) &&
+	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+	    (!tcp_skb_is_last(sk, skb)) &&
+	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1800,9 +1807,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * retransmit when old data is attached.  So strip it off
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
-	if(skb->len > 0 &&
-	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+	if (skb->len > 0 &&
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
 			skb_shinfo(skb)->gso_segs = 1;
@@ -1868,15 +1875,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		skb = tp->retransmit_skb_hint;
 		packet_cnt = tp->retransmit_cnt_hint;
 	}else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
 	/* First pass: retransmit lost packets. */
 	if (tp->lost_out) {
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
 			/* we could do better than to assign each time */
 			tp->retransmit_skb_hint = skb;
 			tp->retransmit_cnt_hint = packet_cnt;
@@ -1902,8 +1911,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 					else
 						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
-					if (skb ==
-					    skb_peek(&sk->sk_write_queue))
+					if (skb == tcp_write_queue_head(sk))
 						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 									  inet_csk(sk)->icsk_rto,
 									  TCP_RTO_MAX);
@@ -1933,18 +1941,20 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	 * segments to send.
 	 */
 
-	if (tcp_may_send_now(sk, tp))
+	if (tcp_may_send_now(sk))
 		return;
 
 	if (tp->forward_skb_hint) {
 		skb = tp->forward_skb_hint;
 		packet_cnt = tp->forward_cnt_hint;
 	} else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		tp->forward_cnt_hint = packet_cnt;
 		tp->forward_skb_hint = skb;
 
@@ -1969,7 +1979,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		}
 
-		if (skb == skb_peek(&sk->sk_write_queue))
+		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
@@ -1985,7 +1995,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 void tcp_send_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	int mss_now;
 
 	/* Optimization, tack on the FIN if we have a queue of
@@ -1994,7 +2004,7 @@ void tcp_send_fin(struct sock *sk)
 	 */
 	mss_now = tcp_current_mss(sk, 1);
 
-	if (sk->sk_send_head != NULL) {
+	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
@@ -2021,17 +2031,16 @@ void tcp_send_fin(struct sock *sk)
 		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to
  * an explicit close() or as a byproduct of exit()'ing) and there
  * was unread data in the receive queue.  This behavior is recommended
- * by draft-ietf-tcpimpl-prob-03.txt section 3.10.  -DaveM
+ * by RFC 2525, section 2.17.  -DaveM
  */
 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	/* NOTE: No TCP options attached and we never retransmit this. */
@@ -2051,7 +2060,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_shinfo(skb)->gso_type = 0;
 
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2067,7 +2076,7 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff* skb;
 
-	skb = skb_peek(&sk->sk_write_queue);
+	skb = tcp_write_queue_head(sk);
 	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
@@ -2077,9 +2086,9 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
-			__skb_queue_head(&sk->sk_write_queue, nskb);
+			__tcp_add_write_queue_head(sk, nskb);
 			sk_stream_free_skb(sk, skb);
 			sk_charge_skb(sk, nskb);
 			skb = nskb;
@@ -2129,8 +2138,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (md5)
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 
+	th = tcp_hdr(skb);
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
@@ -2184,7 +2195,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       NULL, dst, req,
-					       skb->h.th, sk->sk_protocol,
+					       tcp_hdr(skb), sk->sk_protocol,
 					       skb->len);
 	}
 #endif
@@ -2267,7 +2278,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, tp, buff);
+	TCP_ECN_send_syn(sk, buff);
 	TCP_SKB_CB(buff)->sacked = 0;
 	skb_shinfo(buff)->gso_segs = 1;
 	skb_shinfo(buff)->gso_size = 0;
@@ -2281,7 +2292,7 @@ int tcp_connect(struct sock *sk)
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
-	__skb_queue_tail(&sk->sk_write_queue, buff);
+	__tcp_add_write_queue_tail(sk, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2359,7 +2370,6 @@ void tcp_send_ack(struct sock *sk)
 {
 	/* If we have been reset, we may not send again. */
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *buff;
 
 		/* We are not putting this on the write queue, so
@@ -2385,7 +2395,7 @@ void tcp_send_ack(struct sock *sk)
 		skb_shinfo(buff)->gso_type = 0;
 
 		/* Send it off, this clears delayed acks for us. */
-		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
 		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
@@ -2437,7 +2447,7 @@ int tcp_write_wakeup(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *skb;
 
-		if ((skb = sk->sk_send_head) != NULL &&
+		if ((skb = tcp_send_head(sk)) != NULL &&
 		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
 			int err;
 			unsigned int mss = tcp_current_mss(sk, 0);
@@ -2463,7 +2473,7 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
 			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
-				update_send_head(sk, tp, skb);
+				update_send_head(sk, skb);
 			}
 			return err;
 		} else {
@@ -2487,7 +2497,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk);
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 61f406f27294..3938d5dbdf20 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -26,6 +26,8 @@
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/kfifo.h>
+#include <linux/ktime.h>
+#include <linux/time.h>
 #include <linux/vmalloc.h>
 
 #include <net/tcp.h>
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
 MODULE_DESCRIPTION("TCP cwnd snooper");
 MODULE_LICENSE("GPL");
 
-static int port = 0;
+static int port __read_mostly = 0;
 MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
-static int bufsize = 64*1024;
+static int bufsize __read_mostly = 64*1024;
 MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
 module_param(bufsize, int, 0);
 
+static int full __read_mostly;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");
+module_param(full, int, 0);
+
 static const char procname[] = "tcpprobe";
 
 struct {
-	struct kfifo  *fifo;
-	spinlock_t    lock;
+	struct kfifo	*fifo;
+	spinlock_t	lock;
 	wait_queue_head_t wait;
-	struct timeval tstart;
+	ktime_t		start;
+	u32		lastcwnd;
 } tcpw;
 
+/*
+ * Print to log with timestamps.
+ * FIXME: causes an extra copy
+ */
 static void printl(const char *fmt, ...)
 {
 	va_list args;
 	int len;
-	struct timeval now;
+	struct timespec tv;
 	char tbuf[256];
 
 	va_start(args, fmt);
-	do_gettimeofday(&now);
+	/* want monotonic time since start of tcp_probe */
+	tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
 
-	now.tv_sec -= tcpw.tstart.tv_sec;
-	now.tv_usec -= tcpw.tstart.tv_usec;
-	if (now.tv_usec < 0) {
-		--now.tv_sec;
-		now.tv_usec += 1000000;
-	}
-
-	len = sprintf(tbuf, "%lu.%06lu ",
-		      (unsigned long) now.tv_sec,
-		      (unsigned long) now.tv_usec);
+	len = sprintf(tbuf, "%lu.%09lu ",
+		      (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...)
 	wake_up(&tcpw.wait);
 }
 
-static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t size)
+/*
+ * Hook called before each received packet; logs one line per match.
+ * Note: arguments must match tcp_rcv_established()!
+ */
+static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			       struct tcphdr *th, unsigned len)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
 
-	if (port == 0 || ntohs(inet->dport) == port ||
-	    ntohs(inet->sport) == port) {
+	/* Log only matching ports and, unless "full" is set, only cwnd changes */
+	if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
+	    && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
-		printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
+		printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u %u\n",
 		       NIPQUAD(inet->saddr), ntohs(inet->sport),
 		       NIPQUAD(inet->daddr), ntohs(inet->dport),
-		       size, tp->snd_nxt, tp->snd_una,
+		       skb->len, tp->snd_nxt, tp->snd_una,
 		       tp->snd_cwnd, tcp_current_ssthresh(sk),
-		       tp->snd_wnd);
+		       tp->snd_wnd, tp->srtt >> 3);
+		tcpw.lastcwnd = tp->snd_cwnd;
 	}
 
 	jprobe_return();
 	return 0;
 }
 
-static struct jprobe tcp_send_probe = {
+static struct jprobe tcp_probe = {
 	.kp = {
-		.symbol_name	= "tcp_sendmsg",
+		.symbol_name	= "tcp_rcv_established",
 	},
-	.entry	= JPROBE_ENTRY(jtcp_sendmsg),
+	.entry	= JPROBE_ENTRY(jtcp_rcv_established),
 };
 
 
 static int tcpprobe_open(struct inode * inode, struct file * file)
 {
 	kfifo_reset(tcpw.fifo);
-	do_gettimeofday(&tcpw.tstart);
+	tcpw.start = ktime_get();
 	return 0;
 }
 
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void)
 	if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
 		goto err0;
 
-	ret = register_jprobe(&tcp_send_probe);
+	ret = register_jprobe(&tcp_probe);
 	if (ret)
 		goto err1;
 
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void)
 {
-	kfifo_free(tcpw.fifo);
 	proc_net_remove(procname);
-	unregister_jprobe(&tcp_send_probe);
-
+	unregister_jprobe(&tcp_probe);
+	/* Free the log only once the probe can no longer write into it. */
+	kfifo_free(tcpw.fifo);
 }
 module_exit(tcpprobe_exit);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1bea..2ca97b20929d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+	BUG_TRAP(!tcp_write_queue_empty(sk));
 
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk, 0);
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 		tcp_enter_loss(sk, 0);
 	}
 
-	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || sk->sk_send_head)
+	if (tp->packets_out || tcp_send_head(sk))
 		goto resched;
 
 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 5c484dceb967..73e19cf7df21 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
 
 #include <net/tcp.h>
 
+#include "tcp_vegas.h"
+
 /* Default values of the Vegas variables, in fixed-point representation
  * with V_PARAM_SHIFT bits to the right of the binary point.
  */
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
 MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
 
 
-/* Vegas variables */
-struct vegas {
-	u32	beg_snd_nxt;	/* right edge during last RTT */
-	u32	beg_snd_una;	/* left edge  during last RTT */
-	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
-	u8	doing_vegas_now;/* if true, do vegas for this RTT */
-	u16	cntRTT;		/* # of RTTs measured within last RTT */
-	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
-	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
-};
-
 /* There are several situations when we must "re-start" Vegas:
  *
  *  o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
  * Instead we must wait until the completion of an RTT during
  * which we actually receive ACKs.
  */
-static inline void vegas_enable(struct sock *sk)
+static void vegas_enable(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
 	vegas->doing_vegas_now = 0;
 }
 
-static void tcp_vegas_init(struct sock *sk)
+void tcp_vegas_init(struct sock *sk)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	vegas->baseRTT = 0x7fffffff;
 	vegas_enable(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_init);
 
 /* Do RTT sampling needed for Vegas.
  * Basically we:
@@ -120,10 +112,13 @@ static void tcp_vegas_init(struct sock *sk)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = ktime_to_us(net_timedelta(last)) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
@@ -135,8 +130,9 @@ static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
 	vegas->minRTT = min(vegas->minRTT, vrtt);
 	vegas->cntRTT++;
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
 
-static void tcp_vegas_state(struct sock *sk, u8 ca_state)
+void tcp_vegas_state(struct sock *sk, u8 ca_state)
 {
 
 	if (ca_state == TCP_CA_Open)
@@ -144,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
 	else
 		vegas_disable(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_state);
 
 /*
  * If the connection is idle and we are restarting,
@@ -154,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
  * packets, _then_ we can make Vegas calculations
  * again.
  */
-static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_CWND_RESTART ||
 	    event == CA_EVENT_TX_START)
 		tcp_vegas_init(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 				 u32 seq_rtt, u32 in_flight, int flag)
@@ -336,30 +334,29 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct sock *sk, u32 ext,
-			       struct sk_buff *skb)
+void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct vegas *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info *info;
-
-		info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
-					  sizeof(*info)));
-
-		info->tcpv_enabled = ca->doing_vegas_now;
-		info->tcpv_rttcnt = ca->cntRTT;
-		info->tcpv_rtt = ca->baseRTT;
-		info->tcpv_minrtt = ca->minRTT;
-	rtattr_failure:	;
+		struct tcpvegas_info info = {
+			.tcpv_enabled = ca->doing_vegas_now,
+			.tcpv_rttcnt = ca->cntRTT,
+			.tcpv_rtt = ca->baseRTT,
+			.tcpv_minrtt = ca->minRTT,
+		};
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
 static struct tcp_congestion_ops tcp_vegas = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
-	.rtt_sample	= tcp_vegas_rtt_calc,
+	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 000000000000..502fa8183634
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
+/*
+ * TCP Vegas congestion control interface
+ */
+#ifndef __TCP_VEGAS_H
+#define __TCP_VEGAS_H 1
+
+/* Vegas variables */
+struct vegas {
+	u32	beg_snd_nxt;	/* right edge during last RTT */
+	u32	beg_snd_una;	/* left edge  during last RTT */
+	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
+	u8	doing_vegas_now;/* if true, do vegas for this RTT */
+	u16	cntRTT;		/* # of RTTs measured within last RTT */
+	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
+	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
+};
+
+extern void tcp_vegas_init(struct sock *sk);
+extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
+extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
+extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+
+#endif	/* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6c..9edb340f2f95 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
 }
 
 /* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct veno *veno = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1;	/* Never allow zero rtt or basertt */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = ktime_to_us(net_timedelta(last)) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
-	.rtt_sample	= tcp_veno_rtt_calc,
+	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
 	.cwnd_event	= tcp_veno_cwnd_event,
 
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9c..e61e09dd513e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
  * Called after processing group of packets.
  * but all westwood needs is the last sample of srtt.
  */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct westwood *w = inet_csk_ca(sk);
 	if (cnt > 0)
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct westwood *w = inet_csk_ca(sk);
 
-	switch(event) {
+	switch (event) {
 	case CA_EVENT_FAST_ACK:
 		westwood_fast_bw(sk);
 		break;
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
 {
 	const struct westwood *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct rtattr *rta;
-		struct tcpvegas_info *info;
-
-		rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
-		info = RTA_DATA(rta);
-		info->tcpv_enabled = 1;
-		info->tcpv_rttcnt = 0;
-		info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
-		info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
-	rtattr_failure:	;
+		struct tcpvegas_info info = {
+			.tcpv_enabled = 1,
+			.tcpv_rtt = jiffies_to_usecs(ca->rtt),
+			.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+		};
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
 
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
new file mode 100644
index 000000000000..545ed237ab53
--- /dev/null
+++ b/net/ipv4/tcp_yeah.c
@@ -0,0 +1,268 @@
+/*
+ *
+ *   YeAH TCP
+ *
+ * For further details look at:
+ *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+ *
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+
+#include <net/tcp.h>
+
+#include "tcp_vegas.h"
+
+#define TCP_YEAH_ALPHA       80 //lin number of packets queued at the bottleneck
+#define TCP_YEAH_GAMMA        1 //lin fraction of queue to be removed per rtt
+#define TCP_YEAH_DELTA        3 //log minimum fraction of cwnd to be removed on loss
+#define TCP_YEAH_EPSILON      1 //log maximum fraction to be removed on early decongestion
+#define TCP_YEAH_PHY          8 //lin maximum delta from base
+#define TCP_YEAH_RHO         16 //lin minimum number of consecutive rtt to consider competition on loss
+#define TCP_YEAH_ZETA        50 //lin minimum number of state switches to reset reno_count
+
+#define TCP_SCALABLE_AI_CNT	 100U
+
+/* YeAH variables */
+struct yeah {
+	struct vegas vegas;	/* must be first: the shared tcp_vegas_*() hooks read the CA area as struct vegas */
+
+	/* YeAH */
+	u32 lastQ;		/* queue estimate from the last per-RTT calculation */
+	u32 doing_reno_now;	/* per-RTT count of staying in Reno mode; 0 selects the Scalable increase */
+
+	u32 reno_count;		/* cwnd floor for early decongestion; halved (min 2) on loss */
+	u32 fast_count;		/* per-RTT count in fast mode; cleared on loss or past TCP_YEAH_ZETA */
+
+	u32 pkts_acked;		/* last value given to the pkts_acked hook while in TCP_CA_Open */
+};
+
+static void tcp_yeah_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+	/* Reset the embedded Vegas state first. */
+	tcp_vegas_init(sk);
+	/* Start outside Reno mode, with no queue estimate yet. */
+	yeah->doing_reno_now = 0;
+	yeah->lastQ = 0;
+	/* 2 is the floor value used for reno_count throughout this file. */
+	yeah->reno_count = 2;
+
+	/* Ensure the MD arithmetic works.  This is somewhat pedantic,
+	 * since I don't think we will see a cwnd this large. :) */
+	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
+
+}
+
+/* Record pkts_acked while in Open state, then run the Vegas RTT sampling. */
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	if (icsk->icsk_ca_state == TCP_CA_Open)
+		yeah->pkts_acked = pkts_acked;
+
+	tcp_vegas_pkts_acked(sk, pkts_acked, last);
+}
+
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
+				u32 seq_rtt, u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+	else if (!yeah->doing_reno_now) {
+		/* Scalable */
+
+		tp->snd_cwnd_cnt+=yeah->pkts_acked;
+		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+
+		yeah->pkts_acked = 1;
+
+	} else {
+		/* Reno */
+
+		if (tp->snd_cwnd_cnt < tp->snd_cwnd)
+			tp->snd_cwnd_cnt++;
+
+		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+	}
+
+	/* The key players are vegas.beg_snd_una and vegas.beg_snd_nxt.
+	 *
+	 * These are so named because they represent the approximate values
+	 * of snd_una and snd_nxt at the beginning of the current RTT. More
+	 * precisely, they represent the amount of data sent during the RTT.
+	 * At the end of the RTT, when we receive an ACK for vegas.beg_snd_nxt,
+	 * we will calculate that (vegas.beg_snd_nxt - vegas.beg_snd_una) outstanding
+	 * bytes of data have been ACKed during the course of the RTT, giving
+	 * an "actual" rate of:
+	 *
+	 *     (vegas.beg_snd_nxt - vegas.beg_snd_una) / (rtt duration)
+	 *
+	 * Unfortunately, vegas.beg_snd_una is not exactly equal to snd_una,
+	 * because delayed ACKs can cover more than one segment, so they
+	 * don't line up nicely with the boundaries of RTTs.
+	 *
+	 * Another unfortunate fact of life is that delayed ACKs delay the
+	 * advance of the left edge of our send window, so that the number
+	 * of bytes we send in an RTT is often less than our cwnd will allow.
+	 * So we keep track of our cwnd separately, in vegas.beg_snd_cwnd.
+	 */
+
+	if (after(ack, yeah->vegas.beg_snd_nxt)) {
+
+		/* We do the Vegas calculations only if we got enough RTT
+		 * samples that we can be reasonably sure that we got
+		 * at least one RTT sample that wasn't from a delayed ACK.
+		 * If we only had 2 samples total,
+		 * then that means we're getting only 1 ACK per RTT, which
+		 * means they're almost certainly delayed ACKs.
+		 * If  we have 3 samples, we should be OK.
+		 */
+
+		if (yeah->vegas.cntRTT > 2) {
+			u32 rtt, queue;
+			u64 bw;
+
+			/* We have enough RTT samples, so, using the Vegas
+			 * algorithm, we determine if we should increase or
+			 * decrease cwnd, and by how much.
+			 */
+
+			/* Pluck out the RTT we are using for the Vegas
+			 * calculations. This is the min RTT seen during the
+			 * last RTT. Taking the min filters out the effects
+			 * of delayed ACKs, at the cost of noticing congestion
+			 * a bit later.
+			 */
+			rtt = yeah->vegas.minRTT;
+
+			/* Compute excess number of packets above bandwidth
+			 * Avoid doing full 64 bit divide.
+			 */
+			bw = tp->snd_cwnd;
+			bw *= rtt - yeah->vegas.baseRTT;
+			do_div(bw, rtt);
+			queue = bw;
+
+			if (queue > TCP_YEAH_ALPHA ||
+			    rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
+				if (queue > TCP_YEAH_ALPHA
+				    && tp->snd_cwnd > yeah->reno_count) {
+					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+							    tp->snd_cwnd >> TCP_YEAH_EPSILON);
+
+					tp->snd_cwnd -= reduction;
+
+					tp->snd_cwnd = max(tp->snd_cwnd,
+							   yeah->reno_count);
+
+					tp->snd_ssthresh = tp->snd_cwnd;
+				}
+
+				if (yeah->reno_count <= 2)
+					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+				else
+					yeah->reno_count++;
+
+				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
+							   0xffffffU);
+			} else {
+				yeah->fast_count++;
+
+				if (yeah->fast_count > TCP_YEAH_ZETA) {
+					yeah->reno_count = 2;
+					yeah->fast_count = 0;
+				}
+
+				yeah->doing_reno_now = 0;
+			}
+
+			yeah->lastQ = queue;
+
+		}
+
+		/* Save the extent of the current window so we can use this
+		 * at the end of the next RTT.
+		 */
+		yeah->vegas.beg_snd_una  = yeah->vegas.beg_snd_nxt;
+		yeah->vegas.beg_snd_nxt  = tp->snd_nxt;
+		yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+
+		/* Wipe the slate clean for the next RTT. */
+		yeah->vegas.cntRTT = 0;
+		yeah->vegas.minRTT = 0x7fffffff;
+	}
+}
+
+/* New ssthresh after loss: cwnd minus a queue- or Reno-based cut, never < 2. */
+static u32 tcp_yeah_ssthresh(struct sock *sk) {
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+	u32 reduction;
+
+	if (yeah->doing_reno_now < TCP_YEAH_RHO) {
+		/* last queue estimate, capped at max(cwnd/2, 2), floored at cwnd >> DELTA */
+		reduction = yeah->lastQ;
+		reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );
+		reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+	} else
+		reduction = max(tp->snd_cwnd>>1,2U);
+
+	yeah->fast_count = 0;
+	yeah->reno_count = max(yeah->reno_count>>1, 2U);
+	/* reduction can exceed a tiny cwnd; clamp instead of wrapping the u32 */
+	return max_t(int, tp->snd_cwnd - reduction, 2);
+}
+
+static struct tcp_congestion_ops tcp_yeah = {
+	.flags		= TCP_CONG_RTT_STAMP,
+	.init		= tcp_yeah_init,
+	.ssthresh	= tcp_yeah_ssthresh,
+	.cong_avoid	= tcp_yeah_cong_avoid,
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.set_state	= tcp_vegas_state,
+	.cwnd_event	= tcp_vegas_cwnd_event,
+	.get_info	= tcp_vegas_get_info,
+	.pkts_acked	= tcp_yeah_pkts_acked,
+
+	.owner		= THIS_MODULE,
+	.name		= "yeah",
+};
+
+static int __init tcp_yeah_register(void)
+{
+	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+	/* Report a failed registration so the module load fails with it. */
+	return tcp_register_congestion_control(&tcp_yeah);
+}
+
+static void __exit tcp_yeah_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_yeah);
+}
+
+module_init(tcp_yeah_register);
+module_exit(tcp_yeah_unregister);
+
+MODULE_AUTHOR("Angelo P. Castellani");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("YeAH TCP");
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
new file mode 100644
index 000000000000..ed3b7198f23c
--- /dev/null
+++ b/net/ipv4/tcp_yeah.h
@@ -0,0 +1,7 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+#include <asm/div64.h>
+
+#include <net/tcp.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fc620a7c1db4..113e0c4c8a92 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock);
 
 static int udp_port_rover;
 
-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
+/*
+ * Note about this hash function:
+ * in the typical case daddr is 0, so only the port varies the hash.
+ */
+static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr)
+{
+	addr ^= addr >> 16;
+	addr ^= addr >> 8;
+	return port ^ addr;
+}
+
+static inline int __udp_lib_port_inuse(unsigned int hash, int port,
+	__be32 daddr, struct hlist_head udptable[])
 {
 	struct sock *sk;
 	struct hlist_node *node;
+	struct inet_sock *inet;
 
-	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
-		if (sk->sk_hash == num)
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
+		if (sk->sk_hash != hash)
+			continue;
+		inet = inet_sk(sk);
+		if (inet->num != port)
+			continue;
+		if (inet->rcv_saddr == daddr)
 			return 1;
+	}
 	return 0;
 }
 
@@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 	struct hlist_node *node;
 	struct hlist_head *head;
 	struct sock *sk2;
+	unsigned int hash;
 	int    error = 1;
 
 	write_lock_bh(&udp_hash_lock);
@@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
 			int size;
 
-			head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+			hash = hash_port_and_addr(result,
+					inet_sk(sk)->rcv_saddr);
+			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 			if (hlist_empty(head)) {
 				if (result > sysctl_local_port_range[1])
 					result = sysctl_local_port_range[0] +
@@ -175,12 +197,23 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 			;
 		}
 		result = best;
-		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
+		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+		     i++, result += UDP_HTABLE_SIZE) {
 			if (result > sysctl_local_port_range[1])
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
 					   (UDP_HTABLE_SIZE - 1));
-			if (! __udp_lib_lport_inuse(result, udptable))
+			hash = hash_port_and_addr(result, 0);
+			if (__udp_lib_port_inuse(hash, result,
+						 0, udptable))
+				continue;
+			if (!inet_sk(sk)->rcv_saddr)
+				break;
+
+			hash = hash_port_and_addr(result,
+					inet_sk(sk)->rcv_saddr);
+			if (! __udp_lib_port_inuse(hash, result,
+				inet_sk(sk)->rcv_saddr, udptable))
 				break;
 		}
 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
@@ -188,21 +221,41 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 gotit:
 		*port_rover = snum = result;
 	} else {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		hash = hash_port_and_addr(snum, 0);
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 
 		sk_for_each(sk2, node, head)
-			if (sk2->sk_hash == snum                             &&
-			    sk2 != sk                                        &&
-			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
-			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
-			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-			    (*saddr_comp)(sk, sk2)                             )
+			if (sk2->sk_hash == hash &&
+			    sk2 != sk &&
+			    inet_sk(sk2)->num == snum &&
+			    (!sk2->sk_reuse || !sk->sk_reuse) &&
+			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
+			     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+			    (*saddr_comp)(sk, sk2))
 				goto fail;
+
+		if (inet_sk(sk)->rcv_saddr) {
+			hash = hash_port_and_addr(snum,
+						  inet_sk(sk)->rcv_saddr);
+			head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
+
+			sk_for_each(sk2, node, head)
+				if (sk2->sk_hash == hash &&
+				    sk2 != sk &&
+				    inet_sk(sk2)->num == snum &&
+				    (!sk2->sk_reuse || !sk->sk_reuse) &&
+				    (!sk2->sk_bound_dev_if ||
+				     !sk->sk_bound_dev_if ||
+				     sk2->sk_bound_dev_if ==
+				     sk->sk_bound_dev_if) &&
+				    (*saddr_comp)(sk, sk2))
+					goto fail;
+		}
 	}
 	inet_sk(sk)->num = snum;
-	sk->sk_hash = snum;
+	sk->sk_hash = hash;
 	if (sk_unhashed(sk)) {
-		head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+		head = &udptable[hash & (UDP_HTABLE_SIZE - 1)];
 		sk_add_node(sk, head);
 		sock_prot_inc_use(sk->sk_prot);
 	}
@@ -212,13 +265,13 @@ fail:
 	return error;
 }
 
-__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
+int udp_get_port(struct sock *sk, unsigned short snum,
 			int (*scmp)(const struct sock *, const struct sock *))
 {
 	return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
 }
 
-inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 {
 	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
@@ -241,63 +294,77 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 {
 	struct sock *sk, *result = NULL;
 	struct hlist_node *node;
-	unsigned short hnum = ntohs(dport);
-	int badness = -1;
+	unsigned int hash, hashwild;
+	int score, best = -1, hport = ntohs(dport);
+
+ 	hash = hash_port_and_addr(hport, daddr);
+ 	hashwild = hash_port_and_addr(hport, 0);
 
 	read_lock(&udp_hash_lock);
-	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+
+lookup:
+
+	sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) {
 		struct inet_sock *inet = inet_sk(sk);
 
-		if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
-			int score = (sk->sk_family == PF_INET ? 1 : 0);
-			if (inet->rcv_saddr) {
-				if (inet->rcv_saddr != daddr)
-					continue;
-				score+=2;
-			}
-			if (inet->daddr) {
-				if (inet->daddr != saddr)
-					continue;
-				score+=2;
-			}
-			if (inet->dport) {
-				if (inet->dport != sport)
-					continue;
-				score+=2;
-			}
-			if (sk->sk_bound_dev_if) {
-				if (sk->sk_bound_dev_if != dif)
-					continue;
-				score+=2;
-			}
-			if(score == 9) {
-				result = sk;
-				break;
-			} else if(score > badness) {
-				result = sk;
-				badness = score;
-			}
+		if (sk->sk_hash != hash || ipv6_only_sock(sk) ||
+			inet->num != hport)
+			continue;
+
+		score = (sk->sk_family == PF_INET ? 1 : 0);
+		if (inet->rcv_saddr) {
+			if (inet->rcv_saddr != daddr)
+				continue;
+			score+=2;
+		}
+		if (inet->daddr) {
+			if (inet->daddr != saddr)
+				continue;
+			score+=2;
 		}
+		if (inet->dport) {
+			if (inet->dport != sport)
+				continue;
+			score+=2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				continue;
+			score+=2;
+		}
+		if (score == 9) {
+			result = sk;
+			goto found;
+		} else if (score > best) {
+			result = sk;
+			best = score;
+		}
+	}
+
+	if (hash != hashwild) {
+		hash = hashwild;
+		goto lookup;
 	}
+found:
 	if (result)
 		sock_hold(result);
 	read_unlock(&udp_hash_lock);
 	return result;
 }
 
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
-					     __be16 loc_port, __be32 loc_addr,
+static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum,
+					     int hport, __be32 loc_addr,
 					     __be16 rmt_port, __be32 rmt_addr,
 					     int dif)
 {
 	struct hlist_node *node;
 	struct sock *s = sk;
-	unsigned short hnum = ntohs(loc_port);
 
 	sk_for_each_from(s, node) {
 		struct inet_sock *inet = inet_sk(s);
 
 		if (s->sk_hash != hnum					||
+		    inet->num != hport					||
 		    (inet->daddr && inet->daddr != rmt_addr)		||
 		    (inet->dport != rmt_port && inet->dport)		||
 		    (inet->rcv_saddr && inet->rcv_saddr != loc_addr)	||
@@ -329,8 +396,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
 	struct inet_sock *inet;
 	struct iphdr *iph = (struct iphdr*)skb->data;
 	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	int harderr;
 	int err;
@@ -390,7 +457,7 @@ out:
 	sock_put(sk);
 }
 
-__inline__ void udp_err(struct sk_buff *skb, u32 info)
+void udp_err(struct sk_buff *skb, u32 info)
 {
 	return __udp4_lib_err(skb, info, udp_hash);
 }
@@ -419,13 +486,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 				 __be32 src, __be32 dst, int len      )
 {
 	unsigned int offset;
-	struct udphdr *uh = skb->h.uh;
+	struct udphdr *uh = udp_hdr(skb);
 	__wsum csum = 0;
 
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
 		/*
 		 * Only one fragment on the socket.
 		 */
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
 		uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
 	} else {
@@ -434,7 +502,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together
 		 */
-		offset = skb->h.raw - skb->data;
+		offset = skb_transport_offset(skb);
 		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
 		skb->ip_summed = CHECKSUM_NONE;
@@ -469,7 +537,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	/*
 	 * Create a UDP header
 	 */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	uh->source = fl->fl_ip_sport;
 	uh->dest = fl->fl_ip_dport;
 	uh->len = htons(up->len);
@@ -765,38 +833,38 @@ out:
 
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
-	switch(cmd)
+	switch (cmd) {
+	case SIOCOUTQ:
 	{
-		case SIOCOUTQ:
-		{
-			int amount = atomic_read(&sk->sk_wmem_alloc);
-			return put_user(amount, (int __user *)arg);
-		}
+		int amount = atomic_read(&sk->sk_wmem_alloc);
+		return put_user(amount, (int __user *)arg);
+	}
 
-		case SIOCINQ:
-		{
-			struct sk_buff *skb;
-			unsigned long amount;
-
-			amount = 0;
-			spin_lock_bh(&sk->sk_receive_queue.lock);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb != NULL) {
-				/*
-				 * We will only return the amount
-				 * of this packet since that is all
-				 * that will be read.
-				 */
-				amount = skb->len - sizeof(struct udphdr);
-			}
-			spin_unlock_bh(&sk->sk_receive_queue.lock);
-			return put_user(amount, (int __user *)arg);
+	case SIOCINQ:
+	{
+		struct sk_buff *skb;
+		unsigned long amount;
+
+		amount = 0;
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb != NULL) {
+			/*
+			 * We will only return the amount
+			 * of this packet since that is all
+			 * that will be read.
+			 */
+			amount = skb->len - sizeof(struct udphdr);
 		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		return put_user(amount, (int __user *)arg);
+	}
 
-		default:
-			return -ENOIOCTLCMD;
+	default:
+		return -ENOIOCTLCMD;
 	}
-	return(0);
+
+	return 0;
 }
 
 /*
@@ -810,7 +878,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
-	int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
+	unsigned int ulen, copied;
+	int err;
+	int is_udplite = IS_UDPLITE(sk);
 
 	/*
 	 *	Check any passed addresses
@@ -826,28 +896,25 @@ try_again:
 	if (!skb)
 		goto out;
 
-	copied = skb->len - sizeof(struct udphdr);
-	if (copied > len) {
-		copied = len;
+	ulen = skb->len - sizeof(struct udphdr);
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
-	}
 
 	/*
-	 * 	Decide whether to checksum and/or copy data.
-	 *
-	 * 	UDP:      checksum may have been computed in HW,
-	 * 	          (re-)compute it if message is truncated.
-	 * 	UDP-Lite: always needs to checksum, no HW support.
+	 * If checksum is needed at all, try to do it while copying the
+	 * data.  If the data is truncated, or if we only want a partial
+	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
-	copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
 
-	if (is_udplite  ||  (!copy_only  &&  msg->msg_flags&MSG_TRUNC)) {
-		if (__udp_lib_checksum_complete(skb))
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
-		copy_only = 1;
 	}
 
-	if (copy_only)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -866,8 +933,8 @@ try_again:
 	if (sin)
 	{
 		sin->sin_family = AF_INET;
-		sin->sin_port = skb->h.uh->source;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_port = udp_hdr(skb)->source;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
 	if (inet->cmsg_flags)
@@ -875,7 +942,7 @@ try_again:
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = ulen;
 
 out_free:
 	skb_free_datagram(sk, skb);
@@ -949,7 +1016,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		return 1;
 
 	/* Now we can get the pointers */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	udpdata = (__u8 *)uh + sizeof(struct udphdr);
 	udpdata32 = (__be32 *)udpdata;
 
@@ -959,7 +1026,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
 			return 0;
-		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
 		} else
@@ -990,7 +1057,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		return 0;
 
 	/* Now we can update and verify the packet length... */
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iphlen = iph->ihl << 2;
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
@@ -1002,7 +1069,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	skb->h.raw = skb_pull(skb, len);
+	__skb_pull(skb, len);
+	skb_reset_transport_header(skb);
 
 	/* modify the protocol (it's ESP!) */
 	iph->protocol = IPPROTO_ESP;
@@ -1095,10 +1163,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		}
 	}
 
-	if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
-		if (__udp_lib_checksum_complete(skb))
+	if (sk->sk_filter) {
+		if (udp_lib_checksum_complete(skb))
 			goto drop;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
@@ -1128,33 +1195,49 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 				    __be32 saddr, __be32 daddr,
 				    struct hlist_head udptable[])
 {
-	struct sock *sk;
+	struct sock *sk, *skw, *sknext;
 	int dif;
+	int hport = ntohs(uh->dest);
+	unsigned int hash = hash_port_and_addr(hport, daddr);
+	unsigned int hashwild = hash_port_and_addr(hport, 0);
 
-	read_lock(&udp_hash_lock);
-	sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
 	dif = skb->dev->ifindex;
-	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
 
+	read_lock(&udp_hash_lock);
+
+	sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]);
+	skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]);
+
+	sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif);
+	if (!sk) {
+		hash = hashwild;
+		sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source,
+			saddr, dif);
+	}
+	if (sk) {
 		do {
 			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
-						   uh->source, saddr, dif);
-			if(sknext)
+			sknext = udp_v4_mcast_next(sk_next(sk), hash, hport,
+						daddr, uh->source, saddr, dif);
+			if (!sknext && hash != hashwild) {
+				hash = hashwild;
+				sknext = udp_v4_mcast_next(skw, hash, hport,
+					daddr, uh->source, saddr, dif);
+			}
+			if (sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
-			if(skb1) {
+			if (skb1) {
 				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
+					/*
+					 * we should probably re-process
+					 * instead of dropping packets here.
+					 */
 					kfree_skb(skb1);
 			}
 			sk = sknext;
-		} while(sknext);
+		} while (sknext);
 	} else
 		kfree_skb(skb);
 	read_unlock(&udp_hash_lock);
@@ -1166,25 +1249,37 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
  * Otherwise, csum completion requires chacksumming packet body,
  * including udp header and folding it to skb->csum.
  */
-static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
+static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
+				 int proto)
 {
+	const struct iphdr *iph;
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
+	iph = ip_hdr(skb);
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-	       if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
-				      skb->len, IPPROTO_UDP, skb->csum       ))
+	       if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+				      proto, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
-					       skb->nh.iph->daddr,
-					       skb->len, IPPROTO_UDP, 0);
+	if (!skb_csum_unnecessary(skb))
+		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+					       skb->len, proto, 0);
 	/* Probably, we should checksum udp header (it should be in cache
 	 * in any case) and data in tiny packets (< rx copybreak).
 	 */
 
-	/* UDP = UDP-Lite with a non-partial checksum coverage */
-	UDP_SKB_CB(skb)->partial_cov = 0;
+	return 0;
 }
 
 /*
@@ -1192,14 +1287,14 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
  */
 
 int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
-		   int is_udplite)
+		   int proto)
 {
 	struct sock *sk;
-	struct udphdr *uh = skb->h.uh;
+	struct udphdr *uh = udp_hdr(skb);
 	unsigned short ulen;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	__be32 saddr = skb->nh.iph->saddr;
-	__be32 daddr = skb->nh.iph->daddr;
+	__be32 saddr = ip_hdr(skb)->saddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 
 	/*
 	 *  Validate the packet.
@@ -1211,24 +1306,21 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (ulen > skb->len)
 		goto short_packet;
 
-	if(! is_udplite ) {		/* UDP validates ulen. */
-
+	if (proto == IPPROTO_UDP) {
+		/* UDP validates ulen. */
 		if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
 			goto short_packet;
-		uh = skb->h.uh;
-
-		udp4_csum_init(skb, uh);
-
-	} else 	{			/* UDP-Lite validates cscov. */
-		if (udplite4_csum_init(skb, uh))
-			goto csum_error;
+		uh = udp_hdr(skb);
 	}
 
-	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+	if (udp4_csum_init(skb, uh, proto))
+		goto csum_error;
+
+	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
 	sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
-			       skb->dev->ifindex, udptable        );
+			       skb->dev->ifindex, udptable);
 
 	if (sk != NULL) {
 		int ret = udp_queue_rcv_skb(sk, skb);
@@ -1250,7 +1342,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp_lib_checksum_complete(skb))
 		goto csum_error;
 
-	UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+	UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
 	/*
@@ -1258,11 +1350,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	 * don't wanna listen.  Ignore it.
 	 */
 	kfree_skb(skb);
-	return(0);
+	return 0;
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
-		       is_udplite? "-Lite" : "",
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
 		       NIPQUAD(saddr),
 		       ntohs(uh->source),
 		       ulen,
@@ -1277,21 +1369,21 @@ csum_error:
 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
 	 */
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
-		       is_udplite? "-Lite" : "",
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
 		       NIPQUAD(saddr),
 		       ntohs(uh->source),
 		       NIPQUAD(daddr),
 		       ntohs(uh->dest),
 		       ulen);
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-	return(0);
+	return 0;
 }
 
-__inline__ int udp_rcv(struct sk_buff *skb)
+int udp_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udp_hash, 0);
+	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
 
 int udp_destroy_sock(struct sock *sk)
@@ -1313,13 +1405,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	int val;
 	int err = 0;
 
-	if(optlen<sizeof(int))
+	if (optlen<sizeof(int))
 		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	switch(optname) {
+	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
 			up->corkflag = 1;
@@ -1373,7 +1465,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		err = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	return err;
 }
@@ -1404,15 +1496,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 	struct udp_sock *up = udp_sk(sk);
 	int val, len;
 
-	if(get_user(len,optlen))
+	if (get_user(len,optlen))
 		return -EFAULT;
 
 	len = min_t(unsigned int, len, sizeof(int));
 
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
-	switch(optname) {
+	switch (optname) {
 	case UDP_CORK:
 		val = up->corkflag;
 		break;
@@ -1433,11 +1525,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
-	if(put_user(len, optlen))
+	if (put_user(len, optlen))
 		return -EFAULT;
-	if(copy_to_user(optval, &val,len))
+	if (copy_to_user(optval, &val,len))
 		return -EFAULT;
 	return 0;
 }
@@ -1486,15 +1578,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		struct sk_buff *skb;
 
 		spin_lock_bh(&rcvq->lock);
-		while ((skb = skb_peek(rcvq)) != NULL) {
-			if (udp_lib_checksum_complete(skb)) {
-				UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
-				__skb_unlink(skb, rcvq);
-				kfree_skb(skb);
-			} else {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				break;
-			}
+		while ((skb = skb_peek(rcvq)) != NULL &&
+		       udp_lib_checksum_complete(skb)) {
+			UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+			__skb_unlink(skb, rcvq);
+			kfree_skb(skb);
 		}
 		spin_unlock_bh(&rcvq->lock);
 
@@ -1573,7 +1661,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 	struct sock *sk = udp_get_first(seq);
 
 	if (sk)
-		while(pos && (sk = udp_get_next(seq, sk)) != NULL)
+		while (pos && (sk = udp_get_next(seq, sk)) != NULL)
 			--pos;
 	return pos ? NULL : sk;
 }
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index b28fe1edf98b..f34fd686a8f1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
 
 static int udplite_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udplite_hash, 1);
+	return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplite_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e89..5ceca951d73f 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
 	switch (nexthdr) {
 	case IPPROTO_IPIP:
 	case IPPROTO_IPV6:
-		*spi = skb->nh.iph->saddr;
+		*spi = ip_hdr(skb)->saddr;
 		*seq = 0;
 		return 0;
 	}
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
 #ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
-
 	if (skb->dst == NULL) {
+		const struct iphdr *iph = ip_hdr(skb);
+
 		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
 				   skb->dev))
 			goto drop;
@@ -55,18 +55,18 @@ drop:
 
 int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
-	int err;
 	__be32 spi, seq;
 	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
+	int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
 
-	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+	if (err != 0)
 		goto drop;
 
 	do {
-		struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 			break;
 		}
 
-		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+		err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+		if (err < 0)
 			goto drop;
 	} while (!err);
 
@@ -146,15 +147,15 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		return 0;
 	} else {
 #ifdef CONFIG_NETFILTER
-		__skb_push(skb, skb->data - skb->nh.raw);
-		skb->nh.iph->tot_len = htons(skb->len);
-		ip_send_check(skb->nh.iph);
+		__skb_push(skb, skb->data - skb_network_header(skb));
+		ip_hdr(skb)->tot_len = htons(skb->len);
+		ip_send_check(ip_hdr(skb));
 
 		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
 			xfrm4_rcv_encap_finish);
 		return 0;
 #else
-		return -skb->nh.iph->protocol;
+		return -ip_hdr(skb)->protocol;
 #endif
 	}
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 89cf59ea7bbe..a73e710740c2 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -29,32 +29,34 @@
  */
 static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph, *top_iph = NULL;
+	struct iphdr *iph, *top_iph;
 	int hdrlen, optlen;
 
-	iph = skb->nh.iph;
-	skb->h.ipiph = iph;
+	iph = ip_hdr(skb);
+	skb->transport_header = skb->network_header;
 
 	hdrlen = 0;
 	optlen = iph->ihl * 4 - sizeof(*iph);
 	if (unlikely(optlen))
 		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
 
-	skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
-	top_iph = skb->nh.iph;
-	hdrlen = iph->ihl * 4 - optlen;
-	skb->h.raw += hdrlen;
+	skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen);
+	skb_reset_network_header(skb);
+	top_iph = ip_hdr(skb);
+	skb->transport_header += sizeof(*iph) - hdrlen;
 
-	memmove(top_iph, iph, hdrlen);
+	memmove(top_iph, iph, sizeof(*iph));
 	if (unlikely(optlen)) {
 		struct ip_beet_phdr *ph;
 
 		BUG_ON(optlen < 0);
 
-		ph = (struct ip_beet_phdr *)skb->h.raw;
+		ph = (struct ip_beet_phdr *)skb_transport_header(skb);
 		ph->padlen = 4 - (optlen & 4);
-		ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
+		ph->hdrlen = optlen / 8;
 		ph->nexthdr = top_iph->protocol;
+		if (ph->padlen)
+			memset(ph + 1, IPOPT_NOP, ph->padlen);
 
 		top_iph->protocol = IPPROTO_BEETPH;
 		top_iph->ihl = sizeof(struct iphdr) / 4;
@@ -68,46 +70,45 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	int phlen = 0;
 	int optlen = 0;
-	__u8 ph_nexthdr = 0, protocol = 0;
+	u8 ph_nexthdr = 0;
 	int err = -EINVAL;
 
-	protocol = iph->protocol;
-
 	if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
-		struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
+		struct ip_beet_phdr *ph;
 
 		if (!pskb_may_pull(skb, sizeof(*ph)))
 			goto out;
+		ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
 
-		phlen = ph->hdrlen * 8;
-		optlen = phlen - ph->padlen - sizeof(*ph);
+		phlen = sizeof(*ph) + ph->padlen;
+		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
 		if (optlen < 0 || optlen & 3 || optlen > 250)
 			goto out;
 
-		if (!pskb_may_pull(skb, phlen))
+		if (!pskb_may_pull(skb, phlen + optlen))
 			goto out;
+		skb->len -= phlen + optlen;
 
 		ph_nexthdr = ph->nexthdr;
 	}
 
-	skb_push(skb, sizeof(*iph) - phlen + optlen);
-	memmove(skb->data, skb->nh.raw, sizeof(*iph));
-	skb->nh.raw = skb->data;
+	skb_set_network_header(skb, phlen - sizeof(*iph));
+	memmove(skb_network_header(skb), iph, sizeof(*iph));
+	skb_set_transport_header(skb, phlen + optlen);
+	skb->data = skb_transport_header(skb);
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iph->ihl = (sizeof(*iph) + optlen) / 4;
-	iph->tot_len = htons(skb->len);
+	iph->tot_len = htons(skb->len + iph->ihl * 4);
 	iph->daddr = x->sel.daddr.a4;
 	iph->saddr = x->sel.saddr.a4;
 	if (ph_nexthdr)
 		iph->protocol = ph_nexthdr;
-	else
-		iph->protocol = protocol;
 	iph->check = 0;
-	iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 92676b7e4034..601047161ea6 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -23,16 +23,13 @@
  */
 static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph;
-	int ihl;
+	struct iphdr *iph = ip_hdr(skb);
+	int ihl = iph->ihl * 4;
 
-	iph = skb->nh.iph;
-	skb->h.ipiph = iph;
-
-	ihl = iph->ihl * 4;
-	skb->h.raw += ihl;
-
-	skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
+	skb->transport_header = skb->network_header + ihl;
+	skb_push(skb, x->props.header_len);
+	skb_reset_network_header(skb);
+	memmove(skb_network_header(skb), iph, ihl);
 	return 0;
 }
 
@@ -46,12 +43,15 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
-	if (skb->h.raw != skb->nh.raw)
-		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
-	skb->nh.iph->tot_len = htons(skb->len + ihl);
-	skb->h.raw = skb->data;
+	if (skb->transport_header != skb->network_header) {
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
+		skb->network_header = skb->transport_header;
+	}
+	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
+	skb_reset_transport_header(skb);
 	return 0;
 }
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ceb4376f572a..a2f2e6a5ec5d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,8 +16,8 @@
 
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct iphdr *outer_iph = skb->nh.iph;
-	struct iphdr *inner_iph = skb->h.ipiph;
+	struct iphdr *outer_iph = ip_hdr(skb);
+	struct iphdr *inner_iph = ipip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
 		IP_ECN_set_ce(inner_iph);
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
 /* Add encapsulation header.
@@ -46,11 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph, *top_iph;
 	int flags;
 
-	iph = skb->nh.iph;
-	skb->h.ipiph = iph;
+	iph = ip_hdr(skb);
+	skb->transport_header = skb->network_header;
 
-	skb->nh.raw = skb_push(skb, x->props.header_len);
-	top_iph = skb->nh.iph;
+	skb_push(skb, x->props.header_len);
+	skb_reset_network_header(skb);
+	top_iph = ip_hdr(skb);
 
 	top_iph->ihl = 5;
 	top_iph->version = 4;
@@ -90,10 +91,11 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
+	const unsigned char *old_mac;
 	int err = -EINVAL;
 
-	switch(iph->protocol){
+	switch (iph->protocol){
 		case IPPROTO_IPIP:
 			break;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -111,10 +113,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (iph->protocol == IPPROTO_IPIP) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv4_copy_dscp(iph, skb->h.ipiph);
+			ipv4_copy_dscp(iph, ipip_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip_ecn_decapsulate(skb);
 	}
@@ -125,9 +127,10 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IPV6);
 	}
 #endif
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
-	skb->nh.raw = skb->data;
+	old_mac = skb_mac_header(skb);
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	skb_reset_network_header(skb);
 	err = 0;
 
 out:
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 038ca160fe2c..44ef208a75cb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
 	struct dst_entry *dst;
-	struct iphdr *iph = skb->nh.iph;
 
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
 
-	if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
 		goto out;
 
 	dst = skb->dst;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5d51a2af34c1..4ff8ed30024f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
 			unsigned short encap_family = xfrm[i]->props.family;
-			switch(encap_family) {
+			switch (encap_family) {
 			case AF_INET:
 				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
 				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
@@ -209,8 +209,8 @@ error:
 static void
 _decode_session4(struct sk_buff *skb, struct flowi *fl)
 {
-	struct iphdr *iph = skb->nh.iph;
-	u8 *xprth = skb->nh.raw + iph->ihl*4;
+	struct iphdr *iph = ip_hdr(skb);
+	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 
 	memset(fl, 0, sizeof(struct flowi));
 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		default:
 			fl->fl_ipsec_spi = 0;
 			break;
-		};
+		}
 	}
 	fl->proto = iph->protocol;
 	fl->fl4_dst = iph->daddr;
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 3eef06454da9..568510304553 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -12,9 +12,8 @@
 
 static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = ip_hdr(skb);
 
-	iph = skb->nh.iph;
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 79682efb14be..8e5d54f23b49 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -57,6 +57,16 @@ config IPV6_ROUTE_INFO
 
 	  If unsure, say N.
 
+config IPV6_OPTIMISTIC_DAD
+	bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  This is experimental support for optimistic Duplicate
+	  Address Detection.  It allows for autoconfigured addresses
+	  to be used more quickly.
+
+	  If unsure, say N.
+
 config INET6_AH
 	tristate "IPv6: AH transformation"
 	depends on IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d460017bb353..bb33309044c9 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,14 +7,15 @@ obj-$(CONFIG_IPV6) += ipv6.o
 ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
-		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-		ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
+		exthdrs.o sysctl_net_ipv6.o datagram.o \
+		ip6_flowlabel.o inet6_connection_sock.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
 
 ipv6-objs += $(ipv6-y)
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1b616992d916..3452433cbc96 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
 #endif
 
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -172,6 +173,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
 #endif
 #endif
 	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -203,12 +205,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 #endif
 #endif
 	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-#if 0
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-#endif
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -244,6 +245,37 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 	add_timer(&ifp->timer);
 }
 
+static int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+	int err = -ENOMEM;
+
+	if (!idev || !idev->dev)
+		return -EINVAL;
+
+	if (snmp_mib_init((void **)idev->stats.ipv6,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip;
+	if (snmp_mib_init((void **)idev->stats.icmpv6,
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
+		goto err_icmp;
+
+	return 0;
+
+err_icmp:
+	snmp_mib_free((void **)idev->stats.ipv6);
+err_ip:
+	return err;
+}
+
+static int snmp6_free_dev(struct inet6_dev *idev)
+{
+	snmp_mib_free((void **)idev->stats.icmpv6);
+	snmp_mib_free((void **)idev->stats.ipv6);
+	return 0;
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
@@ -269,6 +301,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+
 static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 {
 	struct inet6_dev *ndev;
@@ -342,6 +376,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	}
 #endif
 
+	if (netif_running(dev) && netif_carrier_ok(dev))
+		ndev->if_flags |= IF_READY;
+
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
 #ifdef CONFIG_SYSCTL
@@ -523,6 +560,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 
 	ifa->rt = rt;
 
+	/*
+	 * part one of RFC 4429, section 3.3
+	 * We should not configure an address as
+	 * optimistic if we do not yet know the link
+	 * layer address of our nexthop router
+	 */
+
+	if (rt->rt6i_nexthop == NULL)
+		ifa->flags &= ~IFA_F_OPTIMISTIC;
+
 	ifa->idev = idev;
 	in6_dev_hold(idev);
 	/* For caller */
@@ -699,6 +746,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
 	int tmp_plen;
 	int ret = 0;
 	int max_addresses;
+	u32 addr_flags;
 
 	write_lock(&idev->lock);
 	if (ift) {
@@ -756,10 +804,17 @@ retry:
 	spin_unlock_bh(&ifp->lock);
 
 	write_unlock(&idev->lock);
+
+	addr_flags = IFA_F_TEMPORARY;
+	/* set in addrconf_prefix_rcv() */
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		addr_flags |= IFA_F_OPTIMISTIC;
+
 	ift = !max_addresses ||
 	      ipv6_count_addresses(idev) < max_addresses ?
 		ipv6_add_addr(idev, &addr, tmp_plen,
-			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
+			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+			      addr_flags) : NULL;
 	if (!ift || IS_ERR(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
@@ -891,13 +946,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 			 * - Tentative Address (RFC2462 section 5.4)
 			 *  - A tentative address is not considered
 			 *    "assigned to an interface" in the traditional
-			 *    sense.
+			 *    sense, unless it is also flagged as optimistic.
 			 * - Candidate Source Address (section 4)
 			 *  - In any case, anycast addresses, multicast
 			 *    addresses, and the unspecified address MUST
 			 *    NOT be included in a candidate set.
 			 */
-			if (ifa->flags & IFA_F_TENTATIVE)
+			if ((ifa->flags & IFA_F_TENTATIVE) &&
+			    (!(ifa->flags & IFA_F_OPTIMISTIC)))
 				continue;
 			if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
 				     score.addr_type & IPV6_ADDR_MULTICAST)) {
@@ -956,15 +1012,17 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 				}
 			}
 
-			/* Rule 3: Avoid deprecated address */
+			/* Rule 3: Avoid deprecated and optimistic addresses */
 			if (hiscore.rule < 3) {
 				if (ipv6_saddr_preferred(hiscore.addr_type) ||
-				    !(ifa_result->flags & IFA_F_DEPRECATED))
+				   (((ifa_result->flags &
+				    (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
 					hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
 				hiscore.rule++;
 			}
 			if (ipv6_saddr_preferred(score.addr_type) ||
-			    !(ifa->flags & IFA_F_DEPRECATED)) {
+			   (((ifa->flags &
+			    (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
 				score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
 				if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
 					score.rule = 3;
@@ -1102,8 +1160,10 @@ int ipv6_get_saddr(struct dst_entry *dst,
 	return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
 }
 
+EXPORT_SYMBOL(ipv6_get_saddr);
 
-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+		    unsigned char banned_flags)
 {
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
@@ -1114,7 +1174,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 
 		read_lock_bh(&idev->lock);
 		for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-			if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+			if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
 				ipv6_addr_copy(addr, &ifp->addr);
 				err = 0;
 				break;
@@ -1156,6 +1216,8 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
 	return ifp != NULL;
 }
 
+EXPORT_SYMBOL(ipv6_chk_addr);
+
 static
 int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
 {
@@ -1664,6 +1726,13 @@ ok:
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
+			u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+			if (in6_dev->cnf.optimistic_dad &&
+			    !ipv6_devconf.forwarding)
+				addr_flags = IFA_F_OPTIMISTIC;
+#endif
 
 			/* Do not allow to create too much of autoconfigured
 			 * addresses; this would be too easy way to crash kernel.
@@ -1671,7 +1740,8 @@ ok:
 			if (!max_addresses ||
 			    ipv6_count_addresses(in6_dev) < max_addresses)
 				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
-						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+						    addr_type&IPV6_ADDR_SCOPE_MASK,
+						    addr_flags);
 
 			if (!ifp || IS_ERR(ifp)) {
 				in6_dev_put(in6_dev);
@@ -1879,6 +1949,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
 
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
 				      jiffies_to_clock_t(valid_lft * HZ), flags);
+		/*
+		 * Note that section 3.1 of RFC 4429 indicates
+		 * that the Optimistic flag should not be set for
+		 * manually configured addresses
+		 */
 		addrconf_dad_start(ifp, 0);
 		in6_ifa_put(ifp);
 		addrconf_verify(0);
@@ -2055,8 +2130,16 @@ static void init_loopback(struct net_device *dev)
 static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
 {
 	struct inet6_ifaddr * ifp;
+	u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	if (idev->cnf.optimistic_dad &&
+	    !ipv6_devconf.forwarding)
+		addr_flags |= IFA_F_OPTIMISTIC;
+#endif
+
 
-	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
 	if (!IS_ERR(ifp)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
 		addrconf_dad_start(ifp, 0);
@@ -2124,7 +2207,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
 {
 	struct in6_addr lladdr;
 
-	if (!ipv6_get_lladdr(link_dev, &lladdr)) {
+	if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
 		addrconf_add_linklocal(idev, &lladdr);
 		return 0;
 	}
@@ -2235,7 +2318,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		default:
 			addrconf_dev_config(dev);
 			break;
-		};
+		}
 		if (idev) {
 			if (run_pending)
 				addrconf_dad_run(idev);
@@ -2276,8 +2359,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		break;
 
 	case NETDEV_CHANGENAME:
-#ifdef CONFIG_SYSCTL
 		if (idev) {
+			snmp6_unregister_dev(idev);
+#ifdef CONFIG_SYSCTL
 			addrconf_sysctl_unregister(&idev->cnf);
 			neigh_sysctl_unregister(idev->nd_parms);
 			neigh_sysctl_register(dev, idev->nd_parms,
@@ -2285,10 +2369,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					      &ndisc_ifinfo_sysctl_change,
 					      NULL);
 			addrconf_sysctl_register(idev, &idev->cnf);
-		}
 #endif
+			snmp6_register_dev(idev);
+		}
 		break;
-	};
+	}
 
 	return NOTIFY_OK;
 }
@@ -2469,7 +2554,11 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 	unsigned long rand_num;
 	struct inet6_dev *idev = ifp->idev;
 
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		rand_num = 0;
+	else
+		rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
 	ifp->probes = idev->cnf.dad_transmits;
 	addrconf_mod_timer(ifp, AC_DAD, rand_num);
 }
@@ -2491,7 +2580,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
-		ifp->flags &= ~IFA_F_TENTATIVE;
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
@@ -2511,6 +2600,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		addrconf_dad_stop(ifp);
 		return;
 	}
+
+	/*
+	 * Optimistic nodes can start receiving
+	 * Frames right away
+	 */
+	if(ifp->flags & IFA_F_OPTIMISTIC)
+		ip6_ins_rt(ifp->rt);
+
 	addrconf_dad_kick(ifp);
 	spin_unlock_bh(&ifp->lock);
 out:
@@ -2535,7 +2632,7 @@ static void addrconf_dad_timer(unsigned long data)
 		 * DAD was successful
 		 */
 
-		ifp->flags &= ~IFA_F_TENTATIVE;
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
@@ -3159,7 +3256,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 
 	s_idx = cb->args[0];
 	s_ip_idx = ip_idx = cb->args[1];
-	read_lock(&dev_base_lock);
 
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
@@ -3221,7 +3317,6 @@ done:
 		read_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
 	}
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 	cb->args[1] = ip_idx;
 	return skb->len;
@@ -3353,6 +3448,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
 #endif
 	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -3366,14 +3465,44 @@ static inline size_t inet6_if_nlmsg_size(void)
 			nla_total_size(4) /* IFLA_INET6_FLAGS */
 			+ nla_total_size(sizeof(struct ifla_cacheinfo))
 			+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+			+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+			+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
 		 );
 }
 
+static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
+				      int bytes)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+
+	memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+			     int bytes)
+{
+	switch(attrtype) {
+	case IFLA_INET6_STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		break;
+	case IFLA_INET6_ICMP6STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+		break;
+	}
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
-	struct nlattr *conf;
+	struct nlattr *nla;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	void *protoinfo;
@@ -3413,12 +3542,22 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	ci.retrans_time = idev->nd_parms->retrans_time;
 	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
 
-	conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-	if (conf == NULL)
+	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+	if (nla == NULL)
 		goto nla_put_failure;
-	ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf));
+	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+	/* XXX - MC not implemented */
 
-	/* XXX - Statistics/MC not implemented */
+	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
 	nla_nest_end(skb, protoinfo);
 	return nlmsg_end(skb, nlh);
@@ -3544,30 +3683,20 @@ errout:
 		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
 }
 
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
-	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
-	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },
-	[RTM_GETADDR - RTM_BASE] = { .doit	= inet6_rtm_getaddr,
-				     .dumpit	= inet6_dump_ifaddr, },
-	[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
-	[RTM_GETANYCAST - RTM_BASE] = { .dumpit	= inet6_dump_ifacaddr, },
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet6_rtm_newroute, },
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
-				      .dumpit	= inet6_dump_fib, },
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
-#endif
-};
-
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
 	case RTM_NEWADDR:
-		ip6_ins_rt(ifp->rt);
+		/*
+		 * If the address was optimistic
+		 * we inserted the route at the start of
+		 * our DAD process, so we don't need
+		 * to do it again
+		 */
+		if (!(ifp->rt->rt6i_node))
+			ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		break;
@@ -3881,6 +4010,25 @@ static struct addrconf_sysctl_table
 			.proc_handler	=	&proc_dointvec,
 		},
 		{
+			.ctl_name	=	NET_IPV6_ACCEPT_SOURCE_ROUTE,
+			.procname	=	"accept_source_route",
+			.data		=	&ipv6_devconf.accept_source_route,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+		{
+			.ctl_name	=	CTL_UNNUMBERED,
+			.procname       =       "optimistic_dad",
+			.data           =       &ipv6_devconf.optimistic_dad,
+			.maxlen         =       sizeof(int),
+			.mode           =       0644,
+			.proc_handler   =       &proc_dointvec,
+
+		},
+#endif
+		{
 			.ctl_name	=	0,	/* sentinel */
 		}
 	},
@@ -4007,11 +4155,15 @@ int register_inet6addr_notifier(struct notifier_block *nb)
 	return atomic_notifier_chain_register(&inet6addr_chain, nb);
 }
 
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
 int unregister_inet6addr_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
 }
 
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
 /*
  *	Init / cleanup code
  */
@@ -4050,7 +4202,18 @@ int __init addrconf_init(void)
 	register_netdevice_notifier(&ipv6_dev_notf);
 
 	addrconf_verify(0);
-	rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+
+	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+	if (err < 0)
+		goto errout;
+
+	/* Only the first call to __rtnl_register can fail */
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
+	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
+	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
@@ -4058,6 +4221,10 @@ int __init addrconf_init(void)
 #endif
 
 	return 0;
+errout:
+	unregister_netdevice_notifier(&ipv6_dev_notf);
+
+	return err;
 }
 
 void __exit addrconf_cleanup(void)
@@ -4069,7 +4236,6 @@ void __exit addrconf_cleanup(void)
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 
-	rtnetlink_links[PF_INET6] = NULL;
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
 	addrconf_sysctl_unregister(&ipv6_devconf);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5cac14a5c778..18cb928c8d92 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -98,6 +98,11 @@ static int inet6_create(struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
+	if (sock->type != SOCK_RAW &&
+	    sock->type != SOCK_DGRAM &&
+	    !inet_ehash_secret)
+		build_ehash_secret();
+
 	/* Look for the requested type/protocol pair. */
 	answer = NULL;
 lookup_protocol:
@@ -349,6 +354,8 @@ out:
 	return err;
 }
 
+EXPORT_SYMBOL(inet6_bind);
+
 int inet6_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -365,6 +372,8 @@ int inet6_release(struct socket *sock)
 	return inet_release(sock);
 }
 
+EXPORT_SYMBOL(inet6_release);
+
 int inet6_destroy_sock(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,6 +437,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	return(0);
 }
 
+EXPORT_SYMBOL(inet6_getname);
+
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
@@ -437,6 +448,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, (struct timeval __user *)arg);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
 	case SIOCADDRT:
 	case SIOCDELRT:
 
@@ -457,6 +471,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return(0);
 }
 
+EXPORT_SYMBOL(inet6_ioctl);
+
 const struct proto_ops inet6_stream_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
@@ -603,6 +619,8 @@ out_illegal:
 	goto out;
 }
 
+EXPORT_SYMBOL(inet6_register_protosw);
+
 void
 inet6_unregister_protosw(struct inet_protosw *p)
 {
@@ -619,6 +637,8 @@ inet6_unregister_protosw(struct inet_protosw *p)
 	}
 }
 
+EXPORT_SYMBOL(inet6_unregister_protosw);
+
 int inet6_sk_rebuild_header(struct sock *sk)
 {
 	int err;
@@ -678,7 +698,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 	if (np->rxopt.all) {
 		if ((opt->hop && (np->rxopt.bits.hopopts ||
 				  np->rxopt.bits.ohopopts)) ||
-		    ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
+		    ((IPV6_FLOWINFO_MASK &
+		      *(__be32 *)skb_network_header(skb)) &&
 		     np->rxopt.bits.rxflow) ||
 		    (opt->srcrt && (np->rxopt.bits.srcrt ||
 		     np->rxopt.bits.osrcrt)) ||
@@ -691,61 +712,28 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 
 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
-	if (ptr == NULL)
-		return -EINVAL;
-
-	ptr[0] = __alloc_percpu(mibsize);
-	if (!ptr[0])
-		goto err0;
-
-	ptr[1] = __alloc_percpu(mibsize);
-	if (!ptr[1])
-		goto err1;
-
-	return 0;
-
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
-}
-
-void
-snmp6_mib_free(void *ptr[2])
-{
-	if (ptr == NULL)
-		return;
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
-}
-
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
-			   __alignof__(struct ipstats_mib)) < 0)
+	if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
+	if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
-			   __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
-			   __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;
 
 err_udplite_mib:
-	snmp6_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udp_stats_in6);
 err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 
@@ -753,10 +741,10 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
-	snmp6_mib_free((void **)udplite_stats_in6);
+	snmp_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udplite_stats_in6);
 }
 
 static int __init inet6_init(void)
@@ -929,6 +917,8 @@ static void __exit inet6_exit(void)
 {
 	/* First of all disallow new sockets creation. */
 	sock_unregister(PF_INET6);
+	/* Disallow any further netlink messages */
+	rtnl_unregister_all(PF_INET6);
 
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3c..b696c8401200 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)skb->data;
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_AH;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_AH;
 
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(tmp_base, top_iph, sizeof(tmp_base));
 
 	tmp_ext = NULL;
-	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+	extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr);
 	if (extlen) {
 		extlen += sizeof(*tmp_ext);
 		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 			goto error_free_iph;
 	}
 
-	ah = (struct ip_auth_hdr *)skb->h.raw;
+	ah = (struct ip_auth_hdr *)skb_transport_header(skb);
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority    = 0;
@@ -316,8 +316,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 *
 	 * To erase AH:
 	 * Keeping copy of cleared headers. After AH processing,
-	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
-	 * header length. Then copy back the copy as long as hdr_len
+	 * Moving the pointer of skb->network_header by using skb_pull as long
+	 * as AH header length. Then copy back the copy as long as hdr_len
 	 * If destination header following AH exists, copy it into after [Ext2].
 	 *
 	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
@@ -325,6 +325,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 */
 
 	struct ipv6_auth_hdr *ah;
+	struct ipv6hdr *ip6h;
 	struct ah_data *ahp;
 	unsigned char *tmp_hdr = NULL;
 	u16 hdr_len;
@@ -341,7 +342,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
-	hdr_len = skb->data - skb->nh.raw;
+	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ipv6_auth_hdr*)skb->data;
 	ahp = x->data;
 	nexthdr = ah->nexthdr;
@@ -354,16 +355,17 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
 	if (!tmp_hdr)
 		goto out;
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+	ip6h = ipv6_hdr(skb);
+	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
 		goto free_out;
-	skb->nh.ipv6h->priority    = 0;
-	skb->nh.ipv6h->flow_lbl[0] = 0;
-	skb->nh.ipv6h->flow_lbl[1] = 0;
-	skb->nh.ipv6h->flow_lbl[2] = 0;
-	skb->nh.ipv6h->hop_limit   = 0;
+	ip6h->priority    = 0;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->hop_limit   = 0;
 
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
@@ -382,7 +384,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+	skb->network_header += ah_hlen;
+	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b4e8dcf4c86..403eee66b9c5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -209,7 +209,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 		     __be16 port, u32 info, u8 *payload)
 {
 	struct ipv6_pinfo *np  = inet6_sk(sk);
-	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct sock_exterr_skb *serr;
 
 	if (!np->recverr)
@@ -227,11 +227,12 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+				  skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
 	__skb_pull(skb, payload - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -251,8 +252,9 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	if (!skb)
 		return;
 
-	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = iph;
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
 	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
 
 	serr = SKB_EXT_ERR(skb);
@@ -263,11 +265,11 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
-	skb->h.raw = skb->tail;
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -309,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
 	sin = (struct sockaddr_in6 *)msg->msg_name;
 	if (sin) {
+		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
 			ipv6_addr_copy(&sin->sin6_addr,
-			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+				  (struct in6_addr *)(nh + serr->addr_offset));
 			if (np->sndflow)
-				sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+				sin->sin6_flowinfo =
+					(*(__be32 *)(nh + serr->addr_offset - 24) &
+					 IPV6_FLOWINFO_MASK);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
 				      htonl(0xffff),
-				      *(__be32*)(skb->nh.raw + serr->addr_offset));
+				      *(__be32 *)(nh + serr->addr_offset));
 		}
 	}
 
@@ -335,7 +340,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+			ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
 			if (np->rxopt.all)
 				datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -344,8 +349,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 			struct inet_sock *inet = inet_sk(sk);
 
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      skb->nh.iph->saddr);
+				      htonl(0xffff), ip_hdr(skb)->saddr);
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
@@ -381,33 +385,34 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	unsigned char *nh = skb_network_header(skb);
 
 	if (np->rxopt.bits.rxinfo) {
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
 	if (np->rxopt.bits.rxhlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
+		int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-		__be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+	if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+		__be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
 		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
 	if (np->rxopt.bits.hopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 
@@ -423,11 +428,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
 		 */
 		unsigned int off = sizeof(struct ipv6hdr);
-		u8 nexthdr = skb->nh.ipv6h->nexthdr;
+		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 		while (off <= opt->lastopt) {
 			unsigned len;
-			u8 *ptr = skb->nh.raw + off;
+			u8 *ptr = nh + off;
 
 			switch(nexthdr) {
 			case IPPROTO_DSTOPTS:
@@ -461,27 +466,27 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst0) {
-		u8 *ptr = skb->nh.raw + opt->dst0;
+		u8 *ptr = nh + opt->dst0;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.osrcrt && opt->srcrt) {
-		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst1) {
-		u8 *ptr = skb->nh.raw + opt->dst1;
+		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	return 0;
@@ -718,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				       cmsg->cmsg_type);
 			err = -EINVAL;
 			break;
-		};
+		}
 	}
 
 exit_f:
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecca..7107bb7e2e62 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -42,21 +42,19 @@
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	int hdr_len;
 	struct ipv6hdr *top_iph;
 	struct ipv6_esp_hdr *esph;
 	struct crypto_blkcipher *tfm;
 	struct blkcipher_desc desc;
-	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
 	int clen;
 	int alen;
 	int nfrags;
-
-	esp = x->data;
-	hdr_len = skb->h.raw - skb->data +
-		  sizeof(*esph) + esp->conf.ivlen;
+	u8 *tail;
+	struct esp_data *esp = x->data;
+	int hdr_len = (skb_transport_offset(skb) +
+		       sizeof(*esph) + esp->conf.ivlen);
 
 	/* Strip IP+ESP header. */
 	__skb_pull(skb, hdr_len);
@@ -81,19 +79,20 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
 	do {
 		int i;
 		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	tail[clen-skb->len - 2] = (clen - skb->len) - 2;
 	pskb_put(skb, trailer, clen - skb->len);
 
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
-	esph = (struct ipv6_esp_hdr *)skb->h.raw;
+	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ESP;
+	*(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
 	esph->seq_no = htonl(++x->replay.oseq);
@@ -150,8 +149,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
-
-	int hdr_len = skb->h.raw - skb->nh.raw;
+	int hdr_len = skb_network_header_len(skb);
 	int nfrags;
 	int ret = 0;
 
@@ -191,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	esph = (struct ipv6_esp_hdr*)skb->data;
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
@@ -231,28 +229,30 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = nexthdr[1];
 	}
 
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -hdr_len);
 out:
 	return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 rem;
+
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	rem = mtu & (align - 1);
+	mtu &= ~(align - 1);
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
+	if (x->props.mode != XFRM_MODE_TUNNEL) {
 		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
+		mtu -= blksize - padsize;
+		mtu += min_t(u32, blksize - padsize, rem);
 	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	return mtu - 2;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -382,7 +382,7 @@ static struct xfrm_type esp6_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_max_size	= esp6_get_max_size,
+	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 28e0c6568272..6d8e4ac7bdad 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
 
 int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 {
-	int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb->tail - skb->network_header;
 	struct ipv6_opt_hdr *hdr;
 	int len;
 
 	if (offset + 2 > packet_len)
 		goto bad;
-	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
 	len = ((hdr->hdrlen + 1) << 3);
 
 	if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 	len -= 2;
 
 	while (len > 0) {
-		int opttype = skb->nh.raw[offset];
+		int opttype = nh[offset];
 		int optlen;
 
 		if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 			optlen = 1;
 			break;
 		default:
-			optlen = skb->nh.raw[offset + 1] + 2;
+			optlen = nh[offset + 1] + 2;
 			if (optlen > len)
 				goto bad;
 			break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
 
-	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
 
@@ -124,12 +125,12 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 		/* Actually, it is redundant check. icmp_send
 		   will recheck in any case.
 		 */
-		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
 			break;
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 		return 0;
-	};
+	}
 
 	kfree_skb(skb);
 	return 0;
@@ -141,19 +142,20 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
-	int off = skb->h.raw - skb->nh.raw;
-	int len = ((skb->h.raw[1]+1)<<3);
+	const unsigned char *nh = skb_network_header(skb);
+	int off = skb_network_header_len(skb);
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
 
-	if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
 
 	off += 2;
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off+1]+2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 			if (optlen > len)
 				goto bad;
 			for (curr=procs; curr->type >= 0; curr++) {
-				if (curr->type == skb->nh.raw[off]) {
+				if (curr->type == nh[off]) {
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
@@ -200,7 +202,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct in6_addr tmp_addr;
 	int ret;
 
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	opt->dsthao = opt->dst1;
 	opt->dst1 = 0;
 
-	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
 	if (hao->length != 16) {
 		LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 
 		/* update all variable using below by copied skbuff */
 		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
-		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+						  optoff);
+		ipv6h = ipv6_hdr(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -255,7 +258,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
 	ipv6_addr_copy(&hao->addr, &tmp_addr);
 
-	if (skb->tstamp.off_sec == 0)
+	if (skb->tstamp.tv64 == 0)
 		__net_timestamp(skb);
 
 	return 1;
@@ -285,16 +288,16 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 #endif
 	struct dst_entry *dst;
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	opt->lastopt = skb->h.raw - skb->nh.raw;
-	opt->dst1 = skb->h.raw - skb->nh.raw;
+	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
 #ifdef CONFIG_IPV6_MIP6
 	dstbuf = opt->dst1;
 #endif
@@ -303,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		dst_release(dst);
 		skb = *skbp;
-		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
@@ -362,22 +365,58 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
+	struct inet6_dev *idev;
 	int n, i;
-
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
+	int accept_source_route = ipv6_devconf.accept_source_route;
+
+	if (accept_source_route < 0 ||
+	    ((idev = in6_dev_get(skb->dev)) == NULL)) {
+		kfree_skb(skb);
+		return -1;
+	}
+	if (idev->cnf.accept_source_route < 0) {
+		in6_dev_put(idev);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (accept_source_route > idev->cnf.accept_source_route)
+		accept_source_route = idev->cnf.accept_source_route;
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	in6_dev_put(idev);
+
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+	switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
+		break;
+#endif
+	case IPV6_SRCRT_TYPE_0:
+		if (accept_source_route > 0)
+			break;
+		kfree_skb(skb);
+		return -1;
+	default:
+		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+				 IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  (&hdr->type) - skb_network_header(skb));
+		return -1;
+	}
+
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INADDRERRORS);
@@ -405,12 +444,11 @@ looped_back:
 			break;
 		}
 
-		opt->lastopt = skb->h.raw - skb->nh.raw;
-		opt->srcrt = skb->h.raw - skb->nh.raw;
-		skb->h.raw += (hdr->hdrlen + 1) << 3;
+		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
 		return 1;
 	}
 
@@ -419,7 +457,9 @@ looped_back:
 		if (hdr->hdrlen & 0x01) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+					  ((&hdr->hdrlen) -
+					   skb_network_header(skb)));
 			return -1;
 		}
 		break;
@@ -434,11 +474,6 @@ looped_back:
 		}
 		break;
 #endif
-	default:
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
-				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
-		return -1;
 	}
 
 	/*
@@ -451,7 +486,9 @@ looped_back:
 	if (hdr->segments_left > n) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
 		return -1;
 	}
 
@@ -470,7 +507,7 @@ looped_back:
 		kfree_skb(skb);
 		*skbp = skb = skb2;
 		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -486,7 +523,7 @@ looped_back:
 #ifdef CONFIG_IPV6_MIP6
 	case IPV6_SRCRT_TYPE_2:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
-				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
@@ -513,19 +550,19 @@ looped_back:
 	}
 
 	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
-	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
+	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
 
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
 	if (skb->dst->error) {
-		skb_push(skb, skb->data - skb->nh.raw);
+		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
 
 	if (skb->dst->dev->flags&IFF_LOOPBACK) {
-		if (skb->nh.ipv6h->hop_limit <= 1) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
@@ -533,11 +570,11 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		skb->nh.ipv6h->hop_limit--;
+		ipv6_hdr(skb)->hop_limit--;
 		goto looped_back;
 	}
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
 }
@@ -628,13 +665,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[optoff+1] == 2) {
+	if (nh[optoff + 1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-		       skb->nh.raw[optoff+1]);
+		       nh[optoff + 1]);
 	kfree_skb(skb);
 	return 0;
 }
@@ -644,23 +682,24 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
-	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-			       skb->nh.raw[optoff+1]);
+			       nh[optoff+1]);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
-	pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return 0;
 	}
-	if (skb->nh.ipv6h->payload_len) {
+	if (ipv6_hdr(skb)->payload_len) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
 		return 0;
@@ -699,13 +738,14 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
-	 * skb->nh.raw is equal to skb->data, and
-	 * skb->h.raw - skb->nh.raw is always equal to
+	 * skb_network_header(skb) is equal to skb->data, and
+	 * skb_network_header_len(skb) is always equal to
 	 * sizeof(struct ipv6hdr) by definition of
 	 * hop-by-hop options.
 	 */
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
-	    !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		kfree_skb(skb);
 		return -1;
 	}
@@ -713,7 +753,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
@@ -782,6 +822,8 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
 }
 
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
 {
 	if (opt->dst1opt)
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 0862809ffcf7..fc3882c90604 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -17,6 +17,7 @@
 
 #include <net/fib_rules.h>
 #include <net/ipv6.h>
+#include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <net/netlink.h>
 
@@ -95,8 +96,27 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 	if (table)
 		rt = lookup(table, flp, flags);
 
-	if (rt != &ip6_null_entry)
+	if (rt != &ip6_null_entry) {
+		struct fib6_rule *r = (struct fib6_rule *)rule;
+
+		/*
+		 * If we need to find a source address for this traffic,
+		 * we check whether the result meets the rule's requirement.
+		 */
+		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+			struct in6_addr saddr;
+			if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
+					   &saddr))
+				goto again;
+			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+					       r->src.plen))
+				goto again;
+			ipv6_addr_copy(&flp->fl6_src, &saddr);
+		}
 		goto out;
+	}
+again:
 	dst_release(&rt->u.dst);
 	rt = NULL;
 	goto out;
@@ -117,9 +137,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	    !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
 		return 0;
 
+	/*
+	 * If FIB_RULE_FIND_SADDR is set and we do not have a
+	 * source address for the traffic, we defer the check for
+	 * the source address.
+	 */
 	if (r->src.plen) {
-		if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
-		    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+			if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+					       r->src.plen))
+				return 0;
+		} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
 			return 0;
 	}
 
@@ -131,8 +159,6 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 
 static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = {
 	FRA_GENERIC_POLICY,
-	[FRA_SRC]	= { .len = sizeof(struct in6_addr) },
-	[FRA_DST]	= { .len = sizeof(struct in6_addr) },
 };
 
 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
@@ -142,9 +168,6 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 	int err = -EINVAL;
 	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
 
-	if (frh->src_len > 128 || frh->dst_len > 128)
-		goto errout;
-
 	if (rule->action == FR_ACT_TO_TBL) {
 		if (rule->table == RT6_TABLE_UNSPEC)
 			goto errout;
@@ -155,11 +178,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		}
 	}
 
-	if (tb[FRA_SRC])
+	if (frh->src_len)
 		nla_memcpy(&rule6->src.addr, tb[FRA_SRC],
 			   sizeof(struct in6_addr));
 
-	if (tb[FRA_DST])
+	if (frh->dst_len)
 		nla_memcpy(&rule6->dst.addr, tb[FRA_DST],
 			   sizeof(struct in6_addr));
 
@@ -186,11 +209,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 	if (frh->tos && (rule6->tclass != frh->tos))
 		return 0;
 
-	if (tb[FRA_SRC] &&
+	if (frh->src_len &&
 	    nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
 		return 0;
 
-	if (tb[FRA_DST] &&
+	if (frh->dst_len &&
 	    nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr)))
 		return 0;
 
@@ -221,11 +244,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_INET6);
-}
-
 static u32 fib6_rule_default_pref(void)
 {
 	return 0x3FFF;
@@ -240,6 +258,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 static struct fib_rules_ops fib6_rules_ops = {
 	.family			= AF_INET6,
 	.rule_size		= sizeof(struct fib6_rule),
+	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
 	.match			= fib6_rule_match,
 	.configure		= fib6_rule_configure,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index edfe98bf64c3..e9bcce9e7bdf 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
 #include <asm/system.h>
 
 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
+EXPORT_SYMBOL(icmpv6_statistics);
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
@@ -128,9 +129,9 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
 
 static int is_ineligible(struct sk_buff *skb)
 {
-	int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 	int len = skb->len - ptr;
-	__u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 	if (len < 0)
 		return 1;
@@ -205,7 +206,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
 {
 	u8 _optval, *op;
 
-	offset += skb->nh.raw - skb->data;
+	offset += skb_network_offset(skb);
 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 	if (op == NULL)
 		return 1;
@@ -221,7 +222,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 		goto out;
 
-	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	icmp6h = icmp6_hdr(skb);
 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 	icmp6h->icmp6_cksum = 0;
 
@@ -274,7 +275,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 #ifdef CONFIG_IPV6_MIP6
 static void mip6_addr_swap(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6_destopt_hao *hao;
 	struct in6_addr tmp;
@@ -283,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
 	if (opt->dsthao) {
 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(off >= 0)) {
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + off);
 			ipv6_addr_copy(&tmp, &iph->saddr);
 			ipv6_addr_copy(&iph->saddr, &hao->addr);
 			ipv6_addr_copy(&hao->addr, &tmp);
@@ -301,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		 struct net_device *dev)
 {
 	struct inet6_dev *idev = NULL;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct sock *sk;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
@@ -315,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	int hlimit, tclass;
 	int err = 0;
 
-	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+	if ((u8 *)hdr < skb->head ||
+	    (skb->network_header + sizeof(*hdr)) > skb->tail)
 		return;
 
 	/*
@@ -430,7 +433,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		tclass = 0;
 
 	msg.skb = skb;
-	msg.offset = skb->nh.raw - skb->data;
+	msg.offset = skb_network_offset(skb);
 	msg.type = type;
 
 	len = skb->len - msg.offset;
@@ -466,13 +469,15 @@ out:
 	icmpv6_xmit_unlock();
 }
 
+EXPORT_SYMBOL(icmpv6_send);
+
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
 	struct sock *sk;
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
-	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
 	struct icmpv6_msg msg;
@@ -481,7 +486,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	int hlimit;
 	int tclass;
 
-	saddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->daddr;
 
 	if (!ipv6_unicast_destination(skb))
 		saddr = NULL;
@@ -491,7 +496,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	if (saddr)
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
@@ -579,8 +584,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
@@ -624,8 +629,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
@@ -647,7 +652,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
 		goto discard_it;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	type = hdr->icmp6_type;
 
@@ -673,7 +678,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
-		hdr = (struct icmp6hdr *) skb->h.raw;
+		hdr = icmp6_hdr(skb);
 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
 				   ntohl(hdr->icmp6_mtu));
@@ -727,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 
 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
-	};
+	}
+
 	kfree_skb(skb);
 	return 0;
 
@@ -860,11 +866,13 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_TIME_EXCEED:
 		*err = EHOSTUNREACH;
 		break;
-	};
+	}
 
 	return fatal;
 }
 
+EXPORT_SYMBOL(icmpv6_err_convert);
+
 #ifdef CONFIG_SYSCTL
 ctl_table ipv6_icmp_table[] = {
 	{
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f4d7be77eb0f..ca08ee88d07f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -359,7 +359,7 @@ end:
 	return res;
 }
 
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -658,6 +658,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		ins = &iter->u.dst.rt6_next;
 	}
 
+	/* Reset round-robin state, if necessary */
+	if (ins == &fn->leaf)
+		fn->rr_ptr = NULL;
+
 	/*
 	 *	insert node
 	 */
@@ -1109,6 +1113,10 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	rt6_stats.fib_rt_entries--;
 	rt6_stats.fib_discarded_routes++;
 
+	/* Reset round-robin state, if necessary */
+	if (fn->rr_ptr == rt)
+		fn->rr_ptr = NULL;
+
 	/* Adjust walkers */
 	read_lock(&fib6_walker_lock);
 	FOR_WALKERS(w) {
@@ -1478,6 +1486,8 @@ void __init fib6_init(void)
 					   NULL, NULL);
 
 	fib6_tables_init();
+
+	__rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 11bfc7c43182..be0ee8a34f9b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -96,25 +96,27 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
 		goto err;
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto err;
 
-	skb->h.raw = (u8 *)(hdr + 1);
+	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	pkt_len = ntohs(hdr->payload_len);
 
 	/* pkt_len may be zero if Jumbo payload option is present */
 	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
-		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-			goto truncated;
+		if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
+			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+			goto drop;
+		}
 		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 			goto drop;
 		}
-		hdr = skb->nh.ipv6h;
+		hdr = ipv6_hdr(skb);
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
@@ -128,8 +130,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	rcu_read_unlock();
 
 	return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
-truncated:
-	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS);
 err:
 	IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 drop:
@@ -160,10 +160,10 @@ static inline int ip6_input_finish(struct sk_buff *skb)
 	rcu_read_lock();
 resubmit:
 	idev = ip6_dst_idev(skb->dst);
-	if (!pskb_pull(skb, skb->h.raw - skb->data))
+	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,9 +181,9 @@ resubmit:
 			   indefinitely. */
 			nf_reset(skb);
 
-			skb_postpull_rcsum(skb, skb->nh.raw,
-					   skb->h.raw - skb->nh.raw);
-			hdr = skb->nh.ipv6h;
+			skb_postpull_rcsum(skb, skb_network_header(skb),
+					   skb_network_header_len(skb));
+			hdr = ipv6_hdr(skb);
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
 			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
 			    &hdr->saddr) &&
@@ -234,7 +234,7 @@ int ip6_mc_input(struct sk_buff *skb)
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 	deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
 	    ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 305516921aa8..f508171bab73 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,8 +88,8 @@ static inline int ip6_output_finish(struct sk_buff *skb)
 /* dev_loopback_xmit for use with netfilter. */
 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 {
-	newskb->mac.raw = newskb->data;
-	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	skb_reset_mac_header(newskb);
+	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	BUG_TRAP(newskb->dst);
@@ -107,13 +107,13 @@ static int ip6_output2(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
-		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
-				&skb->nh.ipv6h->saddr)) {
+		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr)) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
 			/* Do not check for IFF_ALLMULTI; multicast routing
@@ -124,7 +124,7 @@ static int ip6_output2(struct sk_buff *skb)
 					newskb->dev,
 					ip6_dev_loopback_xmit);
 
-			if (skb->nh.ipv6h->hop_limit == 0) {
+			if (ipv6_hdr(skb)->hop_limit == 0) {
 				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return 0;
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
 	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 }
 
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
 int ip6_output(struct sk_buff *skb)
 {
-	if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
+	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 				dst_allfrag(skb->dst))
 		return ip6_fragment(skb, ip6_output2);
 	else
@@ -191,7 +199,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 	}
 
-	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
 
 	/*
 	 *	Fill in the IPv6 header
@@ -239,6 +249,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	return -EMSGSIZE;
 }
 
+EXPORT_SYMBOL(ip6_xmit);
+
 /*
  *	To avoid extra problems ND packets are send through this
  *	routine. It's code duplication but I really want to avoid
@@ -259,8 +271,9 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 
 	totlen = len + sizeof(struct ipv6hdr);
 
-	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = hdr;
+	skb_reset_network_header(skb);
+	skb_put(skb, sizeof(struct ipv6hdr));
+	hdr = ipv6_hdr(skb);
 
 	*(__be32*)hdr = htonl(0x60000000);
 
@@ -305,7 +318,7 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 
 static int ip6_forward_proxy_check(struct sk_buff *skb)
 {
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	u8 nexthdr = hdr->nexthdr;
 	int offset;
 
@@ -319,10 +332,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 	if (nexthdr == IPPROTO_ICMPV6) {
 		struct icmp6hdr *icmp6;
 
-		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+		if (!pskb_may_pull(skb, (skb_network_header(skb) +
+					 offset + 1 - skb->data)))
 			return 0;
 
-		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 
 		switch (icmp6->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
@@ -361,7 +375,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
 int ip6_forward(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	if (ipv6_devconf.forwarding == 0)
@@ -372,7 +386,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	/*
 	 *	We DO NOT make any processing on
@@ -388,7 +402,7 @@ int ip6_forward(struct sk_buff *skb)
 	 *	that different fragments will go along one path. --ANK
 	 */
 	if (opt->ra) {
-		u8 *ptr = skb->nh.raw + opt->ra;
+		u8 *ptr = skb_network_header(skb) + opt->ra;
 		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 			return 0;
 	}
@@ -470,7 +484,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	/* Mangling hops number delayed to point after skb COW */
 
@@ -499,33 +513,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-	/* Connection association is same as pre-frag packet */
-	nf_conntrack_put(to->nfct);
-	to->nfct = from->nfct;
-	nf_conntrack_get(to->nfct);
-	to->nfctinfo = from->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	nf_conntrack_put_reasm(to->nfct_reasm);
-	to->nfct_reasm = from->nfct_reasm;
-	nf_conntrack_get_reasm(to->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	nf_bridge_put(to->nf_bridge);
-	to->nf_bridge = from->nf_bridge;
-	nf_bridge_get(to->nf_bridge);
-#endif
-#endif
+	nf_copy(to, from);
 	skb_copy_secmark(to, from);
 }
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	struct ipv6_opt_hdr *exthdr =
+				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
@@ -550,7 +549,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+						 offset);
 	}
 
 	return offset;
@@ -574,7 +574,20 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = ip6_skb_dst_mtu(skb);
+
+	/* We must not fragment if the socket is set to force MTU discovery
+	 * or if the skb is not generated by a local socket.  (This last
+	 * check should be redundant, but it's free.)
+	 */
+	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
+		skb->dev = skb->dst->dev;
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
 	if (np && np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
@@ -616,7 +629,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		/* BUILD HEADER */
 
 		*prevhdr = NEXTHDR_FRAGMENT;
-		tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
+		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
@@ -624,8 +637,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 		__skb_pull(skb, hlen);
 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
-		skb->nh.raw = __skb_push(skb, hlen);
-		memcpy(skb->nh.raw, tmp_hdr, hlen);
+		__skb_push(skb, hlen);
+		skb_reset_network_header(skb);
+		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
 		ipv6_select_ident(skb, fh);
 		fh->nexthdr = nexthdr;
@@ -636,7 +650,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		first_len = skb_pagelen(skb);
 		skb->data_len = first_len - skb_headlen(skb);
 		skb->len = first_len;
-		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+		ipv6_hdr(skb)->payload_len = htons(first_len -
+						   sizeof(struct ipv6hdr));
 
 		dst_hold(&rt->u.dst);
 
@@ -645,10 +660,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			 * before previous one went down. */
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
-				frag->h.raw = frag->data;
+				skb_reset_transport_header(frag);
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
-				frag->nh.raw = __skb_push(frag, hlen);
-				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				__skb_push(frag, hlen);
+				skb_reset_network_header(frag);
+				memcpy(skb_network_header(frag), tmp_hdr,
+				       hlen);
 				offset += skb->len - hlen - sizeof(struct frag_hdr);
 				fh->nexthdr = nexthdr;
 				fh->reserved = 0;
@@ -656,7 +673,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				if (frag->next != NULL)
 					fh->frag_off |= htons(IP6_MF);
 				fh->identification = frag_id;
-				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ipv6_hdr(frag)->payload_len =
+						htons(frag->len -
+						      sizeof(struct ipv6hdr));
 				ip6_copy_metadata(frag, skb);
 			}
 
@@ -733,9 +752,10 @@ slow_path:
 		ip6_copy_metadata(frag, skb);
 		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
-		frag->nh.raw = frag->data;
-		fh = (struct frag_hdr*)(frag->data + hlen);
-		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+		skb_reset_network_header(frag);
+		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+		frag->transport_header = (frag->network_header + hlen +
+					  sizeof(struct frag_hdr));
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -747,7 +767,7 @@ slow_path:
 		/*
 		 *	Copy the packet header into the new buffer.
 		 */
-		memcpy(frag->nh.raw, skb->data, hlen);
+		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 
 		/*
 		 *	Build fragment header.
@@ -763,14 +783,15 @@ slow_path:
 		/*
 		 *	Copy a block of the IP datagram.
 		 */
-		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 			BUG();
 		left -= len;
 
 		fh->frag_off = htons(offset);
 		if (left > 0)
 			fh->frag_off |= htons(IP6_MF);
-		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+		ipv6_hdr(frag)->payload_len = htons(frag->len -
+						    sizeof(struct ipv6hdr));
 
 		ptr += len;
 		offset += len;
@@ -861,6 +882,41 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 			goto out_err_release;
 	}
 
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+		/*
+		 * Here if the dst entry we've looked up
+		 * has a neighbour entry that is in the INCOMPLETE
+		 * state and the src address from the flow is
+		 * marked as OPTIMISTIC, we release the found
+		 * dst entry and replace it instead with the
+		 * dst entry of the nexthop router
+		 */
+		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
+			struct inet6_ifaddr *ifp;
+			struct flowi fl_gw;
+			int redirect;
+
+			ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
+
+			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+			if (ifp)
+				in6_ifa_put(ifp);
+
+			if (redirect) {
+				/*
+				 * We need to get the dst entry for the
+				 * default router instead
+				 */
+				dst_release(*dst);
+				memcpy(&fl_gw, fl, sizeof(struct flowi));
+				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+				*dst = ip6_route_output(sk, &fl_gw);
+				if ((err = (*dst)->error))
+					goto out_err_release;
+			}
+		}
+#endif
+
 	return 0;
 
 out_err_release:
@@ -939,10 +995,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb_put(skb,fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->transport_header = skb->network_header + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -1015,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
-		mtu = dst_mtu(rt->u.dst.path);
+		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
@@ -1162,10 +1219,10 @@ alloc_new_skb:
 			 *	Find where to start putting bytes
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
-
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
@@ -1288,10 +1345,10 @@ int ip6_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
-		__skb_pull(skb, skb->nh.raw - skb->data);
+	if (skb->data < skb_network_header(skb))
+		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
@@ -1303,13 +1360,15 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
-	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
 	if (opt && opt->opt_nflen)
 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
 
-	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = ipv6_hdr(skb);
 
 	*(__be32*)hdr = fl->fl6_flowlabel |
 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 08d944223ec8..a0902fbdb4e1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,15 @@
 /*
- *	IPv6 over IPv6 tunnel device
+ *	IPv6 tunneling device
  *	Linux INET6 implementation
  *
  *	Authors:
  *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
  *
  *	$Id$
  *
  *      Based on:
- *      linux/net/ipv6/sit.c
+ *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
  *
@@ -24,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/sockios.h>
+#include <linux/icmp.h>
 #include <linux/if.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -41,6 +43,7 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include <net/icmp.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -51,7 +54,7 @@
 #include <net/inet_ecn.h>
 
 MODULE_AUTHOR("Ville Nuorvala");
-MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
 
 #define IPV6_TLV_TEL_DST_SIZE 8
@@ -63,6 +66,7 @@ MODULE_LICENSE("GPL");
 #endif
 
 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
 
 #define HASH_SIZE  32
 
@@ -70,12 +74,12 @@ MODULE_LICENSE("GPL");
 		     (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
 		    (HASH_SIZE - 1))
 
-static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
-static int ip6ip6_tnl_dev_init(struct net_device *dev);
-static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+static int ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
 
 /* the IPv6 tunnel fallback device */
-static struct net_device *ip6ip6_fb_tnl_dev;
+static struct net_device *ip6_fb_tnl_dev;
 
 
 /* lists for storing tunnels in use */
@@ -84,7 +88,7 @@ static struct ip6_tnl *tnls_wc[1];
 static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
 
 /* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6ip6_lock);
+static DEFINE_RWLOCK(ip6_tnl_lock);
 
 static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 {
@@ -115,7 +119,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 }
 
 /**
- * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
  *   @remote: the address of the tunnel exit-point
  *   @local: the address of the tunnel entry-point
  *
@@ -126,7 +130,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
  **/
 
 static struct ip6_tnl *
-ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(local);
@@ -145,18 +149,18 @@ ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
 }
 
 /**
- * ip6ip6_bucket - get head of list matching given tunnel parameters
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
  *   @p: parameters containing tunnel end-points
  *
  * Description:
- *   ip6ip6_bucket() returns the head of the list matching the
+ *   ip6_tnl_bucket() returns the head of the list matching the
  *   &struct in6_addr entries laddr and raddr in @p.
  *
  * Return: head of IPv6 tunnel list
  **/
 
 static struct ip6_tnl **
-ip6ip6_bucket(struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_parm *p)
 {
 	struct in6_addr *remote = &p->raddr;
 	struct in6_addr *local = &p->laddr;
@@ -171,36 +175,36 @@ ip6ip6_bucket(struct ip6_tnl_parm *p)
 }
 
 /**
- * ip6ip6_tnl_link - add tunnel to hash table
+ * ip6_tnl_link - add tunnel to hash table
  *   @t: tunnel to be added
  **/
 
 static void
-ip6ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl *t)
 {
-	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+	struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
 
 	t->next = *tp;
-	write_lock_bh(&ip6ip6_lock);
+	write_lock_bh(&ip6_tnl_lock);
 	*tp = t;
-	write_unlock_bh(&ip6ip6_lock);
+	write_unlock_bh(&ip6_tnl_lock);
 }
 
 /**
- * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * ip6_tnl_unlink - remove tunnel from hash table
  *   @t: tunnel to be removed
  **/
 
 static void
-ip6ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl *t)
 {
 	struct ip6_tnl **tp;
 
-	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+	for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
 		if (t == *tp) {
-			write_lock_bh(&ip6ip6_lock);
+			write_lock_bh(&ip6_tnl_lock);
 			*tp = t->next;
-			write_unlock_bh(&ip6ip6_lock);
+			write_unlock_bh(&ip6_tnl_lock);
 			break;
 		}
 	}
@@ -237,12 +241,12 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
 		if (i == IP6_TNL_MAX)
 			goto failed;
 	}
-	dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
 	if (dev == NULL)
 		goto failed;
 
 	t = netdev_priv(dev);
-	dev->init = ip6ip6_tnl_dev_init;
+	dev->init = ip6_tnl_dev_init;
 	t->parms = *p;
 
 	if ((err = register_netdevice(dev)) < 0) {
@@ -250,19 +254,19 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
 		goto failed;
 	}
 	dev_hold(dev);
-	ip6ip6_tnl_link(t);
+	ip6_tnl_link(t);
 	return t;
 failed:
 	return NULL;
 }
 
 /**
- * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ * ip6_tnl_locate - find or create tunnel matching given parameters
  *   @p: tunnel parameters
  *   @create: != 0 if allowed to create new tunnel if no match found
  *
  * Description:
- *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ *   ip6_tnl_locate() first tries to locate an existing tunnel
  *   based on @parms. If this is unsuccessful, but @create is set a new
  *   tunnel device is created and registered for use.
  *
@@ -270,13 +274,13 @@ failed:
  *   matching tunnel or NULL
  **/
 
-static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
 {
 	struct in6_addr *remote = &p->raddr;
 	struct in6_addr *local = &p->laddr;
 	struct ip6_tnl *t;
 
-	for (t = *ip6ip6_bucket(p); t; t = t->next) {
+	for (t = *ip6_tnl_bucket(p); t; t = t->next) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr))
 			return t;
@@ -287,24 +291,24 @@ static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
 }
 
 /**
- * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
  *   @dev: the device to be destroyed
  *
  * Description:
- *   ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ *   ip6_tnl_dev_uninit() removes tunnel from its list
  **/
 
 static void
-ip6ip6_tnl_dev_uninit(struct net_device *dev)
+ip6_tnl_dev_uninit(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 
-	if (dev == ip6ip6_fb_tnl_dev) {
-		write_lock_bh(&ip6ip6_lock);
+	if (dev == ip6_fb_tnl_dev) {
+		write_lock_bh(&ip6_tnl_lock);
 		tnls_wc[0] = NULL;
-		write_unlock_bh(&ip6ip6_lock);
+		write_unlock_bh(&ip6_tnl_lock);
 	} else {
-		ip6ip6_tnl_unlink(t);
+		ip6_tnl_unlink(t);
 	}
 	ip6_tnl_dst_reset(t);
 	dev_put(dev);
@@ -372,16 +376,16 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 }
 
 /**
- * ip6ip6_err - tunnel error handler
+ * ip6_tnl_err - tunnel error handler
  *
  * Description:
- *   ip6ip6_err() should handle errors in the tunnel according
+ *   ip6_tnl_err() should handle errors in the tunnel according
  *   to the specifications in RFC 2473.
  **/
 
 static int
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	   int type, int code, int offset, __be32 info)
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
+	    int *type, int *code, int *msg, __be32 *info, int offset)
 {
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
 	struct ip6_tnl *t;
@@ -396,13 +400,16 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	   in trouble since we might need the source address for further
 	   processing of the error. */
 
-	read_lock(&ip6ip6_lock);
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+	read_lock(&ip6_tnl_lock);
+	if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+		goto out;
+
+	if (t->parms.proto != ipproto && t->parms.proto != 0)
 		goto out;
 
 	err = 0;
 
-	switch (type) {
+	switch (*type) {
 		__u32 teli;
 		struct ipv6_tlv_tnl_enc_lim *tel;
 		__u32 mtu;
@@ -414,7 +421,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		rel_msg = 1;
 		break;
 	case ICMPV6_TIME_EXCEED:
-		if (code == ICMPV6_EXC_HOPLIMIT) {
+		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "%s: Too small hop limit or "
@@ -425,10 +432,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		break;
 	case ICMPV6_PARAMPROB:
 		teli = 0;
-		if (code == ICMPV6_HDR_FIELD)
+		if ((*code) == ICMPV6_HDR_FIELD)
 			teli = parse_tlv_tnl_enc_lim(skb, skb->data);
 
-		if (teli && teli == ntohl(info) - 2) {
+		if (teli && teli == ntohl(*info) - 2) {
 			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 			if (tel->encap_limit == 0) {
 				if (net_ratelimit())
@@ -445,7 +452,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
-		mtu = ntohl(info) - offset;
+		mtu = ntohl(*info) - offset;
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
@@ -458,20 +465,144 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	}
-	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+
+	*type = rel_type;
+	*code = rel_code;
+	*info = rel_info;
+	*msg = rel_msg;
+
+out:
+	read_unlock(&ip6_tnl_lock);
+	return err;
+}
+
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	int rel_msg = 0;
+	int rel_type = type;
+	int rel_code = code;
+	__u32 rel_info = info;
+	int err;
+	struct sk_buff *skb2;
+	struct iphdr *eiph;
+	struct flowi fl;
+	struct rtable *rt;
+
+	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg == 0)
+		return 0;
+
+	switch (rel_type) {
+	case ICMPV6_DEST_UNREACH:
+		if (rel_code != ICMPV6_ADDR_UNREACH)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_HOST_UNREACH;
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		if (rel_code != 0)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_FRAG_NEEDED;
+		break;
+	default:
+		return 0;
+	}
+
+	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+		return 0;
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (!skb2)
+		return 0;
+
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	skb_pull(skb2, offset);
+	skb_reset_network_header(skb2);
+	eiph = ip_hdr(skb2);
+
+	/* Try to guess incoming interface */
+	memset(&fl, 0, sizeof(fl));
+	fl.fl4_dst = eiph->saddr;
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_IPIP;
+	if (ip_route_output_key(&rt, &fl))
+		goto out;
+	skb2->dev = rt->u.dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags & RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		fl.fl4_dst = eiph->daddr;
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
+		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+			ip_rt_put(rt);
+			goto out;
+		}
+		skb2->dst = (struct dst_entry *)rt;	/* skb2 owns rt now */
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+				   skb2->dev) ||
+		    skb2->dst->dev->type != ARPHRD_TUNNEL)
+			goto out;
+	}
+
+	/* change mtu on this route */
+	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+		if (rel_info > dst_mtu(skb2->dst))
+			goto out;
+
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+		rel_info = htonl(rel_info);
+	}
+
+	icmp_send(skb2, rel_type, rel_code, rel_info);
+
+out:
+	kfree_skb(skb2);
+	return 0;
+}
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	int rel_msg = 0;
+	int rel_type = type;
+	int rel_code = code;
+	__u32 rel_info = info;
+	int err;
+
+	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 		struct rt6_info *rt;
 		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 		if (!skb2)
-			goto out;
+			return 0;
 
 		dst_release(skb2->dst);
 		skb2->dst = NULL;
 		skb_pull(skb2, offset);
-		skb2->nh.raw = skb2->data;
+		skb_reset_network_header(skb2);
 
 		/* Try to guess incoming interface */
-		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+		rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
 
 		if (rt && rt->rt6i_dev)
 			skb2->dev = rt->rt6i_dev;
@@ -483,19 +614,34 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 		kfree_skb(skb2);
 	}
-out:
-	read_unlock(&ip6ip6_lock);
-	return err;
+
+	return 0;
 }
 
-static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
-					  struct sk_buff *skb)
+static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+					struct ipv6hdr *ipv6h,
+					struct sk_buff *skb)
 {
-	struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 
-	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
-		IP6_ECN_set_ce(inner_iph);
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+	/* dsfield has ECN cleared above; test the outer header directly */
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+		IP_ECN_set_ce(ip_hdr(skb));
+}
+
+static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+					struct ipv6hdr *ipv6h,
+					struct sk_buff *skb)
+{
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
+
 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 {
 	struct ip6_tnl_parm *p = &t->parms;
@@ -519,53 +665,61 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 }
 
 /**
- * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
  *   @skb: received socket buffer
+ *   @protocol: ethernet protocol ID
+ *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
  *
  * Return: 0
  **/
 
-static int
-ip6ip6_rcv(struct sk_buff *skb)
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+		       __u8 ipproto,
+		       void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
+						    struct ipv6hdr *ipv6h,
+						    struct sk_buff *skb))
 {
-	struct ipv6hdr *ipv6h;
 	struct ip6_tnl *t;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 
-	ipv6h = skb->nh.ipv6h;
+	read_lock(&ip6_tnl_lock);
 
-	read_lock(&ip6ip6_lock);
+	if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+		if (t->parms.proto != ipproto && t->parms.proto != 0) {
+			read_unlock(&ip6_tnl_lock);
+			goto discard;
+		}
 
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			read_unlock(&ip6ip6_lock);
+			read_unlock(&ip6_tnl_lock);
 			goto discard;
 		}
 
 		if (!ip6_tnl_rcv_ctl(t)) {
 			t->stat.rx_dropped++;
-			read_unlock(&ip6ip6_lock);
+			read_unlock(&ip6_tnl_lock);
 			goto discard;
 		}
 		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
-		skb->protocol = htons(ETH_P_IPV6);
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
+		skb->protocol = htons(protocol);
 		skb->pkt_type = PACKET_HOST;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		skb->dev = t->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
 		nf_reset(skb);
-		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-			ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
-		ip6ip6_ecn_decapsulate(ipv6h, skb);
+
+		dscp_ecn_decapsulate(t, ipv6h, skb);
+
 		t->stat.rx_packets++;
 		t->stat.rx_bytes += skb->len;
 		netif_rx(skb);
-		read_unlock(&ip6ip6_lock);
+		read_unlock(&ip6_tnl_lock);
 		return 0;
 	}
-	read_unlock(&ip6ip6_lock);
+	read_unlock(&ip6_tnl_lock);
 	return 1;
 
 discard:
@@ -573,6 +727,18 @@ discard:
 	return 0;
 }
 
+static int ip4ip6_rcv(struct sk_buff *skb)
+{
+	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+			   ip4ip6_dscp_ecn_decapsulate);
+}
+
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+			   ip6ip6_dscp_ecn_decapsulate);
+}
+
 struct ipv6_tel_txoption {
 	struct ipv6_txoptions ops;
 	__u8 dst_opt[8];
@@ -593,7 +759,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 }
 
 /**
- * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
  *   @hdr: IPv6 header from the incoming packet
  *
@@ -607,7 +773,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
  **/
 
 static inline int
-ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
 {
 	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 }
@@ -641,72 +807,49 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 	return ret;
 }
 /**
- * ip6ip6_tnl_xmit - encapsulate packet and send
+ * ip6_tnl_xmit2 - encapsulate packet and send
  *   @skb: the outgoing socket buffer
  *   @dev: the outgoing tunnel device
+ *   @dsfield: dscp code for outer header
+ *   @fl: flow of tunneled packet
+ *   @encap_limit: encapsulation limit
+ *   @pmtu: Path MTU is stored if packet is too big
  *
  * Description:
  *   Build new header and do some sanity checks on the packet before sending
  *   it.
  *
  * Return:
- *   0
+ *   0 on success
+ *   -1 fail
+ *   %-EMSGSIZE message too big. return mtu in this case.
  **/
 
-static int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+			 struct net_device *dev,
+			 __u8 dsfield,
+			 struct flowi *fl,
+			 int encap_limit,
+			 __u32 *pmtu)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->stat;
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
-	int encap_limit = -1;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct ipv6_tel_txoption opt;
-	__u16 offset;
-	struct flowi fl;
 	struct dst_entry *dst;
 	struct net_device *tdev;
 	int mtu;
 	int max_headroom = sizeof(struct ipv6hdr);
 	u8 proto;
-	int err;
+	int err = -1;
 	int pkt_len;
-	int dsfield;
-
-	if (t->recursion++) {
-		stats->collisions++;
-		goto tx_err;
-	}
-	if (skb->protocol != htons(ETH_P_IPV6) ||
-	    !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
-		goto tx_err;
-
-	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
-			goto tx_err;
-		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl, &t->fl, sizeof (fl));
-	proto = fl.proto;
-
-	dsfield = ipv6_get_dsfield(ipv6h);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
 
 	if ((dst = ip6_tnl_dst_check(t)) != NULL)
 		dst_hold(dst);
 	else {
-		dst = ip6_route_output(NULL, &fl);
+		dst = ip6_route_output(NULL, fl);
 
-		if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+		if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
 			goto tx_err_link_failure;
 	}
 
@@ -730,7 +873,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->dst)
 		skb->dst->ops->update_pmtu(skb->dst, mtu);
 	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		*pmtu = mtu;
+		err = -EMSGSIZE;
 		goto tx_err_dst_release;
 	}
 
@@ -754,22 +898,24 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	dst_release(skb->dst);
 	skb->dst = dst_clone(dst);
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 
+	proto = fl->proto;
 	if (encap_limit >= 0) {
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 	}
-	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
-	ipv6h = skb->nh.ipv6h;
-	*(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	ipv6h = ipv6_hdr(skb);
+	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
-	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
 	nf_reset(skb);
 	pkt_len = skb->len;
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
@@ -783,13 +929,131 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		stats->tx_aborted_errors++;
 	}
 	ip6_tnl_dst_store(t, dst);
-	t->recursion--;
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
 	dst_link_failure(skb);
 tx_err_dst_release:
 	dst_release(dst);
+	return err;
+}
+
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct iphdr  *iph = ip_hdr(skb);
+	int encap_limit = -1;
+	struct flowi fl;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t))
+		return -1;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl, &t->fl, sizeof (fl));
+	fl.proto = IPPROTO_IPIP;
+
+	dsfield = ipv4_get_dsfield(iph);
+	/* mask is __be32: convert the shifted TOS before masking */
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					  & IPV6_TCLASS_MASK;
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE)
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi fl;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+		return -1;
+
+	offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+			return -1;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl, &t->fl, sizeof (fl));
+	fl.proto = IPPROTO_IPV6;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	if (err != 0) {
+		if (err == -EMSGSIZE)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->stat;
+	int ret;
+
+	if (t->recursion++) {
+		t->stat.collisions++;
+		goto tx_err;
+	}
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		ret = ip4ip6_tnl_xmit(skb, dev);
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		ret = ip6ip6_tnl_xmit(skb, dev);
+		break;
+	default:
+		goto tx_err;
+	}
+
+	if (ret < 0)
+		goto tx_err;
+
+	t->recursion--;
+	return 0;
+
 tx_err:
 	stats->tx_errors++;
 	stats->tx_dropped++;
@@ -817,7 +1081,7 @@ static void ip6_tnl_set_cap(struct ip6_tnl *t)
 	}
 }
 
-static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
 	struct ip6_tnl_parm *p = &t->parms;
@@ -870,17 +1134,17 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
 }
 
 /**
- * ip6ip6_tnl_change - update the tunnel parameters
+ * ip6_tnl_change - update the tunnel parameters
  *   @t: tunnel to be changed
  *   @p: tunnel configuration parameters
  *   @active: != 0 if tunnel is ready for use
  *
  * Description:
- *   ip6ip6_tnl_change() updates the tunnel parameters
+ *   ip6_tnl_change() updates the tunnel parameters
  **/
 
 static int
-ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 {
 	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
 	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
@@ -889,19 +1153,20 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	t->parms.encap_limit = p->encap_limit;
 	t->parms.flowinfo = p->flowinfo;
 	t->parms.link = p->link;
+	t->parms.proto = p->proto;
 	ip6_tnl_dst_reset(t);
-	ip6ip6_tnl_link_config(t);
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
 /**
- * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
  *   @ifr: parameters passed from userspace
  *   @cmd: command to be performed
  *
  * Description:
- *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
  *   from userspace.
  *
  *   The possible commands are the following:
@@ -923,7 +1188,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
  **/
 
 static int
-ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
 	struct ip6_tnl_parm p;
@@ -931,12 +1196,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		if (dev == ip6ip6_fb_tnl_dev) {
+		if (dev == ip6_fb_tnl_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
 				err = -EFAULT;
 				break;
 			}
-			t = ip6ip6_tnl_locate(&p, 0);
+			t = ip6_tnl_locate(&p, 0);
 		}
 		if (t == NULL)
 			t = netdev_priv(dev);
@@ -954,10 +1219,11 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 			break;
 		err = -EINVAL;
-		if (p.proto != IPPROTO_IPV6)
+		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+		    p.proto != 0)
 			break;
-		t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
-		if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+		t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
+		if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -966,9 +1232,9 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			} else
 				t = netdev_priv(dev);
 
-			ip6ip6_tnl_unlink(t);
-			err = ip6ip6_tnl_change(t, &p);
-			ip6ip6_tnl_link(t);
+			ip6_tnl_unlink(t);
+			err = ip6_tnl_change(t, &p);
+			ip6_tnl_link(t);
 			netdev_state_change(dev);
 		}
 		if (t) {
@@ -984,15 +1250,15 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!capable(CAP_NET_ADMIN))
 			break;
 
-		if (dev == ip6ip6_fb_tnl_dev) {
+		if (dev == ip6_fb_tnl_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 				break;
 			err = -ENOENT;
-			if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL)
+			if ((t = ip6_tnl_locate(&p, 0)) == NULL)
 				break;
 			err = -EPERM;
-			if (t->dev == ip6ip6_fb_tnl_dev)
+			if (t->dev == ip6_fb_tnl_dev)
 				break;
 			dev = t->dev;
 		}
@@ -1006,20 +1272,20 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ * ip6_tnl_get_stats - return the stats for tunnel device
  *   @dev: virtual device associated with tunnel
  *
  * Return: stats for device
  **/
 
 static struct net_device_stats *
-ip6ip6_tnl_get_stats(struct net_device *dev)
+ip6_tnl_get_stats(struct net_device *dev)
 {
 	return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
 }
 
 /**
- * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
  *   @dev: virtual device associated with tunnel
  *   @new_mtu: the new mtu
  *
@@ -1029,7 +1295,7 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
  **/
 
 static int
-ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < IPV6_MIN_MTU) {
 		return -EINVAL;
@@ -1039,22 +1305,22 @@ ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 /**
- * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * ip6_tnl_dev_setup - setup virtual tunnel device
  *   @dev: virtual device associated with tunnel
  *
  * Description:
  *   Initialize function pointers and device parameters
  **/
 
-static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+static void ip6_tnl_dev_setup(struct net_device *dev)
 {
 	SET_MODULE_OWNER(dev);
-	dev->uninit = ip6ip6_tnl_dev_uninit;
+	dev->uninit = ip6_tnl_dev_uninit;
 	dev->destructor = free_netdev;
-	dev->hard_start_xmit = ip6ip6_tnl_xmit;
-	dev->get_stats = ip6ip6_tnl_get_stats;
-	dev->do_ioctl = ip6ip6_tnl_ioctl;
-	dev->change_mtu = ip6ip6_tnl_change_mtu;
+	dev->hard_start_xmit = ip6_tnl_xmit;
+	dev->get_stats = ip6_tnl_get_stats;
+	dev->do_ioctl = ip6_tnl_ioctl;
+	dev->change_mtu = ip6_tnl_change_mtu;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1065,50 +1331,56 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
 
 
 /**
- * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
 static inline void
-ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+ip6_tnl_dev_init_gen(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	t->fl.proto = IPPROTO_IPV6;
 	t->dev = dev;
 	strcpy(t->parms.name, dev->name);
 }
 
 /**
- * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
 static int
-ip6ip6_tnl_dev_init(struct net_device *dev)
+ip6_tnl_dev_init(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	ip6ip6_tnl_dev_init_gen(dev);
-	ip6ip6_tnl_link_config(t);
+	ip6_tnl_dev_init_gen(dev);
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
 /**
- * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
  *   @dev: fallback device
  *
  * Return: 0
  **/
 
 static int
-ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+ip6_fb_tnl_dev_init(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	ip6ip6_tnl_dev_init_gen(dev);
+	ip6_tnl_dev_init_gen(dev);
+	t->parms.proto = IPPROTO_IPV6;	/* fallback device starts out with proto IPPROTO_IPV6 */
 	dev_hold(dev);
 	tnls_wc[0] = t;
 	return 0;
 }
 
+static struct xfrm6_tunnel ip4ip6_handler = {
+	.handler	= ip4ip6_rcv,
+	.err_handler	= ip4ip6_err,
+	.priority	=	1,
+};
+
 static struct xfrm6_tunnel ip6ip6_handler = {
 	.handler	= ip6ip6_rcv,
 	.err_handler	= ip6ip6_err,
@@ -1125,30 +1397,40 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
+	if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
+		printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
+		err = -EAGAIN;
+		goto out;
+	}
+
 	if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
-		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
-		return -EAGAIN;
+		printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
+		err = -EAGAIN;
+		goto unreg_ip4ip6;
 	}
-	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
-					 ip6ip6_tnl_dev_setup);
+	ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+				      ip6_tnl_dev_setup);
 
-	if (!ip6ip6_fb_tnl_dev) {
+	if (!ip6_fb_tnl_dev) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+	ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
 
-	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
-		free_netdev(ip6ip6_fb_tnl_dev);
+	if ((err = register_netdev(ip6_fb_tnl_dev))) {
+		free_netdev(ip6_fb_tnl_dev);
 		goto fail;
 	}
 	return 0;
 fail:
 	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
+unreg_ip4ip6:
+	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out:
 	return err;
 }
 
-static void __exit ip6ip6_destroy_tunnels(void)
+static void __exit ip6_tnl_destroy_tunnels(void)
 {
 	int h;
 	struct ip6_tnl *t;
@@ -1168,11 +1450,14 @@ static void __exit ip6ip6_destroy_tunnels(void)
 
 static void __exit ip6_tunnel_cleanup(void)
 {
+	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+		printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
+
 	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
-		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+		printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
 
 	rtnl_lock();
-	ip6ip6_destroy_tunnels();
+	ip6_tnl_destroy_tunnels();
 	rtnl_unlock();
 }
 
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75de..1ee50b5782e1 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -79,9 +79,9 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 	ipch = (void *)skb->data;
-	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	skb->transport_header = skb->network_header + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
 
 	/* decompression */
@@ -111,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->truesize += dlen - plen;
 	__skb_put(skb, dlen - plen);
-	memcpy(skb->data, scratch, dlen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
 	err = ipch->nexthdr;
 
 out_put_cpu:
@@ -124,15 +124,13 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
 	struct ipv6hdr *top_iph;
-	int hdr_len;
 	struct ipv6_comp_hdr *ipch;
 	struct ipcomp_data *ipcd = x->data;
 	int plen, dlen;
 	u8 *start, *scratch;
 	struct crypto_comp *tfm;
 	int cpu;
-
-	hdr_len = skb->h.raw - skb->data;
+	int hdr_len = skb_transport_offset(skb);
 
 	/* check whether datagram len is larger than threshold */
 	if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -145,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	/* compression */
 	plen = skb->len - hdr_len;
 	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->h.raw;
+	start = skb_transport_header(skb);
 
 	cpu = get_cpu();
 	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
@@ -166,10 +164,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	ipch = (struct ipv6_comp_hdr *)start;
-	ipch->nexthdr = *skb->nh.raw;
+	ipch->nexthdr = *skb_network_header(skb);
 	ipch->flags = 0;
 	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb->nh.raw = IPPROTO_COMP;
+	*skb_network_header(skb) = IPPROTO_COMP;
 
 out_ok:
 	return 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f5f9582a8d39..aa3d07c52a8f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -101,14 +101,14 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	__skb_pull(skb, sizeof(*ipv6h));
 	err = -EPROTONOSUPPORT;
 
 	rcu_read_lock();
 	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 	if (likely(ops && ops->gso_send_check)) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		err = ops->gso_send_check(skb);
 	}
 	rcu_read_unlock();
@@ -137,14 +137,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	__skb_pull(skb, sizeof(*ipv6h));
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
 	rcu_read_lock();
 	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 	if (likely(ops && ops->gso_segment)) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		segs = ops->gso_segment(skb, features);
 	}
 	rcu_read_unlock();
@@ -153,7 +153,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 		goto out;
 
 	for (skb = segs; skb; skb = skb->next) {
-		ipv6h = skb->nh.ipv6h;
+		ipv6h = ipv6_hdr(skb);
 		ipv6h->payload_len = htons(skb->len - skb->mac_len -
 					   sizeof(*ipv6h));
 	}
@@ -694,7 +694,7 @@ done:
 		retv = ip6_ra_control(sk, val, NULL);
 		break;
 	case IPV6_MTU_DISCOVER:
-		if (val<0 || val>2)
+		if (val<0 || val>3)
 			goto e_inval;
 		np->pmtudisc = val;
 		retv = 0;
@@ -761,6 +761,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+EXPORT_SYMBOL(ipv6_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
@@ -796,18 +797,37 @@ EXPORT_SYMBOL(compat_ipv6_setsockopt);
 #endif
 
 static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
-				  char __user *optval, int len)
+				  int optname, char __user *optval, int len)
 {
 	struct ipv6_opt_hdr *hdr;
 
-	if (!opt || !opt->hopopt)
+	if (!opt)
+		return 0;
+
+	switch(optname) {	/* pick the stored header that matches the option */
+	case IPV6_HOPOPTS:
+		hdr = opt->hopopt;
+		break;
+	case IPV6_RTHDRDSTOPTS:
+		hdr = opt->dst0opt;
+		break;
+	case IPV6_RTHDR:
+		hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+		break;
+	case IPV6_DSTOPTS:
+		hdr = opt->dst1opt;
+		break;
+	default:
+		return -EINVAL;	/* should not happen */
+	}
+
+	if (!hdr)	/* requested header is not set: length 0 */
 		return 0;
-	hdr = opt->hopopt;
 
 	len = min_t(unsigned int, len, ipv6_optlen(hdr));
-	if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+	if (copy_to_user(optval, hdr, len))	/* len is clamped above */
 		return -EFAULT;
-	return len;
+	return ipv6_optlen(hdr);	/* full header length, even if the copy was truncated */
 }
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
@@ -945,7 +965,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 
 		lock_sock(sk);
 		len = ipv6_getsockopt_sticky(sk, np->opt,
-					     optval, len);
+					     optname, optval, len);
 		release_sock(sk);
 		return put_user(len, optlen);
 	}
@@ -1066,6 +1086,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+EXPORT_SYMBOL(ipv6_getsockopt);
+
 #ifdef CONFIG_COMPAT
 int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index e12e3d4fccec..000000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,36 +0,0 @@
-
-#include <linux/module.h>
-#include <net/protocol.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-
-EXPORT_SYMBOL(icmpv6_send);
-EXPORT_SYMBOL(icmpv6_statistics);
-EXPORT_SYMBOL(icmpv6_err_convert);
-EXPORT_SYMBOL(ndisc_mc_map);
-EXPORT_SYMBOL(register_inet6addr_notifier);
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(ipv6_setsockopt);
-EXPORT_SYMBOL(ipv6_getsockopt);
-EXPORT_SYMBOL(inet6_register_protosw);
-EXPORT_SYMBOL(inet6_unregister_protosw);
-EXPORT_SYMBOL(inet6_add_protocol);
-EXPORT_SYMBOL(inet6_del_protocol);
-EXPORT_SYMBOL(ip6_xmit);
-EXPORT_SYMBOL(inet6_release);
-EXPORT_SYMBOL(inet6_bind);
-EXPORT_SYMBOL(inet6_getname);
-EXPORT_SYMBOL(inet6_ioctl);
-EXPORT_SYMBOL(ipv6_get_saddr);
-EXPORT_SYMBOL(ipv6_chk_addr);
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-#ifdef CONFIG_XFRM
-EXPORT_SYMBOL(xfrm6_rcv);
-EXPORT_SYMBOL(xfrm6_input_addr);
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-#endif
-EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8d6625ec782..6c2758951d60 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -988,7 +988,7 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
 	if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
 		return 0;
 
-	pic = (struct icmp6hdr *)skb->h.raw;
+	pic = icmp6_hdr(skb);
 
 	switch (pic->icmp6_type) {
 	case ICMPV6_MGM_QUERY:
@@ -1167,11 +1167,11 @@ int igmp6_event_query(struct sk_buff *skb)
 		return -EINVAL;
 
 	/* compute payload length excluding extension headers */
-	len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
-	len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
+	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+	len -= skb_network_header_len(skb);
 
 	/* Drop queries with not link local source */
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
 	idev = in6_dev_get(skb->dev);
@@ -1179,7 +1179,7 @@ int igmp6_event_query(struct sk_buff *skb)
 	if (idev == NULL)
 		return 0;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 	group = (struct in6_addr *) (hdr + 1);
 	group_type = ipv6_addr_type(group);
 
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
 			in6_dev_put(idev);
 			return -EINVAL;
 		}
-		mlh2 = (struct mld2_query *) skb->h.raw;
+		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
 		if (!max_delay)
 			max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
-			mlh2 = (struct mld2_query *) skb->h.raw;
+			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
 	} else {
@@ -1300,10 +1300,10 @@ int igmp6_event_report(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
 		return -EINVAL;
 
-	hdr = (struct icmp6hdr*) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	/* Drop reports with not link local source */
-	addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
 	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
 		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
@@ -1423,8 +1423,9 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
-	skb->h.raw = (unsigned char *)pmr;
+	skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+	skb_put(skb, sizeof(*pmr));
+	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->type = ICMPV6_MLD2_REPORT;
 	pmr->resv1 = 0;
 	pmr->csum = 0;
@@ -1441,7 +1442,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
 		unsigned char ha[MAX_ADDR_LEN];
 		int err;
 
-		ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
+		ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
 		err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
 		if (err < 0) {
 			kfree_skb(skb);
@@ -1459,20 +1460,21 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
 
 static void mld_sendpack(struct sk_buff *skb)
 {
-	struct ipv6hdr *pip6 = skb->nh.ipv6h;
-	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+	struct ipv6hdr *pip6 = ipv6_hdr(skb);
+	struct mld2_report *pmr =
+			      (struct mld2_report *)skb_transport_header(skb);
 	int payload_len, mldlen;
 	struct inet6_dev *idev = in6_dev_get(skb->dev);
 	int err;
 
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
-		sizeof(struct ipv6hdr);
-	mldlen = skb->tail - skb->h.raw;
+	payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
+	mldlen = skb->tail - skb->transport_header;
 	pip6->payload_len = htons(payload_len);
 
 	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
-		IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
+					     mldlen, 0));
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
 		mld_dev_queue_xmit);
 	if (!err) {
@@ -1506,7 +1508,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
-	pmr = (struct mld2_report *)skb->h.raw;
+	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
 	*ppgr = pgr;
 	return skb;
@@ -1539,7 +1541,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	if (!*psf_list)
 		goto empty_source;
 
-	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
@@ -1791,7 +1793,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
 		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed6..13b7160fb892 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -90,23 +90,26 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 {
 	struct ip6_mh *mh;
 
-	if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
-	    !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+	if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb_transport_header(skb)[1] + 1) << 3))))
 		return -1;
 
-	mh = (struct ip6_mh *)skb->h.raw;
+	mh = (struct ip6_mh *)skb_transport_header(skb);
 
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
 			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
-		mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
 			       mh->ip6mh_proto);
-		mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
@@ -122,12 +125,12 @@ struct mip6_report_rate_limiter {
 };
 
 static struct mip6_report_rate_limiter mip6_report_rl = {
-	.lock = SPIN_LOCK_UNLOCKED
+	.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
 };
 
 static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
 
 	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
@@ -152,10 +155,10 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_DSTOPTS;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_DSTOPTS;
 
-	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+	dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
 	dstopt->nexthdr = nexthdr;
 
 	hao = mip6_padn((char *)(dstopt + 1),
@@ -215,21 +218,22 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 	if (likely(opt->dsthao)) {
 		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(offset >= 0))
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + offset);
 	}
 
 	skb_get_timestamp(skb, &stamp);
 
-	if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
-				  hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+	if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+				  hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
 				  opt->iif))
 		goto out;
 
 	memset(&sel, 0, sizeof(sel));
-	memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+	memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
 	       sizeof(sel.daddr));
 	sel.prefixlen_d = 128;
-	memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+	memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 	       sizeof(sel.saddr));
 	sel.prefixlen_s = 128;
 	sel.family = AF_INET6;
@@ -253,11 +257,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 			       u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
 
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
@@ -288,7 +294,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
@@ -361,10 +367,10 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ROUTING;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ROUTING;
 
-	rt2 = (struct rt2_hdr *)skb->h.raw;
+	rt2 = (struct rt2_hdr *)skb_transport_header(skb);
 	rt2->rt_hdr.nexthdr = nexthdr;
 	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
 	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
@@ -383,11 +389,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 			     u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
 
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
@@ -397,7 +405,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 		case NEXTHDR_ROUTING:
 			if (offset + 3 <= packet_len) {
 				struct ipv6_rt_hdr *rt;
-				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
 				if (rt->type != 0)
 					return offset;
 			}
@@ -417,7 +425,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 121f31c283f8..d8b36451bada 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -319,6 +319,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
 	return -EINVAL;
 }
 
+EXPORT_SYMBOL(ndisc_mc_map);
+
 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
 {
 	const u32 *p32 = pkey;
@@ -425,36 +427,23 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
 	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
-		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
-		   int router, int solicited, int override, int inc_opt)
+static void __ndisc_send(struct net_device *dev,
+			 struct neighbour *neigh,
+			 struct in6_addr *daddr, struct in6_addr *saddr,
+			 struct icmp6hdr *icmp6h, struct in6_addr *target,
+			 int llinfo, int icmp6_mib_outnd)
 {
-	struct in6_addr tmpaddr;
-	struct inet6_ifaddr *ifp;
-	struct inet6_dev *idev;
 	struct flowi fl;
-	struct dst_entry* dst;
+	struct dst_entry *dst;
 	struct sock *sk = ndisc_socket->sk;
-	struct in6_addr *src_addr;
-	struct nd_msg *msg;
-	int len;
 	struct sk_buff *skb;
+	struct icmp6hdr *hdr;
+	struct inet6_dev *idev;
+	int len;
 	int err;
+	u8 *opt;
 
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
-	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
-	if (ifp) {
-		src_addr = solicited_addr;
-		in6_ifa_put(ifp);
-	} else {
-		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
-			return;
-		src_addr = &tmpaddr;
-	}
-
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+	ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr,
 			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
@@ -465,60 +454,57 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	if (err < 0)
 		return;
 
-	if (inc_opt) {
-		if (dev->addr_len)
-			len += ndisc_opt_addr_space(dev);
-		else
-			inc_opt = 0;
-	}
+	if (!dev->addr_len)
+		llinfo = 0;
+
+	len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
+	if (llinfo)
+		len += ndisc_opt_addr_space(dev);
 
 	skb = sock_alloc_send_skb(sk,
 				  (MAX_HEADER + sizeof(struct ipv6hdr) +
 				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
-
-	if (skb == NULL) {
+	if (!skb) {
 		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n",
+			   "ICMPv6 ND: %s() failed to allocate an skb.\n",
 			   __FUNCTION__);
 		dst_release(dst);
 		return;
 	}
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
-
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
+	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
-	msg->icmph.icmp6_code = 0;
-	msg->icmph.icmp6_cksum = 0;
+	skb->transport_header = skb->tail;
+	skb_put(skb, len);
 
-	msg->icmph.icmp6_unused = 0;
-	msg->icmph.icmp6_router    = router;
-	msg->icmph.icmp6_solicited = solicited;
-	msg->icmph.icmp6_override  = override;
+	hdr = (struct icmp6hdr *)skb_transport_header(skb);
+	memcpy(hdr, icmp6h, sizeof(*hdr));
 
-	/* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicited_addr);
+	opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
+	if (target) {
+		ipv6_addr_copy((struct in6_addr *)opt, target);
+		opt += sizeof(*target);
+	}
 
-	if (inc_opt)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+	if (llinfo)
+		ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
 				       dev->addr_len, dev->type);
 
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg,
-							      len, 0));
+	hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
+					   IPPROTO_ICMPV6,
+					   csum_partial((__u8 *) hdr,
+							len, 0));
 
 	skb->dst = dst;
+
 	idev = in6_dev_get(dst->dev);
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
 	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+		ICMP6_INC_STATS(idev, icmp6_mib_outnd);
 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
 	}
 
@@ -526,165 +512,95 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		in6_dev_put(idev);
 }
 
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
+		   int router, int solicited, int override, int inc_opt)
+{
+	struct in6_addr tmpaddr;
+	struct inet6_ifaddr *ifp;
+	struct in6_addr *src_addr;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+	};
+
+	/* for anycast or proxy, solicited_addr != src_addr */
+	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+	if (ifp) {
+		src_addr = solicited_addr;
+		if (ifp->flags & IFA_F_OPTIMISTIC)
+			override = 0;
+		in6_ifa_put(ifp);
+	} else {
+		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+			return;
+		src_addr = &tmpaddr;
+	}
+
+	icmp6h.icmp6_router = router;
+	icmp6h.icmp6_solicited = solicited;
+	icmp6h.icmp6_override = override;
+
+	__ndisc_send(dev, neigh, daddr, src_addr,
+		     &icmp6h, solicited_addr,
+		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0,
+		     ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+}
+
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   struct in6_addr *solicit,
 		   struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-	struct sock *sk = ndisc_socket->sk;
-	struct sk_buff *skb;
-	struct nd_msg *msg;
 	struct in6_addr addr_buf;
-	int len;
-	int err;
-	int send_llinfo;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+	};
 
 	if (saddr == NULL) {
-		if (ipv6_get_lladdr(dev, &addr_buf))
+		if (ipv6_get_lladdr(dev, &addr_buf,
+				   (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
 			return;
 		saddr = &addr_buf;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
-			dev->ifindex);
-
-	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
-
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-	send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
-	if (send_llinfo)
-		len += ndisc_opt_addr_space(dev);
-
-	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n",
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
-
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
-	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
-	msg->icmph.icmp6_code = 0;
-	msg->icmph.icmp6_cksum = 0;
-	msg->icmph.icmp6_unused = 0;
-
-	/* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicit);
-
-	if (send_llinfo)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-						 daddr, len,
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg,
-							      len, 0));
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
-
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	__ndisc_send(dev, neigh, daddr, saddr,
+		     &icmp6h, solicit,
+		     !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0,
+		     ICMP6_MIB_OUTNEIGHBORSOLICITS);
 }
 
 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 		   struct in6_addr *daddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-	struct sock *sk = ndisc_socket->sk;
-	struct sk_buff *skb;
-	struct icmp6hdr *hdr;
-	__u8 * opt;
-	int len;
-	int err;
-
-	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
-			dev->ifindex);
-
-	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
-
-	len = sizeof(struct icmp6hdr);
-	if (dev->addr_len)
-		len += ndisc_opt_addr_space(dev);
-
-	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 RS: %s() failed to allocate an skb.\n",
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
-
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-	hdr = (struct icmp6hdr *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)hdr;
-	hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
-	hdr->icmp6_code = 0;
-	hdr->icmp6_cksum = 0;
-	hdr->icmp6_unused = 0;
-
-	opt = (u8*) (hdr + 1);
-
-	if (dev->addr_len)
-		ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_ROUTER_SOLICITATION,
+	};
+	int send_sllao = dev->addr_len;
 
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	/*
+	 * According to section 2.2 of RFC 4429, we must not
+	 * send router solicitations with a sllao from
+	 * optimistic addresses, but we may send the solicitation
+	 * if we don't include the sllao.  So here we check
+	 * if our address is optimistic, and if so, we
+	 * suppress the inclusion of the sllao.
+	 */
+	if (send_sllao) {
+		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
+		if (ifp) {
+			if (ifp->flags & IFA_F_OPTIMISTIC)  {
+				send_sllao = 0;
+			}
+			in6_ifa_put(ifp);
+		} else {
+			send_sllao = 0;
+		}
 	}
-
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+#endif
+	__ndisc_send(dev, NULL, daddr, saddr,
+		     &icmp6h, NULL,
+		     send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0,
+		     ICMP6_MIB_OUTROUTERSOLICITS);
 }
 
 
@@ -708,8 +624,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
 
-	if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
-		saddr = &skb->nh.ipv6h->saddr;
+	if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
+		saddr = &ipv6_hdr(skb)->saddr;
 
 	if ((probes -= neigh->parms->ucast_probes) < 0) {
 		if (!(neigh->nud_state & NUD_VALID)) {
@@ -732,11 +648,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 static void ndisc_recv_ns(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb->tail - (skb->transport_header +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
@@ -796,28 +713,40 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	inc = ipv6_addr_is_multicast(daddr);
 
 	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
-		if (ifp->flags & IFA_F_TENTATIVE) {
-			/* Address is tentative. If the source
-			   is unspecified address, it is someone
-			   does DAD, otherwise we ignore solicitations
-			   until DAD timer expires.
-			 */
-			if (!dad)
+
+		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+			if (dad) {
+				if (dev->type == ARPHRD_IEEE802_TR) {
+					const unsigned char *sadr;
+					sadr = skb_mac_header(skb);
+					if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+					    sadr[9] == dev->dev_addr[1] &&
+					    sadr[10] == dev->dev_addr[2] &&
+					    sadr[11] == dev->dev_addr[3] &&
+					    sadr[12] == dev->dev_addr[4] &&
+					    sadr[13] == dev->dev_addr[5]) {
+						/* looped-back to us */
+						goto out;
+					}
+				}
+
+				/*
+				 * We are colliding with another node
+				 * who is doing DAD
+				 * so fail our DAD process
+				 */
+				addrconf_dad_failure(ifp);
 				goto out;
-			if (dev->type == ARPHRD_IEEE802_TR) {
-				unsigned char *sadr = skb->mac.raw;
-				if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
-				    sadr[9] == dev->dev_addr[1] &&
-				    sadr[10] == dev->dev_addr[2] &&
-				    sadr[11] == dev->dev_addr[3] &&
-				    sadr[12] == dev->dev_addr[4] &&
-				    sadr[13] == dev->dev_addr[5]) {
-					/* looped-back to us */
+			} else {
+				/*
+				 * This is not a dad solicitation.
+				 * If we are an optimistic node,
+				 * we should respond.
+				 * Otherwise, we should ignore it.
+				 */
+				if (!(ifp->flags & IFA_F_OPTIMISTIC))
 					goto out;
-				}
 			}
-			addrconf_dad_failure(ifp);
-			return;
 		}
 
 		idev = ifp->idev;
@@ -898,11 +827,12 @@ out:
 
 static void ndisc_recv_na(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb->tail - (skb->transport_header +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
@@ -1000,11 +930,11 @@ out:
 
 static void ndisc_recv_rs(struct sk_buff *skb)
 {
-	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
 	struct neighbour *neigh;
 	struct inet6_dev *idev;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct ndisc_options ndopts;
 	u8 *lladdr = NULL;
 
@@ -1057,7 +987,7 @@ out:
 
 static void ndisc_router_discovery(struct sk_buff *skb)
 {
-	struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
 	struct rt6_info *rt = NULL;
@@ -1068,9 +998,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	__u8 * opt = (__u8 *)(ra_msg + 1);
 
-	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+	optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
 
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 RA: source address is not link-local.\n");
 		return;
@@ -1136,7 +1066,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 #endif
 
-	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt)
 		neigh = rt->rt6i_nexthop;
@@ -1151,7 +1081,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK3(KERN_DEBUG
 			   "ICMPv6 RA: adding default router.\n");
 
-		rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref);
+		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
 		if (rt == NULL) {
 			ND_PRINTK0(KERN_ERR
 				   "ICMPv6 RA: %s() failed to add default route.\n",
@@ -1223,7 +1153,7 @@ skip_defrtr:
 	 */
 
 	if (!neigh)
-		neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
 				       skb->dev, 1);
 	if (neigh) {
 		u8 *lladdr = NULL;
@@ -1252,7 +1182,7 @@ skip_defrtr:
 			if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
 				continue;
 			rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
-				      &skb->nh.ipv6h->saddr);
+				      &ipv6_hdr(skb)->saddr);
 		}
 	}
 #endif
@@ -1311,13 +1241,13 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 	int optlen;
 	u8 *lladdr = NULL;
 
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 Redirect: source address is not link-local.\n");
 		return;
 	}
 
-	optlen = skb->tail - skb->h.raw;
+	optlen = skb->tail - skb->transport_header;
 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
 
 	if (optlen < 0) {
@@ -1326,7 +1256,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	icmph = (struct icmp6hdr *) skb->h.raw;
+	icmph = icmp6_hdr(skb);
 	target = (struct in6_addr *) (icmph + 1);
 	dest = target + 1;
 
@@ -1376,8 +1306,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 
 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
 	if (neigh) {
-		rt6_redirect(dest, &skb->nh.ipv6h->daddr,
-			     &skb->nh.ipv6h->saddr, neigh, lladdr,
+		rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
+			     &ipv6_hdr(skb)->saddr, neigh, lladdr,
 			     on_link);
 		neigh_release(neigh);
 	}
@@ -1406,21 +1336,21 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	dev = skb->dev;
 
-	if (ipv6_get_lladdr(dev, &saddr_buf)) {
+	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 Redirect: no link-local address on %s\n",
 			   dev->name);
 		return;
 	}
 
-	if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) &&
+	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
 	    !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			"ICMPv6 Redirect: target address is not link-local.\n");
 		return;
 	}
 
-	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
 			dev->ifindex);
 
 	dst = ip6_route_output(NULL, &fl);
@@ -1475,11 +1405,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	hlen = 0;
 
 	skb_reserve(buff, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
 		   IPPROTO_ICMPV6, len);
 
-	icmph = (struct icmp6hdr *)skb_put(buff, len);
-	buff->h.raw = (unsigned char*)icmph;
+	skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
+	skb_put(buff, len);
+	icmph = icmp6_hdr(buff);
 
 	memset(icmph, 0, sizeof(struct icmp6hdr));
 	icmph->icmp6_type = NDISC_REDIRECT;
@@ -1491,7 +1422,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	addrp = (struct in6_addr *)(icmph + 1);
 	ipv6_addr_copy(addrp, target);
 	addrp++;
-	ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
 
 	opt = (u8*) (addrp + 1);
 
@@ -1512,9 +1443,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	*(opt++) = (rd_len >> 3);
 	opt += 6;
 
-	memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+	memcpy(opt, ipv6_hdr(skb), rd_len - 8);
 
-	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
 					     len, IPPROTO_ICMPV6,
 					     csum_partial((u8 *) icmph, len, 0));
 
@@ -1544,14 +1475,14 @@ int ndisc_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb->len))
 		return 0;
 
-	msg = (struct nd_msg *) skb->h.raw;
+	msg = (struct nd_msg *)skb_transport_header(skb);
 
-	__skb_push(skb, skb->data-skb->h.raw);
+	__skb_push(skb, skb->data - skb_transport_header(skb));
 
-	if (skb->nh.ipv6h->hop_limit != 255) {
+	if (ipv6_hdr(skb)->hop_limit != 255) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 NDISC: invalid hop-limit: %d\n",
-			   skb->nh.ipv6h->hop_limit);
+			   ipv6_hdr(skb)->hop_limit);
 		return 0;
 	}
 
@@ -1584,7 +1515,7 @@ int ndisc_rcv(struct sk_buff *skb)
 	case NDISC_REDIRECT:
 		ndisc_redirect_rcv(skb);
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1c405dd30c67..38b149613915 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -11,7 +11,7 @@
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct dst_entry *dst;
 	struct flowi fl = {
 		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -61,7 +61,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
 		rt_info->saddr = iph->saddr;
@@ -73,7 +73,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(*pskb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
 			return ip6_route_me_harder(*pskb);
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
 __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 			     unsigned int dataoff, u_int8_t protocol)
 {
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	__sum16 csum = 0;
 
 	switch (skb->ip_summed) {
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index fdb30a5916e5..0004db38af6d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -11,18 +11,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
- *             to adapt it to IPv6
- *             HEAVILY based in ipqueue.c by James Morris. It's just
- *             a little modified version of it, so he's nearly the
- *             real coder of this.
- *             Few changes needed, mainly the hard_routing code and
- *             the netlink socket protocol (we're NETLINK_IP6_FW).
- * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
- * 2005-02-04: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -189,12 +177,13 @@ ipq_flush(int verdict)
 static struct sk_buff *
 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size = 0;
 	size_t data_len = 0;
 	struct sk_buff *skb;
 	struct ipq_packet_msg *pmsg;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	read_lock_bh(&queue_lock);
 
@@ -232,15 +221,16 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 	pmsg = NLMSG_DATA(nlh);
 	memset(pmsg, 0, sizeof(*pmsg));
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+	tv = ktime_to_timeval(entry->skb->tstamp);
+	pmsg->timestamp_sec   = tv.tv_sec;
+	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
 	pmsg->hook            = entry->info->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
@@ -376,7 +366,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, v->data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, v->payload, v->data_len);
+	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 
 	return 0;
@@ -485,7 +475,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (skblen < sizeof(*nlh))
 		return;
 
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	nlmsglen = nlh->nlmsg_len;
 	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 		return;
@@ -667,7 +657,7 @@ static int __init ip6_queue_init(void)
 	struct proc_dir_entry *proc;
 
 	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
 				      THIS_MODULE);
 	if (ipqnl == NULL) {
 		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7c512e13f956..9aa624026688 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -7,15 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *      - new extension header parser code
- * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
- * 	- Unification of {ip,ip6}_tables into x_tables
- * 	- Removed tcp and udp code, since it's not ipv6 specific
  */
 
 #include <linux/capability.h>
@@ -115,7 +106,7 @@ ip6_packet_match(const struct sk_buff *skb,
 {
 	size_t i;
 	unsigned long ret;
-	const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
 
@@ -301,7 +292,7 @@ ip6t_do_table(struct sk_buff **pskb,
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
-				    ntohs((*pskb)->nh.ipv6h->payload_len)
+				    ntohs(ipv6_hdr(*pskb)->payload_len)
 				    + IPV6_HDR_LEN,
 				    1);
 
@@ -1448,8 +1439,8 @@ static void __exit ip6_tables_fini(void)
 int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		  int target, unsigned short *fragoff)
 {
-	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 	unsigned int len = skb->len - start;
 
 	if (fragoff)
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ccbab66277e3..4115a576ba25 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -32,7 +32,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	ip6h = (*pskb)->nh.ipv6h;
+	ip6h = ipv6_hdr(*pskb);
 
 	switch (info->mode) {
 		case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7b..5bb9cd349350 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -396,8 +396,8 @@ ip6t_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && (len = skb->dev->hard_header_len) &&
-		    skb->mac.raw != skb->nh.raw) {
-			unsigned char *p = skb->mac.raw;
+		    skb->mac_header != skb->network_header) {
+			const unsigned char *p = skb_mac_header(skb);
 			int i;
 
 			if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
 			printk(" ");
 
 			if (skb->dev->type == ARPHRD_SIT) {
-				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+				const struct iphdr *iph =
+					(struct iphdr *)skb_mac_header(skb);
 				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
 				       NIPQUAD(iph->saddr),
 				       NIPQUAD(iph->daddr));
@@ -421,7 +422,7 @@ ip6t_log_packet(unsigned int pf,
 			printk(" ");
 	}
 
-	dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
+	dump_packet(loginfo, skb, skb_network_offset(skb), 1);
 	printk("\n");
 	spin_unlock_bh(&log_lock);
 }
@@ -489,14 +490,10 @@ static int __init ip6t_log_init(void)
 	ret = xt_register_target(&ip6t_log_reg);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
-		printk(KERN_WARNING "ip6t_LOG: not logging via system console "
-		       "since somebody else already registered for PF_INET6\n");
-		/* we cannot make module load fail here, since otherwise
-		 * ip6tables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_INET6, &ip6t_logger);
+	if (ret < 0 && ret != -EEXIST)
+		xt_unregister_target(&ip6t_log_reg);
+	return ret;
 }
 
 static void __exit ip6t_log_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 6abee94c929f..cb3d2415a064 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct sk_buff *oldskb)
 	struct tcphdr otcph, *tcph;
 	unsigned int otcplen, hh_len;
 	int tcphoff, needs_ack;
-	struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
+	struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
 	struct dst_entry *dst = NULL;
 	u8 proto;
 	struct flowi fl;
@@ -120,8 +120,9 @@ static void send_reset(struct sk_buff *oldskb)
 
 	skb_reserve(nskb, hh_len + dst->header_len);
 
-	ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
-					skb_put(nskb, sizeof(struct ipv6hdr));
+	skb_put(nskb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(nskb);
+	ip6h = ipv6_hdr(nskb);
 	ip6h->version = 6;
 	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
 	ip6h->nexthdr = IPPROTO_TCP;
@@ -155,8 +156,8 @@ static void send_reset(struct sk_buff *oldskb)
 	tcph->check = 0;
 
 	/* Adjust TCP checksum */
-	tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
-				      &nskb->nh.ipv6h->daddr,
+	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+				      &ipv6_hdr(nskb)->daddr,
 				      sizeof(struct tcphdr), IPPROTO_TCP,
 				      csum_partial((char *)tcph,
 						   sizeof(struct tcphdr), 0));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a8..0f3dd932f0a6 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
 	unsigned char eui64[8];
 	int i = 0;
 
-	if (!(skb->mac.raw >= skb->head &&
-	      (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+	if (!(skb_mac_header(skb) >= skb->head &&
+	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
 	    offset != 0) {
 		*hotdrop = 1;
 		return 0;
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
 	memset(eui64, 0, sizeof(eui64));
 
 	if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
-		if (skb->nh.ipv6h->version == 0x6) {
+		if (ipv6_hdr(skb)->version == 0x6) {
 			memcpy(eui64, eth_hdr(skb)->h_source, 3);
 			memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
 			eui64[3] = 0xff;
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
 			eui64[0] |= 0x02;
 
 			i = 0;
-			while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i])
+			while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
 			       && (i < 8))
 				i++;
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 37c8a4d4ed78..d606c0e6d6fd 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -25,7 +25,7 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ip6t_hl_info *info = matchinfo;
-	const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 
 	switch (info->mode) {
 		case IP6T_HL_EQ:
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 700a11d25deb..fd6a0869099b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -45,7 +45,7 @@ ipv6header_match(const struct sk_buff *skb,
 	/* Make sure this isn't an evil packet */
 
 	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 	/* pointer to the 1st exthdr */
 	ptr = sizeof(struct ipv6hdr);
 	/* available length */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 112a21d0c6da..76f0cf66f95c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -102,7 +102,7 @@ ip6t_local_out_hook(unsigned int hook,
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0c468d35a937..a9f10e32c163 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,8 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
@@ -138,7 +136,7 @@ ip6t_local_hook(unsigned int hook,
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -146,21 +144,21 @@ ip6t_local_hook(unsigned int hook,
 #endif
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
-	memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
-	memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
+	memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
 	mark = (*pskb)->mark;
-	hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+	hop_limit = ipv6_hdr(*pskb)->hop_limit;
 
 	/* flowlabel and prio (includes version, which shouldn't change either */
-	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
+	flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
 
 	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN
-		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
+		&& (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
+		    || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
 		    || (*pskb)->mark != mark
-		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+		    || ipv6_hdr(*pskb)->hop_limit != hop_limit))
 		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
 
 	return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d1102455668d..6d2a08205111 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -7,17 +7,6 @@
  *
  * Author:
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- support Layer 3 protocol independent connection tracking.
- *	  Based on the original ip_conntrack code which	had the following
- *	  copyright information:
- *		(C) 1999-2001 Paul `Rusty' Russell
- *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add get_features() to support various size of conntrack
- *	  structures.
  */
 
 #include <linux/types.h>
@@ -138,16 +127,10 @@ static int
 ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	     u_int8_t *protonum)
 {
-	unsigned int extoff;
-	unsigned char pnum;
-	int protoff;
-
-	extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
-	pnum = (*pskb)->nh.ipv6h->nexthdr;
-
-	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
-					 (*pskb)->len - extoff);
-
+	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+	int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+					     (*pskb)->len - extoff);
 	/*
 	 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
 	 * except of IPv6 & ext headers. but it's tracked anyway. - YK
@@ -179,9 +162,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	struct nf_conn_help *help;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret, protoff;
-	unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
-			      - (*pskb)->data;
-	unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
 
 
 	/* This is where we call the helper: as the packet goes out. */
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 075da4f287b8..0be790d250f9 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -7,13 +7,6 @@
  *
  * Author:
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- ICMPv6 tracking support. Derived from the original ip_conntrack code
- *	  net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
- *	  copyright information:
- *		(C) 1999-2001 Paul `Rusty' Russell
- *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  */
 
 #include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 15ab1e3e8b56..347ab7608231 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
 	struct sk_buff		*fragments;
 	int			len;
 	int			meat;
-	struct timeval		stamp;
+	ktime_t			stamp;
 	unsigned int		csum;
 	__u8			last_in;	/* has first/last segment arrived? */
 #define COMPLETE		4
@@ -353,9 +353,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   str
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	init_timer(&fq->timer);
-	fq->timer.function = nf_ct_frag6_expire;
-	fq->timer.data = (long) fq;
+	setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
 	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
 
@@ -400,19 +398,20 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	}
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
-			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		DEBUGP("offset is too large.\n");
 		return -1;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw,
-						  (u8*)(fhdr + 1) - skb->nh.raw,
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
 						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -542,7 +541,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		fq->fragments = skb;
 
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &fq->stamp);
+	fq->stamp = skb->tstamp;
 	fq->meat += skb->len;
 	atomic_add(skb->truesize, &nf_ct_frag6_mem);
 
@@ -583,7 +582,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		DEBUGP("payload len is too large.\n");
 		goto out_oversize;
@@ -624,15 +625,15 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
-	head->nh.raw[fq->nhoffset] = head->h.raw[0];
+	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac.raw += sizeof(struct frag_hdr);
-	head->nh.raw += sizeof(struct frag_hdr);
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
-	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_reset_transport_header(head);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &nf_ct_frag6_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -648,12 +649,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &fq->stamp);
-	head->nh.ipv6h->payload_len = htons(payload_len);
+	head->tstamp = fq->stamp;
+	ipv6_hdr(head)->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  skb_network_header_len(head),
+					  head->csum);
 
 	fq->fragments = NULL;
 
@@ -701,9 +704,10 @@ out_fail:
 static int
 find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 {
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
-	u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
-	int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	const int netoff = skb_network_offset(skb);
+	u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+	int start = netoff + sizeof(struct ipv6hdr);
 	int len = skb->len - start;
 	u8 prevhdr = NEXTHDR_IPV6;
 
@@ -759,7 +763,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	struct sk_buff *ret_skb = NULL;
 
 	/* Jumbo payload inhibits frag. header */
-	if (skb->nh.ipv6h->payload_len == 0) {
+	if (ipv6_hdr(skb)->payload_len == 0) {
 		DEBUGP("payload len = 0\n");
 		return skb;
 	}
@@ -780,9 +784,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	clone->h.raw = clone->data + fhoff;
-	hdr = clone->nh.ipv6h;
-	fhdr = (struct frag_hdr *)clone->h.raw;
+	skb_set_transport_header(clone, fhoff);
+	hdr = ipv6_hdr(clone);
+	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		DEBUGP("Invalid fragment offset\n");
@@ -864,8 +868,7 @@ int nf_ct_frag6_init(void)
 	nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				   (jiffies ^ (jiffies >> 6)));
 
-	init_timer(&nf_ct_frag6_secret_timer);
-	nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+	setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
 	nf_ct_frag6_secret_timer.expires = jiffies
 					   + nf_ct_frag6_secret_interval;
 	add_timer(&nf_ct_frag6_secret_timer);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index fa3fb509f187..920dc9cf6a84 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,12 +23,12 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <net/ip.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
-#ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *proc_net_devsnmp6;
 
 static int fold_prot_inuse(struct proto *proto)
@@ -142,26 +142,13 @@ static struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
-static unsigned long
-fold_field(void *mib[], int offt)
-{
-	unsigned long res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
-	}
-	return res;
-}
-
 static inline void
 snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
 {
 	int i;
 	for (i=0; itemlist[i].name; i++)
 		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
-				fold_field(mib, itemlist[i].entry));
+			   snmp_fold_field(mib, itemlist[i].entry));
 }
 
 static int snmp6_seq_show(struct seq_file *seq, void *v)
@@ -236,6 +223,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 		return -EINVAL;
 	remove_proc_entry(idev->stats.proc_dir_entry->name,
 			  proc_net_devsnmp6);
+	idev->stats.proc_dir_entry = NULL;
 	return 0;
 }
 
@@ -271,47 +259,3 @@ void ipv6_misc_proc_exit(void)
 	proc_net_remove("snmp6");
 }
 
-#else	/* CONFIG_PROC_FS */
-
-
-int snmp6_register_dev(struct inet6_dev *idev)
-{
-	return 0;
-}
-
-int snmp6_unregister_dev(struct inet6_dev *idev)
-{
-	return 0;
-}
-#endif	/* CONFIG_PROC_FS */
-
-int snmp6_alloc_dev(struct inet6_dev *idev)
-{
-	int err = -ENOMEM;
-
-	if (!idev || !idev->dev)
-		return -EINVAL;
-
-	if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
-			   __alignof__(struct ipstats_mib)) < 0)
-		goto err_ip;
-	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
-		goto err_icmp;
-
-	return 0;
-
-err_icmp:
-	snmp6_mib_free((void **)idev->stats.ipv6);
-err_ip:
-	return err;
-}
-
-int snmp6_free_dev(struct inet6_dev *idev)
-{
-	snmp6_mib_free((void **)idev->stats.icmpv6);
-	snmp6_mib_free((void **)idev->stats.ipv6);
-	return 0;
-}
-
-
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index ef43bd57baed..f929f47b925e 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,6 +60,8 @@ int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
 	return ret;
 }
 
+EXPORT_SYMBOL(inet6_add_protocol);
+
 /*
  *	Remove a protocol from the hash tables.
  */
@@ -83,3 +85,5 @@ int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
 
 	return ret;
 }
+
+EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 306d5d83c068..009a1047fc3f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	int delivered = 0;
 	__u8 hash;
 
-	saddr = &skb->nh.ipv6h->saddr;
+	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
@@ -361,17 +361,18 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		skb_postpull_rcsum(skb, skb->nh.raw,
-				   skb->h.raw - skb->nh.raw);
-		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-				     &skb->nh.ipv6h->daddr,
+		skb_postpull_rcsum(skb, skb_network_header(skb),
+				   skb_network_header_len(skb));
+		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr,
 				     skb->len, inet->num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-					     &skb->nh.ipv6h->daddr,
-					     skb->len, inet->num, 0));
+	if (!skb_csum_unnecessary(skb))
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len,
+							 inet->num, 0));
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
@@ -420,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+	if (skb_csum_unnecessary(skb)) {
 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	} else if (msg->msg_flags&MSG_TRUNC) {
 		if (__skb_checksum_complete(skb))
@@ -438,7 +439,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -488,7 +489,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
+						skb->data);
 	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
@@ -511,7 +513,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 			if (csum_skb)
 				continue;
 
-			len = skb->len - (skb->h.raw - skb->data);
+			len = skb->len - skb_transport_offset(skb);
 			if (offset >= len) {
 				offset -= len;
 				continue;
@@ -523,7 +525,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		skb = csum_skb;
 	}
 
-	offset += skb->h.raw - skb->data;
+	offset += skb_transport_offset(skb);
 	if (skb_copy_bits(skb, offset, &csum, 2))
 		BUG();
 
@@ -575,11 +577,13 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
 
-	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+	skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
@@ -687,9 +691,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 
 	/* Rough check on arithmetic overflow,
-	   better check is made in ip6_build_xmit
+	   better check is made in ip6_append_data().
 	 */
-	if (len < 0)
+	if (len > INT_MAX)
 		return -EMSGSIZE;
 
 	/* Mirror BSD error message compatibility */
@@ -878,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -903,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -957,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_setsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -978,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_setsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 #endif
@@ -1031,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_getsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -1052,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_getsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 #endif
@@ -1073,7 +1079,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			spin_lock_bh(&sk->sk_receive_queue.lock);
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb != NULL)
-				amount = skb->tail - skb->h.raw;
+				amount = skb->tail - skb->transport_header;
 			spin_unlock_bh(&sk->sk_receive_queue.lock);
 			return put_user(amount, (int __user *)arg);
 		}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7034c54e5010..de795c04e34c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -88,7 +88,7 @@ struct frag_queue
 	int			len;
 	int			meat;
 	int			iif;
-	struct timeval		stamp;
+	ktime_t			stamp;
 	unsigned int		csum;
 	__u8			last_in;	/* has first/last segment arrived? */
 #define COMPLETE		4
@@ -430,19 +430,24 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		goto err;
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
-			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((u8 *)&fhdr->frag_off -
+				   skb_network_header(skb)));
 		return;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -562,7 +567,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if (skb->dev)
 		fq->iif = skb->dev->ifindex;
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &fq->stamp);
+	fq->stamp = skb->tstamp;
 	fq->meat += skb->len;
 	atomic_add(skb->truesize, &ip6_frag_mem);
 
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
@@ -639,15 +646,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	head->nh.raw[nhoff] = head->h.raw[0];
+	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac.raw += sizeof(struct frag_hdr);
-	head->nh.raw += sizeof(struct frag_hdr);
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
-	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_reset_transport_header(head);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip6_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -663,15 +670,17 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &fq->stamp);
-	head->nh.ipv6h->payload_len = htons(payload_len);
+	head->tstamp = fq->stamp;
+	ipv6_hdr(head)->payload_len = htons(payload_len);
 	IP6CB(head)->nhoff = nhoff;
 
 	*skb_in = head;
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  skb_network_header_len(head),
+					  head->csum);
 
 	rcu_read_lock();
 	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -699,33 +708,34 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 	struct net_device *dev = skb->dev;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
-	struct ipv6hdr *hdr;
-
-	hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
 	if (hdr->payload_len==0) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  skb_network_header_len(skb));
 		return -1;
 	}
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
+	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 sizeof(struct frag_hdr)))) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  skb_network_header_len(skb));
 		return -1;
 	}
 
-	hdr = skb->nh.ipv6h;
-	fhdr = (struct frag_hdr *)skb->h.raw;
+	hdr = ipv6_hdr(skb);
+	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
-		skb->h.raw += sizeof(struct frag_hdr);
+		skb->transport_header += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
 
-		IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
+		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		return 1;
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a6b3117df546..b46ad53044ba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -311,21 +311,12 @@ static inline void rt6_probe(struct rt6_info *rt)
 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 {
 	struct net_device *dev = rt->rt6i_dev;
-	int ret = 0;
-
-	if (!oif)
+	if (!oif || dev->ifindex == oif)
 		return 2;
-	if (dev->flags & IFF_LOOPBACK) {
-		if (!WARN_ON(rt->rt6i_idev == NULL) &&
-		    rt->rt6i_idev->dev->ifindex == oif)
-			ret = 1;
-		else
-			return 0;
-	}
-	if (dev->ifindex == oif)
-		return 2;
-
-	return ret;
+	if ((dev->flags & IFF_LOOPBACK) &&
+	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
+		return 1;
+	return 0;
 }
 
 static inline int rt6_check_neigh(struct rt6_info *rt)
@@ -363,55 +354,76 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
 	return m;
 }
 
-static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
-				   int strict)
+static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
+				   int *mpri, struct rt6_info *match)
 {
-	struct rt6_info *match = NULL, *last = NULL;
-	struct rt6_info *rt, *rt0 = *head;
-	u32 metric;
+	int m;
+
+	if (rt6_check_expired(rt))
+		goto out;
+
+	m = rt6_score_route(rt, oif, strict);
+	if (m < 0)
+		goto out;
+
+	if (m > *mpri) {
+		if (strict & RT6_LOOKUP_F_REACHABLE)
+			rt6_probe(match);
+		*mpri = m;
+		match = rt;
+	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
+		rt6_probe(rt);
+	}
+
+out:
+	return match;
+}
+
+static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+				     struct rt6_info *rr_head,
+				     u32 metric, int oif, int strict)
+{
+	struct rt6_info *rt, *match;
 	int mpri = -1;
 
-	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
-		  __FUNCTION__, head, head ? *head : NULL, oif);
+	match = NULL;
+	for (rt = rr_head; rt && rt->rt6i_metric == metric;
+	     rt = rt->u.dst.rt6_next)
+		match = find_match(rt, oif, strict, &mpri, match);
+	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
+	     rt = rt->u.dst.rt6_next)
+		match = find_match(rt, oif, strict, &mpri, match);
 
-	for (rt = rt0, metric = rt0->rt6i_metric;
-	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
-	     rt = rt->u.dst.rt6_next) {
-		int m;
+	return match;
+}
 
-		if (rt6_check_expired(rt))
-			continue;
+static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+{
+	struct rt6_info *match, *rt0;
 
-		last = rt;
+	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
+		  __FUNCTION__, fn->leaf, oif);
 
-		m = rt6_score_route(rt, oif, strict);
-		if (m < 0)
-			continue;
+	rt0 = fn->rr_ptr;
+	if (!rt0)
+		fn->rr_ptr = rt0 = fn->leaf;
 
-		if (m > mpri) {
-			if (strict & RT6_LOOKUP_F_REACHABLE)
-				rt6_probe(match);
-			match = rt;
-			mpri = m;
-		} else if (strict & RT6_LOOKUP_F_REACHABLE) {
-			rt6_probe(rt);
-		}
-	}
+	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 	if (!match &&
-	    (strict & RT6_LOOKUP_F_REACHABLE) &&
-	    last && last != rt0) {
+	    (strict & RT6_LOOKUP_F_REACHABLE)) {
+		struct rt6_info *next = rt0->u.dst.rt6_next;
+
 		/* no entries matched; do round-robin */
-		static DEFINE_SPINLOCK(lock);
-		spin_lock(&lock);
-		*head = rt0->u.dst.rt6_next;
-		rt0->u.dst.rt6_next = last->u.dst.rt6_next;
-		last->u.dst.rt6_next = rt0;
-		spin_unlock(&lock);
+		if (!next || next->rt6i_metric != rt0->rt6i_metric)
+			next = fn->leaf;
+
+		if (next != rt0)
+			fn->rr_ptr = next;
 	}
 
-	RT6_TRACE("%s() => %p, score=%d\n",
-		  __FUNCTION__, match, mpri);
+	RT6_TRACE("%s() => %p\n",
+		  __FUNCTION__, match);
 
 	return (match ? match : &ip6_null_entry);
 }
@@ -563,6 +575,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
 	return NULL;
 }
 
+EXPORT_SYMBOL(rt6_lookup);
+
 /* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
@@ -657,7 +671,7 @@ restart_2:
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
+	rt = rt6_select(fn, fl->iif, strict | reachable);
 	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -712,7 +726,7 @@ out2:
 
 void ip6_route_input(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct flowi fl = {
 		.iif = skb->dev->ifindex,
@@ -752,7 +766,7 @@ restart_2:
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
+	rt = rt6_select(fn, fl->oif, strict | reachable);
 	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -817,6 +831,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
+EXPORT_SYMBOL(ip6_route_output);
 
 /*
  *	Destination cache support functions
@@ -1745,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 		rtnl_unlock();
 
 		return err;
-	};
+	}
 
 	return -EINVAL;
 }
@@ -1754,13 +1769,22 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
  *	Drop the packet on the floor
  */
 
-static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
+static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
+			       int ipstats_mib_noroutes)
 {
-	int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
-	if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
-		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
-
-	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
+	int type;
+	switch (ipstats_mib_noroutes) {
+	case IPSTATS_MIB_INNOROUTES:
+		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
+		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
+			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
+			break;
+		}
+		/* FALLTHROUGH */
+	case IPSTATS_MIB_OUTNOROUTES:
+		IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
+		break;
+	}
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
 	kfree_skb(skb);
 	return 0;
@@ -1768,26 +1792,26 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
 
 static int ip6_pkt_discard(struct sk_buff *skb)
 {
-	return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
+	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
 }
 
 static int ip6_pkt_discard_out(struct sk_buff *skb)
 {
 	skb->dev = skb->dst->dev;
-	return ip6_pkt_discard(skb);
+	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
 }
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
 static int ip6_pkt_prohibit(struct sk_buff *skb)
 {
-	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
+	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
 }
 
 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
 {
 	skb->dev = skb->dst->dev;
-	return ip6_pkt_prohibit(skb);
+	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
 }
 
 static int ip6_pkt_blk_hole(struct sk_buff *skb)
@@ -1991,7 +2015,7 @@ errout:
 	return err;
 }
 
-int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2003,7 +2027,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return ip6_route_del(&cfg);
 }
 
-int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2140,7 +2164,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     prefix, NLM_F_MULTI);
 }
 
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
@@ -2194,7 +2218,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
@@ -2465,8 +2489,9 @@ ctl_table ipv6_route_table[] = {
 
 void __init ip6_route_init(void)
 {
+#ifdef 	CONFIG_PROC_FS
 	struct proc_dir_entry *p;
-
+#endif
 	ip6_dst_ops.kmem_cachep =
 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -2484,6 +2509,10 @@ void __init ip6_route_init(void)
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_init();
 #endif
+
+	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
+	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
+	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
 }
 
 void ip6_route_cleanup(void)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 08d6ed3396e4..1efa95a99f45 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -99,10 +99,10 @@ static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
 	return NULL;
 }
 
-static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
 
@@ -117,6 +117,11 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
+{
+	return __ipip6_bucket(&t->parms);
+}
+
 static void ipip6_tunnel_unlink(struct ip_tunnel *t)
 {
 	struct ip_tunnel **tp;
@@ -147,19 +152,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
 	__be32 local = parms->iph.saddr;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
@@ -224,8 +219,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
    ICMP in the real Internet is absolutely infeasible.
  */
 	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
 
@@ -280,8 +275,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct ipv6hdr *iph6;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	int rel_info = 0;
@@ -296,14 +291,14 @@ out:
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
+		if (icmp_hdr(skb)->un.gateway < hlen)
 			return;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMPV6_PARAMPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
+		rel_info = icmp_hdr(skb)->un.gateway - hlen;
 		break;
 
 	case ICMP_DEST_UNREACH:
@@ -340,7 +335,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)iph6);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
@@ -366,7 +361,7 @@ out:
 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
 static int ipip6_rcv(struct sk_buff *skb)
@@ -377,13 +372,13 @@ static int ipip6_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	read_lock(&ipip6_lock);
 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb->mac_header = skb->network_header;
+		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
@@ -430,7 +425,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *tiph = &tunnel->parms.iph;
-	struct ipv6hdr *iph6 = skb->nh.ipv6h;
+	struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;			/* Device to other host */
@@ -468,7 +463,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		addr_type = ipv6_addr_type(addr6);
 
 		if (addr_type == IPV6_ADDR_ANY) {
-			addr6 = &skb->nh.ipv6h->daddr;
+			addr6 = &ipv6_hdr(skb)->daddr;
 			addr_type = ipv6_addr_type(addr6);
 		}
 
@@ -550,11 +545,12 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		iph6 = skb->nh.ipv6h;
+		iph6 = ipv6_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb->transport_header = skb->network_header;
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	dst_release(skb->dst);
@@ -564,7 +560,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	if (mtu > IPV6_MIN_MTU)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 92f99927d12d..e2f25ea43b68 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -115,10 +115,10 @@ static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
 
 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
 {
-	return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
-					    skb->nh.ipv6h->saddr.s6_addr32,
-					    skb->h.th->dest,
-					    skb->h.th->source);
+	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+					    ipv6_hdr(skb)->saddr.s6_addr32,
+					    tcp_hdr(skb)->dest,
+					    tcp_hdr(skb)->source);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+				opt = ipv6_invert_rthdr(sk,
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt && opt->srcrt) {
@@ -507,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	skb = tcp_make_synack(sk, dst, req);
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v6_check(th, skb->len,
 					 &treq->loc_addr, &treq->rmt_addr,
@@ -835,8 +837,8 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 {
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
-	struct tcphdr *th = skb->h.th;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof (*th);
 	int genhash;
 	u8 *ptr;
@@ -944,10 +946,11 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
@@ -964,12 +967,13 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return -EINVAL;
 
-	ipv6h = skb->nh.ipv6h;
-	th = skb->h.th;
+	ipv6h = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
 				     IPPROTO_TCP, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
@@ -977,7 +981,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(*th);
@@ -993,7 +997,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr);
+		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
 	else
 		key = NULL;
 
@@ -1037,20 +1041,18 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8*)&opt[1],
-					key,
-					&skb->nh.ipv6h->daddr,
-					&skb->nh.ipv6h->saddr,
-					t1, IPPROTO_TCP,
-					tot_len);
+		tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
+					&ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr,
+					t1, IPPROTO_TCP, tot_len);
 	}
 #endif
 
 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    sizeof(*t1), IPPROTO_TCP,
@@ -1079,7 +1081,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 			    struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(struct tcphdr);
@@ -1091,7 +1093,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (!tw && skb->sk) {
-		key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr);
+		key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
 	} else if (tw && tw->tw_md5_keylen) {
 		tw_key.key = tw->tw_md5_key;
 		tw_key.keylen = tw->tw_md5_keylen;
@@ -1140,20 +1142,18 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 	if (key) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8 *)topt,
-					key,
-					&skb->nh.ipv6h->daddr,
-					&skb->nh.ipv6h->saddr,
-					t1, IPPROTO_TCP,
-					tot_len);
+		tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
+					&ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr,
+					t1, IPPROTO_TCP, tot_len);
 	}
 #endif
 
 	buff->csum = csum_partial((char *)t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    tot_len, IPPROTO_TCP,
@@ -1197,18 +1197,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	struct request_sock *req, **prev;
-	const struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
 	req = inet6_csk_search_req(sk, &prev, th->source,
-				   &skb->nh.ipv6h->saddr,
-				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
+				   &ipv6_hdr(skb)->saddr,
+				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
-					 th->source, &skb->nh.ipv6h->daddr,
+	nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+					 th->source, &ipv6_hdr(skb)->daddr,
 					 ntohs(th->dest), inet6_iif(skb));
 
 	if (nsk) {
@@ -1275,9 +1275,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	treq = inet6_rsk(req);
-	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
-	TCP_ECN_create_request(req, skb->h.th);
+	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+	TCP_ECN_create_request(req, tcp_hdr(skb));
 	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1363,7 +1363,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1389,7 +1389,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	    opt == NULL && treq->pktopts) {
 		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk,
+		   (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
@@ -1469,7 +1471,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1528,15 +1530,16 @@ out:
 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				  &skb->nh.ipv6h->daddr,skb->csum)) {
+		if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
+				  &ipv6_hdr(skb)->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				  &skb->nh.ipv6h->daddr, 0));
+	skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
+					      &ipv6_hdr(skb)->saddr,
+					      &ipv6_hdr(skb)->daddr, 0));
 
 	if (skb->len <= 76) {
 		return __skb_checksum_complete(skb);
@@ -1600,7 +1603,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
@@ -1608,7 +1611,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
 	if (sk->sk_state == TCP_LISTEN) {
@@ -1631,7 +1634,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
@@ -1664,7 +1667,7 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
 			np->mcast_oif = inet6_iif(opt_skb);
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1697,28 +1700,27 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr)/4)
 		goto bad_packet;
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v6_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when = 0;
-	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
-			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
+	sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+			    &ipv6_hdr(skb)->daddr, ntohs(th->dest),
 			    inet6_iif(skb));
 
 	if (!sk)
@@ -1798,7 +1800,7 @@ do_time_wait:
 		struct sock *sk2;
 
 		sk2 = inet6_lookup_listener(&tcp_hashinfo,
-					    &skb->nh.ipv6h->daddr,
+					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), inet6_iif(skb));
 		if (sk2 != NULL) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
@@ -1945,6 +1947,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 	return inet6_destroy_sock(sk);
 }
 
+#ifdef CONFIG_PROC_FS
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
 			 struct sock *sk, struct request_sock *req, int i, int uid)
@@ -2061,7 +2064,6 @@ static void get_timewait6_sock(struct seq_file *seq,
 		   atomic_read(&tw->tw_refcnt), tw);
 }
 
-#ifdef CONFIG_PROC_FS
 static int tcp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct tcp_iter_state *st;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0ad471909881..b083c09e3d2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -93,10 +93,10 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
 					continue;
 				score++;
 			}
-			if(score == 4) {
+			if (score == 4) {
 				result = sk;
 				break;
-			} else if(score > badness) {
+			} else if (score > badness) {
 				result = sk;
 				badness = score;
 			}
@@ -120,8 +120,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
-	size_t copied;
-	int err, copy_only, is_udplite = IS_UDPLITE(sk);
+	unsigned int ulen, copied;
+	int err;
+	int is_udplite = IS_UDPLITE(sk);
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -134,24 +135,25 @@ try_again:
 	if (!skb)
 		goto out;
 
-	copied = skb->len - sizeof(struct udphdr);
-	if (copied > len) {
-		copied = len;
+	ulen = skb->len - sizeof(struct udphdr);
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
-	}
 
 	/*
-	 * 	Decide whether to checksum and/or copy data.
+	 * If checksum is needed at all, try to do it while copying the
+	 * data.  If the data is truncated, or if we only want a partial
+	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
-	copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
 
-	if (is_udplite  ||  (!copy_only  &&  msg->msg_flags&MSG_TRUNC)) {
-		if (__udp_lib_checksum_complete(skb))
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
-		copy_only = 1;
 	}
 
-	if (copy_only)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -170,15 +172,16 @@ try_again:
 
 		sin6 = (struct sockaddr_in6 *) msg->msg_name;
 		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = skb->h.uh->source;
+		sin6->sin6_port = udp_hdr(skb)->source;
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 
 		if (skb->protocol == htons(ETH_P_IP))
 			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				      htonl(0xffff), skb->nh.iph->saddr);
+				      htonl(0xffff), ip_hdr(skb)->saddr);
 		else {
-			ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+			ipv6_addr_copy(&sin6->sin6_addr,
+				       &ipv6_hdr(skb)->saddr);
 			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin6->sin6_scope_id = IP6CB(skb)->iif;
 		}
@@ -194,7 +197,7 @@ try_again:
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = ulen;
 
 out_free:
 	skb_free_datagram(sk, skb);
@@ -279,8 +282,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		}
 	}
 
-	if (udp_lib_checksum_complete(skb))
-		goto drop;
+	if (sk->sk_filter) {
+		if (udp_lib_checksum_complete(skb))
+			goto drop;
+	}
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
@@ -325,7 +330,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
 				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
 					continue;
 			}
-			if(!inet6_mc_check(s, loc_addr, rmt_addr))
+			if (!inet6_mc_check(s, loc_addr, rmt_addr))
 				continue;
 			return s;
 		}
@@ -341,7 +346,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
 			   struct in6_addr *daddr, struct hlist_head udptable[])
 {
 	struct sock *sk, *sk2;
-	const struct udphdr *uh = skb->h.uh;
+	const struct udphdr *uh = udp_hdr(skb);
 	int dif;
 
 	read_lock(&udp_hash_lock);
@@ -366,9 +371,20 @@ out:
 	return 0;
 }
 
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
-
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
+				 int proto)
 {
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
 	if (uh->check == 0) {
 		/* RFC 2460 section 8.1 says that we SHOULD log
 		   this error. Well, it is reasonable.
@@ -377,21 +393,20 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
 		return 1;
 	}
 	if (skb->ip_summed == CHECKSUM_COMPLETE &&
-	    !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
-			     skb->len, IPPROTO_UDP, skb->csum             ))
+	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			     skb->len, proto, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-							 &skb->nh.ipv6h->daddr,
-							 skb->len, IPPROTO_UDP,
-							 0));
+	if (!skb_csum_unnecessary(skb))
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len, proto, 0));
 
-	return (UDP_SKB_CB(skb)->partial_cov = 0);
+	return 0;
 }
 
 int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
-		   int is_udplite)
+		   int proto)
 {
 	struct sk_buff *skb = *pskb;
 	struct sock *sk;
@@ -403,15 +418,16 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
-	uh = skb->h.uh;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
+	uh = udp_hdr(skb);
 
 	ulen = ntohs(uh->len);
 	if (ulen > skb->len)
 		goto short_packet;
 
-	if(! is_udplite ) {		/* UDP validates ulen. */
+	if (proto == IPPROTO_UDP) {
+		/* UDP validates ulen. */
 
 		/* Check for jumbo payload */
 		if (ulen == 0)
@@ -423,19 +439,15 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 		if (ulen < skb->len) {
 			if (pskb_trim_rcsum(skb, ulen))
 				goto short_packet;
-			saddr = &skb->nh.ipv6h->saddr;
-			daddr = &skb->nh.ipv6h->daddr;
-			uh = skb->h.uh;
+			saddr = &ipv6_hdr(skb)->saddr;
+			daddr = &ipv6_hdr(skb)->daddr;
+			uh = udp_hdr(skb);
 		}
-
-		if (udp6_csum_init(skb, uh))
-			goto discard;
-
-	} else 	{			/* UDP-Lite validates cscov. */
-		if (udplite6_csum_init(skb, uh))
-			goto discard;
 	}
 
+	if (udp6_csum_init(skb, uh, proto))
+		goto discard;
+
 	/*
 	 *	Multicast receive code
 	 */
@@ -457,33 +469,34 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 
 		if (udp_lib_checksum_complete(skb))
 			goto discard;
-		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
 
 		kfree_skb(skb);
-		return(0);
+		return 0;
 	}
 
 	/* deliver */
 
 	udpv6_queue_rcv_skb(sk, skb);
 	sock_put(sk);
-	return(0);
+	return 0;
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
-		       is_udplite? "-Lite" : "",  ulen, skb->len);
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
+		       ulen, skb->len);
 
 discard:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-	return(0);
+	return 0;
 }
 
 static __inline__ int udpv6_rcv(struct sk_buff **pskb)
 {
-	return __udp6_lib_rcv(pskb, udp_hash, 0);
+	return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
 }
 
 /*
@@ -521,7 +534,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	/*
 	 * Create a UDP header
 	 */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	uh->source = fl->fl_ip_sport;
 	uh->dest = fl->fl_ip_dport;
 	uh->len = htons(up->len);
@@ -615,7 +628,7 @@ do_udp_sendmsg:
 		return udp_sendmsg(iocb, sk, msg, len);
 
 	/* Rough check on arithmetic overflow,
-	   better check is made in ip6_build_xmit
+	   better check is made in ip6_append_data().
 	   */
 	if (len > INT_MAX - sizeof(struct udphdr))
 		return -EMSGSIZE;
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 629f97162fbc..f54016a55004 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -19,7 +19,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
 
 static int udplitev6_rcv(struct sk_buff **pskb)
 {
-	return __udp6_lib_rcv(pskb, udplite_hash, 1);
+	return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 31f651f95096..d7ed8aa56ec1 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,14 +28,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	unsigned int nhoff;
 
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
 		goto drop;
 
 	do {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		if (nexthdr <= 0)
 			goto drop_unlock;
 
-		skb->nh.raw[nhoff] = nexthdr;
+		skb_network_header(skb)[nhoff] = nexthdr;
 
 		if (x->props.replay_window)
 			xfrm_replay_advance(x, seq);
@@ -112,8 +112,8 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		return -1;
 	} else {
 #ifdef CONFIG_NETFILTER
-		skb->nh.ipv6h->payload_len = htons(skb->len);
-		__skb_push(skb, skb->data - skb->nh.raw);
+		ipv6_hdr(skb)->payload_len = htons(skb->len);
+		__skb_push(skb, skb->data - skb_network_header(skb));
 
 		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
 			ip6_rcv_finish);
@@ -140,19 +140,19 @@ int xfrm6_rcv(struct sk_buff **pskb)
 	return xfrm6_rcv_spi(*pskb, 0);
 }
 
+EXPORT_SYMBOL(xfrm6_rcv);
+
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto)
 {
 	struct xfrm_state *x = NULL;
 	int wildcard = 0;
-	struct in6_addr any;
 	xfrm_address_t *xany;
 	struct xfrm_state *xfrm_vec_one = NULL;
 	int nh = 0;
 	int i = 0;
 
-	ipv6_addr_set(&any, 0, 0, 0, 0);
-	xany = (xfrm_address_t *)&any;
+	xany = (xfrm_address_t *)&in6addr_any;
 
 	for (i = 0; i < 3; i++) {
 		xfrm_address_t *dst, *src;
@@ -247,3 +247,5 @@ drop:
 		xfrm_state_put(xfrm_vec_one);
 	return -1;
 }
+
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index edcfffa9e87b..2e61d6ddece3 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -38,17 +38,18 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 
-	skb->nh.raw = skb->data;
-	top_iph = skb->nh.ipv6h;
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
+	skb_reset_network_header(skb);
+	top_iph = ipv6_hdr(skb);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb->network_header += offsetof(struct ipv6hdr, nexthdr);
 
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
@@ -59,6 +60,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
+	const unsigned char *old_mac;
 	int size = sizeof(struct ipv6hdr);
 	int err = -EINVAL;
 
@@ -66,13 +68,14 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 
 	skb_push(skb, size);
-	memmove(skb->data, skb->nh.raw, size);
-	skb->nh.raw = skb->data;
+	memmove(skb->data, skb_network_header(skb), size);
+	skb_reset_network_header(skb);
 
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
+	old_mac = skb_mac_header(skb);
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 
-	ip6h = skb->nh.ipv6h;
+	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
 	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
 	ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 6031c16d46ca..6ad6d7ac6bd7 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -50,11 +50,12 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12bad..c026bfea820a 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -32,11 +32,12 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
@@ -51,13 +52,16 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
-	if (skb->h.raw != skb->nh.raw)
-		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
-	skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
+	if (skb->transport_header != skb->network_header) {
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
+		skb->network_header = skb->transport_header;
+	}
+	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	return 0;
 }
 
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 0bc866c0d83c..a6c0cdf46ad6 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,8 +18,8 @@
 
 static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
-	struct ipv6hdr *inner_iph = skb->h.ipv6h;
+	struct ipv6hdr *outer_iph = ipv6_hdr(skb);
+	struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
 
 	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
 		IP6_ECN_set_ce(inner_iph);
@@ -27,8 +27,8 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 
 static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
 {
-	if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h)))
-			IP_ECN_set_ce(skb->h.ipiph);
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
+			IP_ECN_set_ce(ipip_hdr(skb));
 }
 
 /* Add encapsulation header.
@@ -51,12 +51,12 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	int dsfield;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
-	skb->nh.raw = skb->data;
-	top_iph = skb->nh.ipv6h;
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
+	skb_reset_network_header(skb);
+	top_iph = ipv6_hdr(skb);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb->network_header   += offsetof(struct ipv6hdr, nexthdr);
 
 	top_iph->version = 6;
 	if (xdst->route->ops->family == AF_INET6) {
@@ -86,9 +86,11 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
+	const unsigned char *old_mac;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
-	    && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+	if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
+	    nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
 		goto out;
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
@@ -97,9 +99,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+	nh = skb_network_header(skb);
+	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+			ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip6_ecn_decapsulate(skb);
 	} else {
@@ -107,9 +110,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 			ip6ip_ecn_decapsulate(skb);
 		skb->protocol = htons(ETH_P_IP);
 	}
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
-	skb->nh.raw = skb->data;
+	old_mac = skb_mac_header(skb);
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+	skb_reset_network_header(skb);
 	err = 0;
 
 out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2b..56364a5f676a 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,6 +23,8 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 	return ip6_find_1stfragopt(skb, prevhdr);
 }
 
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
@@ -76,11 +78,11 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
 		if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
-			x->lastused = (u64)xtime.tv_sec;
+			x->lastused = get_seconds();
 
 		spin_unlock_bh(&x->lock);
 
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		if (!(skb->dst = dst_pop(dst))) {
 			err = -EHOSTUNREACH;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb4..1faa2ea80afc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		if (!afinfo) {
 			dst = *dst_p;
 			goto error;
-		};
+		}
+
 		dst_prev->output = afinfo->output;
 		xfrm_state_put_afinfo(afinfo);
 		/* Sheit... I remember I did this right. Apparently,
@@ -270,17 +271,19 @@ error:
 static inline void
 _decode_session6(struct sk_buff *skb, struct flowi *fl)
 {
-	u16 offset = skb->h.raw - skb->nh.raw;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	u16 offset = skb_network_header_len(skb);
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct ipv6_opt_hdr *exthdr;
-	u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
+	const unsigned char *nh = skb_network_header(skb);
+	u8 nexthdr = nh[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
 	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
 	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
 
-	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+		nh = skb_network_header(skb);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 
 		switch (nexthdr) {
 		case NEXTHDR_ROUTING:
@@ -288,7 +291,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case NEXTHDR_DEST:
 			offset += ipv6_optlen(exthdr);
 			nexthdr = exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 			break;
 
 		case IPPROTO_UDP:
@@ -296,7 +299,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
 		case IPPROTO_DCCP:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)exthdr;
 
 				fl->fl_ip_sport = ports[0];
@@ -306,7 +309,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			return;
 
 		case IPPROTO_ICMPV6:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
 				u8 *icmp = (u8 *)exthdr;
 
 				fl->fl_icmp_type = icmp[0];
@@ -317,7 +320,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 
 #ifdef CONFIG_IPV6_MIP6
 		case IPPROTO_MH:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
 				mh = (struct ip6_mh *)exthdr;
 
@@ -335,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			fl->fl_ipsec_spi = 0;
 			fl->proto = nexthdr;
 			return;
-		};
+		}
 	}
 }
 
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 93c42232aa39..5502cc948dfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -257,11 +257,11 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, spi);
+	return xfrm6_rcv_spi(skb, spi) > 0 ? : 0;
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index cac35a77f069..392f8bc92691 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -576,7 +576,9 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
 	skb2 = alloc_skb(len, GFP_ATOMIC);
 	if (skb2) {
 		skb_reserve(skb2, out_offset);
-		skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len);
+		skb_reset_network_header(skb2);
+		skb_reset_transport_header(skb2);
+		skb_put(skb2, skb->len);
 		memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
 		memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
 	}
@@ -1807,8 +1809,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 				     copied);
 	if (rc)
 		goto out_free;
-	if (skb->tstamp.off_sec)
-		skb_get_timestamp(skb, &sk->sk_stamp);
+	if (skb->tstamp.tv64)
+		sk->sk_stamp = skb->tstamp;
 
 	msg->msg_namelen = sizeof(*sipx);
 
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 8e1cad971f11..e16c11423527 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -203,7 +203,9 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
 	skb->sk = sk;
 
 	/* Fill in IPX header */
-	skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr));
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+	skb_put(skb, sizeof(struct ipxhdr));
 	ipx = ipx_hdr(skb);
 	ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
 	IPX_SKB_CB(skb)->ipx_tctrl = 0;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index eabd6838f50a..06c97c60d542 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -89,7 +89,6 @@ static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
 
 	self = instance;
 	sk = instance;
-	IRDA_ASSERT(sk != NULL, return -1;);
 
 	err = sock_queue_rcv_skb(sk, skb);
 	if (err) {
@@ -131,15 +130,12 @@ static void irda_disconnect_indication(void *instance, void *sap,
 	}
 
 	/* Prevent race conditions with irda_release() and irda_shutdown() */
+	bh_lock_sock(sk);
 	if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
-		lock_sock(sk);
 		sk->sk_state     = TCP_CLOSE;
-		sk->sk_err       = ECONNRESET;
 		sk->sk_shutdown |= SEND_SHUTDOWN;
 
 		sk->sk_state_change(sk);
-		sock_orphan(sk);
-		release_sock(sk);
 
 		/* Close our TSAP.
 		 * If we leave it open, IrLMP put it back into the list of
@@ -159,6 +155,7 @@ static void irda_disconnect_indication(void *instance, void *sap,
 			self->tsap = NULL;
 		}
 	}
+	bh_unlock_sock(sk);
 
 	/* Note : once we are there, there is not much you want to do
 	 * with the socket anymore, apart from closing it.
@@ -221,7 +218,7 @@ static void irda_connect_confirm(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
@@ -284,7 +281,7 @@ static void irda_connect_indication(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
@@ -307,8 +304,6 @@ static void irda_connect_response(struct irda_sock *self)
 
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return;);
-
 	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
 			GFP_ATOMIC);
 	if (skb == NULL) {
@@ -338,7 +333,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
 
 	self = instance;
 	sk = instance;
-	IRDA_ASSERT(sk != NULL, return;);
+	BUG_ON(sk == NULL);
 
 	switch (flow) {
 	case FLOW_STOP:
@@ -450,7 +445,7 @@ static void irda_discovery_timeout(u_long priv)
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
 	self = (struct irda_sock *) priv;
-	IRDA_ASSERT(self != NULL, return;);
+	BUG_ON(self == NULL);
 
 	/* Nothing for the caller */
 	self->cachelog = NULL;
@@ -547,8 +542,6 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
 {
 	IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	if (self->iriap) {
 		IRDA_WARNING("%s(): busy with a previous query\n",
 			     __FUNCTION__);
@@ -636,8 +629,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
 
 	IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	/* Ask lmp for the current discovery log
 	 * Note : we have to use irlmp_get_discoveries(), as opposed
 	 * to play with the cachelog directly, because while we are
@@ -785,8 +776,6 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct irda_sock *self = irda_sk(sk);
 	int err;
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
 	if (addr_len != sizeof(struct sockaddr_irda))
@@ -842,8 +831,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	err = irda_create(newsock, sk->sk_protocol);
 	if (err)
 		return err;
@@ -874,44 +861,28 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 	 * calling us, the data is waiting for us ;-)
 	 * Jean II
 	 */
-	skb = skb_dequeue(&sk->sk_receive_queue);
-	if (skb == NULL) {
-		int ret = 0;
-		DECLARE_WAITQUEUE(waitq, current);
+	while (1) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb)
+			break;
 
 		/* Non blocking operation */
 		if (flags & O_NONBLOCK)
 			return -EWOULDBLOCK;
 
-		/* The following code is a cut'n'paste of the
-		 * wait_event_interruptible() macro.
-		 * We don't us the macro because the condition has
-		 * side effects : we want to make sure that only one
-		 * skb get dequeued - Jean II */
-		add_wait_queue(sk->sk_sleep, &waitq);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			skb = skb_dequeue(&sk->sk_receive_queue);
-			if (skb != NULL)
-				break;
-			if (!signal_pending(current)) {
-				schedule();
-				continue;
-			}
-			ret = -ERESTARTSYS;
-			break;
-		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &waitq);
-		if(ret)
-			return -ERESTARTSYS;
+		err = wait_event_interruptible(*(sk->sk_sleep),
+					skb_peek(&sk->sk_receive_queue));
+		if (err)
+			return err;
 	}
 
 	newsk = newsock->sk;
+	if (newsk == NULL)
+		return -EIO;
+
 	newsk->sk_state = TCP_ESTABLISHED;
 
 	new = irda_sk(newsk);
-	IRDA_ASSERT(new != NULL, return -1;);
 
 	/* Now attach up the new socket */
 	new->tsap = irttp_dup(self->tsap, new);
@@ -1062,7 +1033,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		sock->state = SS_UNCONNECTED;
-		return sock_error(sk);	/* Always set at this point */
+		err = sock_error(sk);
+		return err? err : -ECONNRESET;
 	}
 
 	sock->state = SS_CONNECTED;
@@ -1172,8 +1144,6 @@ static void irda_destroy_socket(struct irda_sock *self)
 {
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
-	IRDA_ASSERT(self != NULL, return;);
-
 	/* Unregister with IrLMP */
 	irlmp_unregister_client(self->ckey);
 	irlmp_unregister_service(self->skey);
@@ -1275,7 +1245,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1293,7 +1262,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENOTCONN;
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/* Check if IrTTP is wants us to slow down */
 
@@ -1318,9 +1286,9 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size + 16);
-
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1356,16 +1324,16 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 
 	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(!sock_error(sk), return -1;);
+	if ((err = sock_error(sk)) < 0)
+		return err;
 
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		return err;
 
-	skb->h.raw = skb->data;
-	copied     = skb->len;
+	skb_reset_transport_header(skb);
+	copied = skb->len;
 
 	if (copied > size) {
 		IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
@@ -1404,13 +1372,13 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	struct irda_sock *self = irda_sk(sk);
 	int noblock = flags & MSG_DONTWAIT;
 	size_t copied = 0;
-	int target = 1;
-	DECLARE_WAITQUEUE(waitq, current);
+	int target, err;
+	long timeo;
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(!sock_error(sk), return -1;);
+	if ((err = sock_error(sk)) < 0)
+		return err;
 
 	if (sock->flags & __SO_ACCEPTCON)
 		return(-EINVAL);
@@ -1418,8 +1386,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
-	if (flags & MSG_WAITALL)
-		target = size;
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, noblock);
 
 	msg->msg_namelen = 0;
 
@@ -1427,42 +1395,37 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 		int chunk;
 		struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
 
-		if (skb==NULL) {
+		if (skb == NULL) {
+			DEFINE_WAIT(wait);
 			int ret = 0;
 
 			if (copied >= target)
 				break;
 
-			/* The following code is a cut'n'paste of the
-			 * wait_event_interruptible() macro.
-			 * We don't us the macro because the test condition
-			 * is messy. - Jean II */
-			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-			add_wait_queue(sk->sk_sleep, &waitq);
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
 			/*
 			 *	POSIX 1003.1g mandates this order.
 			 */
 			ret = sock_error(sk);
 			if (ret)
-				break;
+				;
 			else if (sk->sk_shutdown & RCV_SHUTDOWN)
 				;
 			else if (noblock)
 				ret = -EAGAIN;
 			else if (signal_pending(current))
-				ret = -ERESTARTSYS;
+				ret = sock_intr_errno(timeo);
+			else if (sk->sk_state != TCP_ESTABLISHED)
+				ret = -ENOTCONN;
 			else if (skb_peek(&sk->sk_receive_queue) == NULL)
 				/* Wait process until data arrives */
 				schedule();
 
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &waitq);
-			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			finish_wait(sk->sk_sleep, &wait);
 
-			if(ret)
-				return(ret);
+			if (ret)
+				return ret;
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
 				break;
 
@@ -1531,7 +1494,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1548,7 +1510,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 		return -ENOTCONN;
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/*
 	 * Check that we don't send out too big frames. This is an unreliable
@@ -1567,10 +1528,11 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size);
+	skb_reset_transport_header(skb);
 
 	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1603,7 +1565,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 	__u8 pid = 0;
 	int bound = 0;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1617,7 +1578,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 	}
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/* Check if an address was specified with sendto. Jean II */
 	if (msg->msg_name) {
@@ -1663,10 +1623,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size);
+	skb_reset_transport_header(skb);
 
 	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1690,8 +1651,6 @@ static int irda_shutdown(struct socket *sock, int how)
 	struct sock *sk = sock->sk;
 	struct irda_sock *self = irda_sk(sk);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
 
 	sk->sk_state       = TCP_CLOSE;
@@ -1864,8 +1823,6 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
 	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
 	int opt;
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
 	if (level != SOL_IRLMP)
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 01d7c9c7b3b4..e5e4792a0314 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
 	 * Inserting is a little bit tricky since we don't know how much
 	 * room we will need. But this should hopefully work OK
 	 */
-	count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb),
-				  &ircomm_param_info);
+	count = irda_param_insert(self, pi, skb_tail_pointer(skb),
+				  skb_tailroom(skb), &ircomm_param_info);
 	if (count < 0) {
 		IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
 		spin_unlock_irqrestore(&self->spinlock, flags);
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e717801b38f9..7b5def1ea633 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(alloc_irdadev);
 dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
 {
 	struct dongle_reg *reg;
-	dongle_t *dongle = NULL;
+	dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
 
 	might_sleep();
 
@@ -397,19 +397,14 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
 	if (!reg || !try_module_get(reg->owner) ) {
 		IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
 			   type);
-		goto out;
+		kfree(dongle);
+		dongle = NULL;
+	}
+	if (dongle) {
+		/* Bind the registration info to this particular instance */
+		dongle->issue = reg;
+		dongle->dev = dev;
 	}
-
-	/* Allocate dongle info for this instance */
-	dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
-	if (!dongle)
-		goto out;
-
-	/* Bind the registration info to this particular instance */
-	dongle->issue = reg;
-	dongle->dev = dev;
-
- out:
 	spin_unlock(&dongles->hb_spinlock);
 	return dongle;
 }
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index fcf9d6599628..ed69773b0f8e 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
 	}
 
 	/* Insert at end of sk-buffer */
-	frame = skb->tail;
+	frame = skb_tail_pointer(skb);
 
 	/* Make space for data */
 	if (skb_tailroom(skb) < (param_len+value_len+3)) {
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 672ab3f69033..c421521c0a99 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -234,8 +234,7 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
 	 * might have been previously set by the low level IrDA network
 	 * device driver
 	 */
-	skb->dev = self->dev;
-	skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
+	skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
 
 	self->stats.rx_packets++;
 	self->stats.rx_bytes += skb->len;
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 7b6433fe1dc2..0b02073ffdf3 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -590,7 +590,7 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
 		if (!self->discovery_log) {
 			IRDA_WARNING("%s: discovery log is gone! "
 				     "maybe the discovery timeout has been set"
-				     " to short?\n", __FUNCTION__);
+				     " too short?\n", __FUNCTION__);
 			break;
 		}
 		hashbin_insert(self->discovery_log,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 0b04603e9c47..3c5a68e36414 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -93,7 +93,9 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
 {
 	/* Some common init stuff */
 	skb->dev = self->netdev;
-	skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->priority = TC_PRIO_BESTEFFORT;
 
@@ -411,7 +413,7 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
 	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
 
 	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -482,7 +484,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
 	char *text;
 
 	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -526,7 +528,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
 		/* Check if things are sane at this point... */
 		if((discovery_info == NULL) ||
 		   !pskb_may_pull(skb, 3)) {
-			IRDA_ERROR("%s: discovery frame to short!\n",
+			IRDA_ERROR("%s: discovery frame too short!\n",
 				   __FUNCTION__);
 			return;
 		}
@@ -1171,7 +1173,7 @@ static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
 	IRDA_ASSERT(info != NULL, return;);
 
 	if (!pskb_may_pull(skb, 4)) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -1260,7 +1262,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
 	if (!pskb_may_pull(skb, sizeof(*frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 	frame = (struct test_frame *) skb->data;
@@ -1268,7 +1270,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
 	/* Broadcast frames must carry saddr and daddr fields */
 	if (info->caddr == CBROADCAST) {
 		if (skb->len < sizeof(struct test_frame)) {
-			IRDA_DEBUG(0, "%s() test frame to short!\n",
+			IRDA_DEBUG(0, "%s() test frame too short!\n",
 				   __FUNCTION__);
 			return;
 		}
@@ -1334,7 +1336,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	/* Check if frame is large enough for parsing */
 	if (!pskb_may_pull(skb, 2)) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		dev_kfree_skb(skb);
 		return -1;
 	}
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 92662330dbcf..d058b467f9e4 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -384,6 +384,9 @@ EXPORT_SYMBOL(hashbin_new);
  *    for deallocating this structure if it's complex. If not the user can
  *    just supply kfree, which should take care of the job.
  */
+#ifdef CONFIG_LOCKDEP
+static int hashbin_lock_depth = 0;
+#endif
 int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 {
 	irda_queue_t* queue;
@@ -395,7 +398,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 
 	/* Synchronize */
 	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+		spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
+					 hashbin_lock_depth++);
 	}
 
 	/*
@@ -419,6 +423,9 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 	/* Release lock */
 	if ( hashbin->hb_type & HB_LOCK) {
 		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+#ifdef CONFIG_LOCKDEP
+		hashbin_lock_depth--;
+#endif
 	}
 
 	/*
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index da3f2bc1b6f6..7069e4a58257 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -256,7 +256,7 @@ static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
 	 *  Copy all fragments to a new buffer
 	 */
 	while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
-		memcpy(skb->data+n, frag->data, frag->len);
+		skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
 		n += frag->len;
 
 		dev_kfree_skb(frag);
@@ -314,8 +314,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
 		skb_reserve(frag, self->max_header_size);
 
 		/* Copy data from the original skb into this fragment. */
-		memcpy(skb_put(frag, self->max_seg_size), skb->data,
-		       self->max_seg_size);
+		skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
+			      self->max_seg_size);
 
 		/* Insert TTP header, with the more bit set */
 		frame = skb_push(frag, TTP_HEADER);
@@ -551,7 +551,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 	}
 
 	if (skb->len > self->max_seg_size) {
-		IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n",
+		IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
 			   __FUNCTION__);
 		goto err;
 	}
@@ -598,7 +598,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
 	 *  inside an IrLAP frame
 	 */
 	if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
-		IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n",
+		IRDA_ERROR("%s: SAR disabled, and data is too large for IrLAP!\n",
 			   __FUNCTION__);
 		ret = -EMSGSIZE;
 		goto err;
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 75a72d203b01..2627dad7cd87 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -160,7 +160,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
 	}
 	/* Check if buffer is long enough for insertion */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for insertion!\n",
+		IRDA_WARNING("%s: buffer too short for insertion!\n",
 			     __FUNCTION__);
 		return -1;
 	}
@@ -216,7 +216,7 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
@@ -304,7 +304,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
@@ -343,7 +343,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 349012c926b7..aeb18cf1dcae 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
 	int ret;
 
 	/* Insert data rate */
-	ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail,
+	ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert max turnaround time */
-	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail,
+	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert data size */
-	ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail,
+	ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert window size */
-	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail,
+	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert additional BOFs */
-	ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail,
+	ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert minimum turnaround time */
-	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail,
+	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert link disconnect/threshold time */
-	ret = irda_param_insert(self, PI_LINK_DISC, skb->tail,
+	ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 5abfb71aae8d..a7a7f191f1a8 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -239,7 +239,8 @@ async_bump(struct net_device *dev,
 
 	if(docopy) {
 		/* Copy data without CRC (lenght already checked) */
-		memcpy(newskb->data, rx_buff->data, rx_buff->len - 2);
+		skb_copy_to_linear_data(newskb, rx_buff->data,
+					rx_buff->len - 2);
 		/* Deliver this skb */
 		dataskb = newskb;
 	} else {
@@ -256,7 +257,7 @@ async_bump(struct net_device *dev,
 
 	/* Feed it to IrLAP layer */
 	dataskb->dev = dev;
-	dataskb->mac.raw  = dataskb->data;
+	skb_reset_mac_header(dataskb);
 	dataskb->protocol = htons(ETH_P_IRDA);
 
 	netif_rx(dataskb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index acc94214bde6..e84c924a81ee 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -181,7 +181,7 @@ static void iucv_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 	iucv_sock_kill(sk);
@@ -953,8 +953,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 			return;
 		}
 
-		skb->h.raw = skb->data;
-		skb->nh.raw = skb->data;
+		skb_reset_transport_header(skb);
+		skb_reset_network_header(skb);
 		skb->len = msg->length;
 	}
 
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1b10d576f222..903bdb6eaaa1 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -90,20 +90,43 @@ struct iucv_irq_data {
 	u32 res2[8];
 };
 
-struct iucv_work {
+struct iucv_irq_list {
 	struct list_head list;
 	struct iucv_irq_data data;
 };
 
-static LIST_HEAD(iucv_work_queue);
-static DEFINE_SPINLOCK(iucv_work_lock);
-
 static struct iucv_irq_data *iucv_irq_data;
 static cpumask_t iucv_buffer_cpumask = CPU_MASK_NONE;
 static cpumask_t iucv_irq_cpumask = CPU_MASK_NONE;
 
-static void iucv_tasklet_handler(unsigned long);
-static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_handler,0);
+/*
+ * Queue of interrupt buffers for delivery via the tasklet
+ * (fast but can't call smp_call_function).
+ */
+static LIST_HEAD(iucv_task_queue);
+
+/*
+ * The tasklet for fast delivery of iucv interrupts.
+ */
+static void iucv_tasklet_fn(unsigned long);
+static DECLARE_TASKLET(iucv_tasklet, iucv_tasklet_fn,0);
+
+/*
+ * Queue of interrupt buffers for delivery via a work queue
+ * (slower but can call smp_call_function).
+ */
+static LIST_HEAD(iucv_work_queue);
+
+/*
+ * The work element to deliver path pending interrupts.
+ */
+static void iucv_work_fn(struct work_struct *work);
+static DECLARE_WORK(iucv_work, iucv_work_fn);
+
+/*
+ * Spinlock protecting task and work queue.
+ */
+static DEFINE_SPINLOCK(iucv_queue_lock);
 
 enum iucv_command_codes {
 	IUCV_QUERY = 0,
@@ -147,10 +170,10 @@ static unsigned long iucv_max_pathid;
 static DEFINE_SPINLOCK(iucv_table_lock);
 
 /*
- * iucv_tasklet_cpu: contains the number of the cpu executing the tasklet.
- * Needed for iucv_path_sever called from tasklet.
+ * iucv_active_cpu: contains the number of the cpu executing the tasklet
+ * or the work handler. Needed for iucv_path_sever called from tasklet.
  */
-static int iucv_tasklet_cpu = -1;
+static int iucv_active_cpu = -1;
 
 /*
  * Mutex and wait queue for iucv_register/iucv_unregister.
@@ -449,17 +472,19 @@ static void iucv_setmask_mp(void)
 {
 	int cpu;
 
+	preempt_disable();
 	for_each_online_cpu(cpu)
 		/* Enable all cpus with a declared buffer. */
 		if (cpu_isset(cpu, iucv_buffer_cpumask) &&
 		    !cpu_isset(cpu, iucv_irq_cpumask))
 			smp_call_function_on(iucv_allow_cpu, NULL, 0, 1, cpu);
+	preempt_enable();
 }
 
 /**
  * iucv_setmask_up
  *
- * Allow iucv interrupts on a single cpus.
+ * Allow iucv interrupts on a single cpu.
  */
 static void iucv_setmask_up(void)
 {
@@ -493,8 +518,10 @@ static int iucv_enable(void)
 		goto out;
 	/* Declare per cpu buffers. */
 	rc = -EIO;
+	preempt_disable();
 	for_each_online_cpu(cpu)
 		smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu);
+	preempt_enable();
 	if (cpus_empty(iucv_buffer_cpumask))
 		/* No cpu could declare an iucv buffer. */
 		goto out_path;
@@ -519,7 +546,6 @@ static void iucv_disable(void)
 	kfree(iucv_path_table);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 				     unsigned long action, void *hcpu)
 {
@@ -565,7 +591,6 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 static struct notifier_block iucv_cpu_notifier = {
 	.notifier_call = iucv_cpu_notify,
 };
-#endif
 
 /**
  * iucv_sever_pathid
@@ -586,48 +611,49 @@ static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
 	return iucv_call_b2f0(IUCV_SEVER, parm);
 }
 
+#ifdef CONFIG_SMP
 /**
- * __iucv_cleanup_pathid
+ * __iucv_cleanup_queue
  * @dummy: unused dummy argument
  *
  * Nop function called via smp_call_function to force work items from
  * pending external iucv interrupts to the work queue.
  */
-static void __iucv_cleanup_pathid(void *dummy)
+static void __iucv_cleanup_queue(void *dummy)
 {
 }
+#endif
 
 /**
- * iucv_cleanup_pathid
- * @pathid: 16 bit pathid
+ * iucv_cleanup_queue
  *
  * Function called after a path has been severed to find all remaining
  * work items for the now stale pathid. The caller needs to hold the
  * iucv_table_lock.
  */
-static void iucv_cleanup_pathid(u16 pathid)
+static void iucv_cleanup_queue(void)
 {
-	struct iucv_work *p, *n;
+	struct iucv_irq_list *p, *n;
 
 	/*
-	 * Path is severed, the pathid can be reused immediatly on
-	 * a iucv connect or a connection pending interrupt.
-	 * iucv_path_connect and connection pending interrupt will
-	 * wait until the iucv_table_lock is released before the
-	 * recycled pathid enters the system.
-	 * Force remaining interrupts to the work queue, then
-	 * scan the work queue for items of this path.
+	 * When a path is severed, the pathid can be reused immediately
+	 * on a iucv connect or a connection pending interrupt. Remove
+	 * all entries from the task queue that refer to a stale pathid
+	 * (iucv_path_table[ix] == NULL). Only then do the iucv connect
+	 * or deliver the connection pending interrupt. To get all the
+	 * pending interrupts force them to the work queue by calling
+	 * an empty function on all cpus.
 	 */
-	smp_call_function(__iucv_cleanup_pathid, NULL, 0, 1);
-	spin_lock_irq(&iucv_work_lock);
-	list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
-		/* Remove work items for pathid except connection pending */
-		if (p->data.ippathid == pathid && p->data.iptype != 0x01) {
+	smp_call_function(__iucv_cleanup_queue, NULL, 0, 1);
+	spin_lock_irq(&iucv_queue_lock);
+	list_for_each_entry_safe(p, n, &iucv_task_queue, list) {
+		/* Remove stale work items from the task queue. */
+		if (iucv_path_table[p->data.ippathid] == NULL) {
 			list_del(&p->list);
 			kfree(p);
 		}
 	}
-	spin_unlock_irq(&iucv_work_lock);
+	spin_unlock_irq(&iucv_queue_lock);
 }
 
 /**
@@ -686,7 +712,6 @@ void iucv_unregister(struct iucv_handler *handler, int smp)
 		iucv_sever_pathid(p->pathid, NULL);
 		iucv_path_table[p->pathid] = NULL;
 		list_del(&p->list);
-		iucv_cleanup_pathid(p->pathid);
 		iucv_path_free(p);
 	}
 	spin_unlock_bh(&iucv_table_lock);
@@ -759,9 +784,9 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
 	union iucv_param *parm;
 	int rc;
 
-	preempt_disable();
-	if (iucv_tasklet_cpu != smp_processor_id())
-		spin_lock_bh(&iucv_table_lock);
+	BUG_ON(in_atomic());
+	spin_lock_bh(&iucv_table_lock);
+	iucv_cleanup_queue();
 	parm = percpu_ptr(iucv_param, smp_processor_id());
 	memset(parm, 0, sizeof(union iucv_param));
 	parm->ctrl.ipmsglim = path->msglim;
@@ -796,9 +821,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
 			rc = -EIO;
 		}
 	}
-	if (iucv_tasklet_cpu != smp_processor_id())
-		spin_unlock_bh(&iucv_table_lock);
-	preempt_enable();
+	spin_unlock_bh(&iucv_table_lock);
 	return rc;
 }
 
@@ -869,15 +892,14 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
 
 
 	preempt_disable();
-	if (iucv_tasklet_cpu != smp_processor_id())
+	if (iucv_active_cpu != smp_processor_id())
 		spin_lock_bh(&iucv_table_lock);
 	rc = iucv_sever_pathid(path->pathid, userdata);
 	if (!rc) {
 		iucv_path_table[path->pathid] = NULL;
 		list_del_init(&path->list);
-		iucv_cleanup_pathid(path->pathid);
 	}
-	if (iucv_tasklet_cpu != smp_processor_id())
+	if (iucv_active_cpu != smp_processor_id())
 		spin_unlock_bh(&iucv_table_lock);
 	preempt_enable();
 	return rc;
@@ -1246,8 +1268,7 @@ static void iucv_path_complete(struct iucv_irq_data *data)
 	struct iucv_path_complete *ipc = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipc->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_complete)
+	if (path && path->handler && path->handler->path_complete)
 		path->handler->path_complete(path, ipc->ipuser);
 }
 
@@ -1275,14 +1296,14 @@ static void iucv_path_severed(struct iucv_irq_data *data)
 	struct iucv_path_severed *ips = (void *) data;
 	struct iucv_path *path = iucv_path_table[ips->ippathid];
 
-	BUG_ON(!path || !path->handler);
+	if (!path || !path->handler)	/* Already severed */
+		return;
 	if (path->handler->path_severed)
 		path->handler->path_severed(path, ips->ipuser);
 	else {
 		iucv_sever_pathid(path->pathid, NULL);
 		iucv_path_table[path->pathid] = NULL;
 		list_del_init(&path->list);
-		iucv_cleanup_pathid(path->pathid);
 		iucv_path_free(path);
 	}
 }
@@ -1311,8 +1332,7 @@ static void iucv_path_quiesced(struct iucv_irq_data *data)
 	struct iucv_path_quiesced *ipq = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipq->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_quiesced)
+	if (path && path->handler && path->handler->path_quiesced)
 		path->handler->path_quiesced(path, ipq->ipuser);
 }
 
@@ -1340,8 +1360,7 @@ static void iucv_path_resumed(struct iucv_irq_data *data)
 	struct iucv_path_resumed *ipr = (void *) data;
 	struct iucv_path *path = iucv_path_table[ipr->ippathid];
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->path_resumed)
+	if (path && path->handler && path->handler->path_resumed)
 		path->handler->path_resumed(path, ipr->ipuser);
 }
 
@@ -1373,8 +1392,7 @@ static void iucv_message_complete(struct iucv_irq_data *data)
 	struct iucv_path *path = iucv_path_table[imc->ippathid];
 	struct iucv_message msg;
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->message_complete) {
+	if (path && path->handler && path->handler->message_complete) {
 		msg.flags = imc->ipflags1;
 		msg.id = imc->ipmsgid;
 		msg.audit = imc->ipaudit;
@@ -1419,8 +1437,7 @@ static void iucv_message_pending(struct iucv_irq_data *data)
 	struct iucv_path *path = iucv_path_table[imp->ippathid];
 	struct iucv_message msg;
 
-	BUG_ON(!path || !path->handler);
-	if (path->handler->message_pending) {
+	if (path && path->handler && path->handler->message_pending) {
 		msg.flags = imp->ipflags1;
 		msg.id = imp->ipmsgid;
 		msg.class = imp->iptrgcls;
@@ -1435,17 +1452,16 @@ static void iucv_message_pending(struct iucv_irq_data *data)
 }
 
 /**
- * iucv_tasklet_handler:
+ * iucv_tasklet_fn:
  *
  * This tasklet loops over the queue of irq buffers created by
  * iucv_external_interrupt, calls the appropriate action handler
  * and then frees the buffer.
  */
-static void iucv_tasklet_handler(unsigned long ignored)
+static void iucv_tasklet_fn(unsigned long ignored)
 {
 	typedef void iucv_irq_fn(struct iucv_irq_data *);
 	static iucv_irq_fn *irq_fn[] = {
-		[0x01] = iucv_path_pending,
 		[0x02] = iucv_path_complete,
 		[0x03] = iucv_path_severed,
 		[0x04] = iucv_path_quiesced,
@@ -1455,38 +1471,70 @@ static void iucv_tasklet_handler(unsigned long ignored)
 		[0x08] = iucv_message_pending,
 		[0x09] = iucv_message_pending,
 	};
-	struct iucv_work *p;
+	struct list_head task_queue = LIST_HEAD_INIT(task_queue);
+	struct iucv_irq_list *p, *n;
 
 	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
 	spin_lock(&iucv_table_lock);
-	iucv_tasklet_cpu = smp_processor_id();
+	iucv_active_cpu = smp_processor_id();
 
-	spin_lock_irq(&iucv_work_lock);
-	while (!list_empty(&iucv_work_queue)) {
-		p = list_entry(iucv_work_queue.next, struct iucv_work, list);
+	spin_lock_irq(&iucv_queue_lock);
+	list_splice_init(&iucv_task_queue, &task_queue);
+	spin_unlock_irq(&iucv_queue_lock);
+
+	list_for_each_entry_safe(p, n, &task_queue, list) {
 		list_del_init(&p->list);
-		spin_unlock_irq(&iucv_work_lock);
 		irq_fn[p->data.iptype](&p->data);
 		kfree(p);
-		spin_lock_irq(&iucv_work_lock);
 	}
-	spin_unlock_irq(&iucv_work_lock);
 
-	iucv_tasklet_cpu = -1;
+	iucv_active_cpu = -1;
 	spin_unlock(&iucv_table_lock);
 }
 
 /**
+ * iucv_work_fn:
+ *
+ * This work function loops over the queue of path pending irq blocks
+ * created by iucv_external_interrupt, calls the appropriate action
+ * handler and then frees the buffer.
+ */
+static void iucv_work_fn(struct work_struct *work)
+{
+	typedef void iucv_irq_fn(struct iucv_irq_data *);
+	struct list_head work_queue = LIST_HEAD_INIT(work_queue);
+	struct iucv_irq_list *p, *n;
+
+	/* Serialize tasklet, iucv_path_sever and iucv_path_connect. */
+	spin_lock_bh(&iucv_table_lock);
+	iucv_active_cpu = smp_processor_id();
+
+	spin_lock_irq(&iucv_queue_lock);
+	list_splice_init(&iucv_work_queue, &work_queue);
+	spin_unlock_irq(&iucv_queue_lock);
+
+	iucv_cleanup_queue();
+	list_for_each_entry_safe(p, n, &work_queue, list) {
+		list_del_init(&p->list);
+		iucv_path_pending(&p->data);
+		kfree(p);
+	}
+
+	iucv_active_cpu = -1;
+	spin_unlock_bh(&iucv_table_lock);
+}
+
+/**
  * iucv_external_interrupt
  * @code: irq code
  *
  * Handles external interrupts coming in from CP.
- * Places the interrupt buffer on a queue and schedules iucv_tasklet_handler().
+ * Queues the interrupt buffer and schedules iucv_work_fn() or iucv_tasklet_fn().
  */
 static void iucv_external_interrupt(u16 code)
 {
 	struct iucv_irq_data *p;
-	struct iucv_work *work;
+	struct iucv_irq_list *work;
 
 	p = percpu_ptr(iucv_irq_data, smp_processor_id());
 	if (p->ippathid >= iucv_max_pathid) {
@@ -1500,16 +1548,23 @@ static void iucv_external_interrupt(u16 code)
 		printk(KERN_ERR "iucv_do_int: unknown iucv interrupt\n");
 		return;
 	}
-	work = kmalloc(sizeof(struct iucv_work), GFP_ATOMIC);
+	work = kmalloc(sizeof(struct iucv_irq_list), GFP_ATOMIC);
 	if (!work) {
 		printk(KERN_WARNING "iucv_external_interrupt: out of memory\n");
 		return;
 	}
 	memcpy(&work->data, p, sizeof(work->data));
-	spin_lock(&iucv_work_lock);
-	list_add_tail(&work->list, &iucv_work_queue);
-	spin_unlock(&iucv_work_lock);
-	tasklet_schedule(&iucv_tasklet);
+	spin_lock(&iucv_queue_lock);
+	if (p->iptype == 0x01) {
+		/* Path pending interrupt. */
+		list_add_tail(&work->list, &iucv_work_queue);
+		schedule_work(&iucv_work);
+	} else {
+		/* The other interrupts. */
+		list_add_tail(&work->list, &iucv_task_queue);
+		tasklet_schedule(&iucv_tasklet);
+	}
+	spin_unlock(&iucv_queue_lock);
 }
 
 /**
@@ -1579,12 +1634,14 @@ out:
  */
 static void iucv_exit(void)
 {
-	struct iucv_work *p, *n;
+	struct iucv_irq_list *p, *n;
 
-	spin_lock_irq(&iucv_work_lock);
+	spin_lock_irq(&iucv_queue_lock);
+	list_for_each_entry_safe(p, n, &iucv_task_queue, list)
+		kfree(p);
 	list_for_each_entry_safe(p, n, &iucv_work_queue, list)
 		kfree(p);
-	spin_unlock_irq(&iucv_work_lock);
+	spin_unlock_irq(&iucv_queue_lock);
 	unregister_hotcpu_notifier(&iucv_cpu_notifier);
 	percpu_free(iucv_param);
 	percpu_free(iucv_irq_data);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index a4e7e2db0ff3..a99444142dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
 		 */
 		return -EINVAL;
 		break;
-	};
+	}
 
 	return 0;
 }
@@ -630,6 +630,35 @@ pfkey_sockaddr_size(sa_family_t family)
 	/* NOTREACHED */
 }
 
+static inline int pfkey_mode_from_xfrm(int mode)
+{
+	switch(mode) {
+	case XFRM_MODE_TRANSPORT:
+		return IPSEC_MODE_TRANSPORT;
+	case XFRM_MODE_TUNNEL:
+		return IPSEC_MODE_TUNNEL;
+	case XFRM_MODE_BEET:
+		return IPSEC_MODE_BEET;
+	default:
+		return -1;
+	}
+}
+
+static inline int pfkey_mode_to_xfrm(int mode)
+{
+	switch(mode) {
+	case IPSEC_MODE_ANY:	/*XXX*/
+	case IPSEC_MODE_TRANSPORT:
+		return XFRM_MODE_TRANSPORT;
+	case IPSEC_MODE_TUNNEL:
+		return XFRM_MODE_TUNNEL;
+	case IPSEC_MODE_BEET:
+		return XFRM_MODE_BEET;
+	default:
+		return -1;
+	}
+}
+
 static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc)
 {
 	struct sk_buff *skb;
@@ -651,6 +680,7 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	int encrypt_key_size = 0;
 	int sockaddr_size;
 	struct xfrm_encap_tmpl *natt = NULL;
+	int mode;
 
 	/* address family check */
 	sockaddr_size = pfkey_sockaddr_size(x->props.family);
@@ -928,7 +958,11 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	sa2 = (struct sadb_x_sa2 *)  skb_put(skb, sizeof(struct sadb_x_sa2));
 	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
 	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
-	sa2->sadb_x_sa2_mode = x->props.mode + 1;
+	if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(-EINVAL);
+	}
+	sa2->sadb_x_sa2_mode = mode;
 	sa2->sadb_x_sa2_reserved1 = 0;
 	sa2->sadb_x_sa2_reserved2 = 0;
 	sa2->sadb_x_sa2_sequence = 0;
@@ -1155,9 +1189,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 
 	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
 		struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
-		x->props.mode = sa2->sadb_x_sa2_mode;
-		if (x->props.mode)
-			x->props.mode--;
+		int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
+		if (mode < 0) {
+			err = -EINVAL;
+			goto out;
+		}
+		x->props.mode = mode;
 		x->props.reqid = sa2->sadb_x_sa2_reqid;
 	}
 
@@ -1218,7 +1255,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	struct sadb_address *saddr, *daddr;
 	struct sadb_msg *out_hdr;
 	struct xfrm_state *x = NULL;
-	u8 mode;
+	int mode;
 	u32 reqid;
 	u8 proto;
 	unsigned short family;
@@ -1233,7 +1270,9 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 		return -EINVAL;
 
 	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
-		mode = sa2->sadb_x_sa2_mode - 1;
+		mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
+		if (mode < 0)
+			return -EINVAL;
 		reqid = sa2->sadb_x_sa2_reqid;
 	} else {
 		mode = 0;
@@ -1756,6 +1795,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct sockaddr_in6 *sin6;
 #endif
+	int mode;
 
 	if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
 		return -ELOOP;
@@ -1764,7 +1804,9 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
 		return -EINVAL;
 
 	t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
-	t->mode = rq->sadb_x_ipsecrequest_mode-1;
+	if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
+		return -EINVAL;
+	t->mode = mode;
 	if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
 		t->optional = 1;
 	else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
@@ -1877,7 +1919,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
 	return skb;
 }
 
-static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
+static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
 {
 	struct sadb_msg *hdr;
 	struct sadb_address *addr;
@@ -2014,6 +2056,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 		struct sadb_x_ipsecrequest *rq;
 		struct xfrm_tmpl *t = xp->xfrm_vec + i;
 		int req_size;
+		int mode;
 
 		req_size = sizeof(struct sadb_x_ipsecrequest);
 		if (t->mode == XFRM_MODE_TUNNEL)
@@ -2027,7 +2070,9 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 		memset(rq, 0, sizeof(*rq));
 		rq->sadb_x_ipsecrequest_len = req_size;
 		rq->sadb_x_ipsecrequest_proto = t->id.proto;
-		rq->sadb_x_ipsecrequest_mode = t->mode+1;
+		if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0)
+			return -EINVAL;
+		rq->sadb_x_ipsecrequest_mode = mode;
 		rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE;
 		if (t->reqid)
 			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE;
@@ -2089,6 +2134,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 
 	hdr->sadb_msg_len = size / sizeof(uint64_t);
 	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
+
+	return 0;
 }
 
 static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c)
@@ -2102,7 +2149,9 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c
 		err = PTR_ERR(out_skb);
 		goto out;
 	}
-	pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	if (err < 0)
+		return err;
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = PF_KEY_V2;
@@ -2327,7 +2376,9 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb
 		err =  PTR_ERR(out_skb);
 		goto out;
 	}
-	pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	if (err < 0)
+		goto out;
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
@@ -2409,6 +2460,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 {
 	int err;
 	struct sadb_x_ipsecrequest *rq2;
+	int mode;
 
 	if (len <= sizeof(struct sadb_x_ipsecrequest) ||
 	    len < rq1->sadb_x_ipsecrequest_len)
@@ -2439,7 +2491,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
 		return -EINVAL;
 
 	m->proto = rq1->sadb_x_ipsecrequest_proto;
-	m->mode = rq1->sadb_x_ipsecrequest_mode - 1;
+	if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0)
+		return -EINVAL;
+	m->mode = mode;
 	m->reqid = rq1->sadb_x_ipsecrequest_reqid;
 
 	return ((int)(rq1->sadb_x_ipsecrequest_len +
@@ -2579,12 +2633,15 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
 	struct pfkey_dump_data *data = ptr;
 	struct sk_buff *out_skb;
 	struct sadb_msg *out_hdr;
+	int err;
 
 	out_skb = pfkey_xfrm_policy2msg_prep(xp);
 	if (IS_ERR(out_skb))
 		return PTR_ERR(out_skb);
 
-	pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
+	if (err < 0)
+		return err;
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
@@ -3513,7 +3570,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 
 	for (i = 0, mp = m; i < num_bundles; i++, mp++) {
 		/* old ipsecrequest */
-		if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+		int mode = pfkey_mode_from_xfrm(mp->mode);
+		if (mode < 0)
+			return -EINVAL;
+		if (set_ipsecrequest(skb, mp->proto, mode,
 				     (mp->reqid ?  IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
 				     mp->reqid, mp->old_family,
 				     &mp->old_saddr, &mp->old_daddr) < 0) {
@@ -3521,7 +3581,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 		}
 
 		/* new ipsecrequest */
-		if (set_ipsecrequest(skb, mp->proto, mp->mode + 1,
+		if (set_ipsecrequest(skb, mp->proto, mode,
 				     (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE),
 				     mp->reqid, mp->new_family,
 				     &mp->new_saddr, &mp->new_daddr) < 0) {
@@ -3607,7 +3667,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err)
 		goto out_free;
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index b3f65d1e80b1..099ed8fec145 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -112,7 +112,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, llc_len)))
 		return 0;
 
-	skb->h.raw += llc_len;
+	skb->transport_header += llc_len;
 	skb_pull(skb, llc_len);
 	if (skb->protocol == htons(ETH_P_802_2)) {
 		__be16 pdulen = eth_hdr(skb)->h_proto;
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index f4291f349e92..754f4fedc852 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -41,7 +41,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		struct net_device *dev = skb->dev;
 		struct trh_hdr *trh;
 
-		skb->mac.raw = skb_push(skb, sizeof(*trh));
+		skb_push(skb, sizeof(*trh));
+		skb_reset_mac_header(skb);
 		trh = tr_hdr(skb);
 		trh->ac = AC;
 		trh->fc = LLC_FRAME;
@@ -52,7 +53,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		if (da) {
 			memcpy(trh->daddr, da, dev->addr_len);
 			tr_source_route(skb, trh, dev);
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 		}
 		break;
 	}
@@ -62,7 +63,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		unsigned short len = skb->len;
 		struct ethhdr *eth;
 
-		skb->mac.raw = skb_push(skb, sizeof(*eth));
+		skb_push(skb, sizeof(*eth));
+		skb_reset_mac_header(skb);
 		eth = eth_hdr(skb);
 		eth->h_proto = htons(len);
 		memcpy(eth->h_dest, da, ETH_ALEN);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 2615dc81aa36..2525165e2e8f 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -36,11 +36,12 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
 	struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
 
 	if (skb) {
+		skb_reset_mac_header(skb);
 		skb_reserve(skb, 50);
-		skb->nh.raw   = skb->h.raw = skb->data;
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
 		skb->protocol = htons(ETH_P_802_2);
 		skb->dev      = dev;
-		skb->mac.raw  = skb->head;
 		if (sk != NULL)
 			skb_set_owner_w(skb, sk);
 	}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54698af6d0af..c558f3214255 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,6 +25,7 @@ config NETFILTER_NETLINK_LOG
 	  and is also scheduled to replace the old syslog-based ipt_LOG
 	  and ip6t_LOG modules.
 
+# Rename this to NF_CONNTRACK in 2.6.25
 config NF_CONNTRACK_ENABLED
 	tristate "Netfilter connection tracking support"
 	help
@@ -39,42 +40,9 @@ config NF_CONNTRACK_ENABLED
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-choice
-	prompt "Netfilter connection tracking support"
-	depends on NF_CONNTRACK_ENABLED
-
-config NF_CONNTRACK_SUPPORT
-	bool "Layer 3 Independent Connection tracking"
-	help
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
-
-	  This is required to do Masquerading or other kinds of Network
-	  Address Translation (except for Fast NAT).  It can also be used to
-	  enhance packet filtering (see `Connection state match support'
-	  below).
-
-config IP_NF_CONNTRACK_SUPPORT
-	bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
-	help
-	  The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
-
-	  This is required to do Masquerading or other kinds of Network
-	  Address Translation (except for Fast NAT).  It can also be used to
-	  enhance packet filtering (see `Connection state match support'
-	  below).
-
-endchoice
-
 config NF_CONNTRACK
 	tristate
-	default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
-	default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
-
-config IP_NF_CONNTRACK
-	tristate
-	default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
-	default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
+	default NF_CONNTRACK_ENABLED
 
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
@@ -303,9 +271,8 @@ config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value.  Similar to the MARK target, but
@@ -366,7 +333,7 @@ config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
@@ -387,9 +354,7 @@ config NETFILTER_XT_TARGET_SECMARK
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
-	depends on NETFILTER_XTABLES && \
-		   ((NF_CONNTRACK && NF_CONNTRACK_SECMARK) || \
-		    (IP_NF_CONNTRACK && IP_NF_CONNTRACK_SECMARK))
+	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
 	help
 	  The CONNSECMARK target copies security markings from packets
 	  to connections, and restores security markings from connections
@@ -437,9 +402,8 @@ config NETFILTER_XT_MATCH_COMMENT
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CT_ACCT if IP_NF_CONNTRACK
-	select NF_CT_ACCT if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CT_ACCT
 	help
 	  This option adds a `connbytes' match, which allows you to match the
 	  number of bytes and/or packets for each direction within a connection.
@@ -450,9 +414,8 @@ config NETFILTER_XT_MATCH_CONNBYTES
 config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'. 
@@ -464,7 +427,7 @@ config NETFILTER_XT_MATCH_CONNMARK
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  This is a general conntrack match module, a superset of the state match.
 
@@ -508,7 +471,7 @@ config NETFILTER_XT_MATCH_ESP
 config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  Helper matching allows you to match packets in dynamic connections
 	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -632,7 +595,7 @@ config NETFILTER_XT_MATCH_SCTP
 config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  Connection state matching allows you to match packets based on their
 	  relationship to a tracked connection (ie. previous packets).  This
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c3ebdbd917e9..a84478ee2ded 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,10 +5,6 @@
  * way.
  *
  * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000:   Added NF_REPEAT --RR.
- * 08-May-2003:	  Internal logging interface added by Jozsef Kadlecsik.
  */
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
@@ -244,6 +240,7 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(nf_proto_csum_replace4);
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
@@ -264,6 +261,22 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
+void (*nf_ct_destroy)(struct nf_conntrack *);
+EXPORT_SYMBOL(nf_ct_destroy);
+
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+	void (*destroy)(struct nf_conntrack *);
+
+	rcu_read_lock();
+	destroy = rcu_dereference(nf_ct_destroy);
+	BUG_ON(destroy == NULL);
+	destroy(nfct);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+#endif /* CONFIG_NF_CONNTRACK */
+
 #ifdef CONFIG_PROC_FS
 struct proc_dir_entry *proc_net_netfilter;
 EXPORT_SYMBOL(proc_net_netfilter);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3a70eb6d42a..e132c8ae8784 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -9,24 +9,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- *	- new API and handling of conntrack/nat helpers
- *	- now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- *	- add usage/reference counts to ip_conntrack_expect
- *	- export ip_conntrack[_expect]_{find_get,put} functions
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- generalize L3 protocol denendent part.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add support various size of conntrack structures.
- * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
- * 	- restructure nf_conn (introduce nf_conn_help)
- * 	- redesign 'features' how they were originally intended
- * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
- * 	- add support for L3 protocol module load on demand.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_core.c
  */
 
 #include <linux/types.h>
@@ -128,10 +110,11 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 				  unsigned int size, unsigned int rnd)
 {
 	unsigned int a, b;
-	a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
-		  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
-	b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
-			(tuple->src.u.all << 16) | tuple->dst.u.all);
+
+	a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
+		   (tuple->src.l3num << 16) | tuple->dst.protonum);
+	b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+		   (tuple->src.u.all << 16) | tuple->dst.u.all);
 
 	return jhash_2words(a, b, rnd) % size;
 }
@@ -633,13 +616,11 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	memset(conntrack, 0, nf_ct_cache[features].size);
 	conntrack->features = features;
 	atomic_set(&conntrack->ct_general.use, 1);
-	conntrack->ct_general.destroy = destroy_conntrack;
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
-	init_timer(&conntrack->timeout);
-	conntrack->timeout.data = (unsigned long)conntrack;
-	conntrack->timeout.function = death_by_timeout;
+	setup_timer(&conntrack->timeout, death_by_timeout,
+		    (unsigned long)conntrack);
 	read_unlock_bh(&nf_ct_cache_lock);
 
 	return conntrack;
@@ -768,7 +749,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
-	if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
+	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
 			     l4proto)) {
 		DEBUGP("resolve_normal_ct: Can't get tuple\n");
@@ -960,7 +941,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	if (do_acct) {
 		ct->counters[CTINFO2DIR(ctinfo)].packets++;
 		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-			skb->len - (unsigned int)(skb->nh.raw - skb->data);
+			skb->len - skb_network_offset(skb);
 
 		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
 		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
@@ -1140,6 +1121,8 @@ void nf_conntrack_cleanup(void)
 	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
 		schedule();
 
+	rcu_assign_pointer(nf_ct_destroy, NULL);
+
 	for (i = 0; i < NF_CT_F_NUM; i++) {
 		if (nf_ct_cache[i].use == 0)
 			continue;
@@ -1152,14 +1135,7 @@ void nf_conntrack_cleanup(void)
 	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
 			    nf_conntrack_htable_size);
 
-	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
-
-	/* free l3proto protocol tables */
-	for (i = 0; i < PF_MAX; i++)
-		if (nf_ct_protos[i]) {
-			kfree(nf_ct_protos[i]);
-			nf_ct_protos[i] = NULL;
-		}
+	nf_conntrack_proto_fini();
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1237,7 +1213,6 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
 
 int __init nf_conntrack_init(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1279,18 +1254,13 @@ int __init nf_conntrack_init(void)
 		goto err_free_conntrack_slab;
 	}
 
-	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto out_free_expect_slab;
 
-	/* Don't NEED lock here, but good form anyway. */
-	write_lock_bh(&nf_conntrack_lock);
-	for (i = 0; i < AF_MAX; i++)
-		nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
-	write_unlock_bh(&nf_conntrack_lock);
-
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
+	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1a223e0c0856..6bd421df2dbc 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -91,3 +91,26 @@ void nf_ct_event_cache_flush(void)
 	}
 }
 
+int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
+
+int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ce70a6fc6bda..c31af29a4439 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -290,9 +290,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
 	master_help->expecting++;
 	list_add(&exp->list, &nf_conntrack_expect_list);
 
-	init_timer(&exp->timeout);
-	exp->timeout.data = (unsigned long)exp;
-	exp->timeout.function = expectation_timed_out;
+	setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
 	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3089dfc40c88..a186799f6542 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -7,12 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *	- track EPRT and EPSV commands with IPv6 address.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bb26a658cc1c..1093478cc007 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -46,7 +46,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct in_device *in_dev;
 	__be32 mask = 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48f05314ebf7..aa1a97ee514b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -6,9 +6,6 @@
  * (C) 2003 by Patrick Mchardy <kaber@trash.net>
  * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
  *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
  * Initial connection tracking via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
  *
@@ -16,8 +13,6 @@
  *
  * This software may be used and distributed according to the terms
  * of the GNU General Public License, incorporated herein by reference.
- *
- * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
  */
 
 #include <linux/init.h>
@@ -33,6 +28,7 @@
 #include <linux/notifier.h>
 
 #include <linux/netfilter.h>
+#include <net/netlink.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -268,9 +264,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nfattr *nest_parms;
-	unsigned char *b;
-
-	b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	event |= NFNL_SUBSYS_CTNETLINK << 8;
 	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -303,12 +297,12 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_use(skb, ct) < 0)
 		goto nfattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 nfattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -322,7 +316,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	struct nf_conn *ct = (struct nf_conn *)ptr;
 	struct sk_buff *skb;
 	unsigned int type;
-	unsigned char *b;
+	sk_buff_data_t b;
 	unsigned int flags = 0, group;
 
 	/* ignore our fake conntrack entry */
@@ -662,7 +656,7 @@ static const size_t cta_min[CTA_MAX] = {
 
 static int
 ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -710,7 +704,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 static int
 ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -721,22 +715,12 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	int err = 0;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		u32 rlen;
-
 #ifndef CONFIG_NF_CT_ACCT
 		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
+		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+					  ctnetlink_done);
 	}
 
 	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1010,7 +994,7 @@ err:
 
 static int
 ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -1152,9 +1136,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned char *b;
-
-	b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
 	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -1168,12 +1150,12 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
 		goto nfattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 nfattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1186,7 +1168,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
 	struct sk_buff *skb;
 	unsigned int type;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int flags = 0;
 
 	if (events & IPEXP_NEW) {
@@ -1263,7 +1245,7 @@ static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
 
 static int
 ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -1276,17 +1258,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		return -EINVAL;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		u32 rlen;
-
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_exp_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
+		return netlink_dump_start(ctnl, skb, nlh,
+					  ctnetlink_exp_dump_table,
+					  ctnetlink_done);
 	}
 
 	if (cda[CTA_EXPECT_MASTER-1])
@@ -1333,7 +1307,7 @@ out:
 
 static int
 ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_expect *exp, *tmp;
 	struct nf_conntrack_tuple tuple;
@@ -1467,7 +1441,7 @@ out:
 
 static int
 ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 456155f05c75..6d947068c58f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,13 +28,13 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
 struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
-#ifdef CONFIG_SYSCTL
-static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
+static DEFINE_MUTEX(nf_ct_proto_mutex);
 
+#ifdef CONFIG_SYSCTL
 static int
 nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
 		      struct ctl_table *table, unsigned int *users)
@@ -164,13 +164,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
 	if (l3proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
 					    l3proto->ctl_table_path,
 					    l3proto->ctl_table, NULL);
 	}
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
 #endif
 	return err;
 }
@@ -178,11 +176,9 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
 	if (l3proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
 					l3proto->ctl_table, NULL);
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
 #endif
 }
 
@@ -190,27 +186,23 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 
-	if (proto->l3proto >= AF_MAX) {
-		ret = -EBUSY;
-		goto out;
-	}
+	if (proto->l3proto >= AF_MAX)
+		return -EBUSY;
 
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-	write_unlock_bh(&nf_conntrack_lock);
 
 	ret = nf_ct_l3proto_register_sysctl(proto);
 	if (ret < 0)
-		nf_conntrack_l3proto_unregister(proto);
-	return ret;
+		goto out_unlock;
+
+	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
 
 out_unlock:
-	write_unlock_bh(&nf_conntrack_lock);
-out:
+	mutex_unlock(&nf_ct_proto_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -219,14 +211,14 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= AF_MAX);
 
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
 			   &nf_conntrack_l3proto_generic);
-	write_unlock_bh(&nf_conntrack_lock);
-	synchronize_rcu();
-
 	nf_ct_l3proto_unregister_sysctl(proto);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(kill_l3proto, proto);
@@ -238,7 +230,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
 	if (l4proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(l4proto->ctl_table_header,
 					    nf_net_netfilter_sysctl_path,
@@ -260,7 +251,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	}
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 out:
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
 #endif /* CONFIG_SYSCTL */
 	return err;
 }
@@ -268,7 +258,6 @@ out:
 static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
 	if (l4proto->ctl_table_header != NULL &&
 	    *l4proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +268,6 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
 		nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
 					l4proto->ctl_compat_table, NULL);
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
 #endif /* CONFIG_SYSCTL */
 }
 
@@ -289,68 +277,41 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
-	if (l4proto->l3proto >= PF_MAX) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	if (l4proto == &nf_conntrack_l4proto_generic)
-		return nf_ct_l4proto_register_sysctl(l4proto);
+	if (l4proto->l3proto >= PF_MAX)
+		return -EBUSY;
 
-retry:
-	write_lock_bh(&nf_conntrack_lock);
-	if (nf_ct_protos[l4proto->l3proto]) {
-		if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
-				!= &nf_conntrack_l4proto_generic) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-	} else {
+	mutex_lock(&nf_ct_proto_mutex);
+	if (!nf_ct_protos[l4proto->l3proto]) {
 		/* l3proto may be loaded latter. */
 		struct nf_conntrack_l4proto **proto_array;
 		int i;
 
-		write_unlock_bh(&nf_conntrack_lock);
-
-		proto_array = (struct nf_conntrack_l4proto **)
-				kmalloc(MAX_NF_CT_PROTO *
-					 sizeof(struct nf_conntrack_l4proto *),
-					GFP_KERNEL);
+		proto_array = kmalloc(MAX_NF_CT_PROTO *
+				      sizeof(struct nf_conntrack_l4proto *),
+				      GFP_KERNEL);
 		if (proto_array == NULL) {
 			ret = -ENOMEM;
-			goto out;
+			goto out_unlock;
 		}
+
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
 			proto_array[i] = &nf_conntrack_l4proto_generic;
-
-		write_lock_bh(&nf_conntrack_lock);
-		if (nf_ct_protos[l4proto->l3proto]) {
-			/* bad timing, but no problem */
-			write_unlock_bh(&nf_conntrack_lock);
-			kfree(proto_array);
-		} else {
-			nf_ct_protos[l4proto->l3proto] = proto_array;
-			write_unlock_bh(&nf_conntrack_lock);
-		}
-
-		/*
-		 * Just once because array is never freed until unloading
-		 * nf_conntrack.ko
-		 */
-		goto retry;
+		nf_ct_protos[l4proto->l3proto] = proto_array;
+	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
+					&nf_conntrack_l4proto_generic) {
+		ret = -EBUSY;
+		goto out_unlock;
 	}
 
-	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
-	write_unlock_bh(&nf_conntrack_lock);
-
 	ret = nf_ct_l4proto_register_sysctl(l4proto);
 	if (ret < 0)
-		nf_conntrack_l4proto_unregister(l4proto);
-	return ret;
+		goto out_unlock;
+
+	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			   l4proto);
 
 out_unlock:
-	write_unlock_bh(&nf_conntrack_lock);
-out:
+	mutex_unlock(&nf_ct_proto_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -359,21 +320,42 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 {
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
-	if (l4proto == &nf_conntrack_l4proto_generic) {
-		nf_ct_l4proto_unregister_sysctl(l4proto);
-		return;
-	}
-
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
-	write_unlock_bh(&nf_conntrack_lock);
-	synchronize_rcu();
-
 	nf_ct_l4proto_unregister_sysctl(l4proto);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+
+int nf_conntrack_proto_init(void)
+{
+	unsigned int i;
+	int err;
+
+	err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < AF_MAX; i++)
+		rcu_assign_pointer(nf_ct_l3protos[i],
+				   &nf_conntrack_l3proto_generic);
+	return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+
+	nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
+
+	/* free l3proto protocol tables */
+	for (i = 0; i < PF_MAX; i++)
+		kfree(nf_ct_protos[i]);
+}
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 7c069939695a..6faf1bed7224 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3c80558716a0..0d3254b974c5 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -7,15 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/ip_conntrack_sctp.c
- */
-
-/*
- * Added support for proc manipulation of timeouts.
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 153d6619993a..ccdd5d231e0d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -4,24 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- *	- Real stateful connection tracking
- *	- Modified state transitions table
- *	- Window scaling support added
- *	- SACK support added
- *
- * Willy Tarreau:
- *	- State table bugfixes
- *	- More robust state changes
- *	- Tuning timer parameters
- *
- * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- genelized Layer 3 protocol part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
- *
- * version 2.2
  */
 
 #include <linux/types.h>
@@ -470,11 +452,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 
 	/* Fast path for timestamp-only option */
 	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__be32 *)ptr ==
-		__constant_htonl((TCPOPT_NOP << 24)
-				 | (TCPOPT_NOP << 16)
-				 | (TCPOPT_TIMESTAMP << 8)
-				 | TCPOLEN_TIMESTAMP))
+	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+				       | (TCPOPT_NOP << 16)
+				       | (TCPOPT_TIMESTAMP << 8)
+				       | TCPOLEN_TIMESTAMP))
 		return;
 
 	while (length > 0) {
@@ -765,26 +746,18 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 #define	TH_ECE	0x40
 #define	TH_CWR	0x80
 
-/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 {
 	[TH_SYN]			= 1,
-	[TH_SYN|TH_PUSH]		= 1,
 	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_PUSH|TH_URG]		= 1,
 	[TH_SYN|TH_ACK]			= 1,
-	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
 	[TH_RST]			= 1,
 	[TH_RST|TH_ACK]			= 1,
-	[TH_RST|TH_ACK|TH_PUSH]		= 1,
 	[TH_FIN|TH_ACK]			= 1,
+	[TH_FIN|TH_ACK|TH_URG]		= 1,
 	[TH_ACK]			= 1,
-	[TH_ACK|TH_PUSH]		= 1,
 	[TH_ACK|TH_URG]			= 1,
-	[TH_ACK|TH_URG|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -831,7 +804,7 @@ static int tcp_error(struct sk_buff *skb,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
@@ -1110,11 +1083,26 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct nf_conn *ct)
 {
 	struct nfattr *nest_parms;
+	struct nf_ct_tcp_flags tmp = {};
 
 	read_lock_bh(&tcp_lock);
 	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
+
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[0].td_scale);
+
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[1].td_scale);
+
+	tmp.flags = ct->proto.tcp.seen[0].flags;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+		sizeof(struct nf_ct_tcp_flags), &tmp);
+
+	tmp.flags = ct->proto.tcp.seen[1].flags;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+		sizeof(struct nf_ct_tcp_flags), &tmp);
 	read_unlock_bh(&tcp_lock);
 
 	NFA_NEST_END(skb, nest_parms);
@@ -1127,7 +1115,11 @@ nfattr_failure:
 }
 
 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_STATE-1]	      = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]    = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]  = sizeof(struct nf_ct_tcp_flags),
+	[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]     = sizeof(struct nf_ct_tcp_flags)
 };
 
 static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1151,6 +1143,30 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
 	write_lock_bh(&tcp_lock);
 	ct->proto.tcp.state =
 		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+
+	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
+		struct nf_ct_tcp_flags *attr =
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
+		ct->proto.tcp.seen[0].flags &= ~attr->mask;
+		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+	}
+
+	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
+		struct nf_ct_tcp_flags *attr =
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
+		ct->proto.tcp.seen[1].flags &= ~attr->mask;
+		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+	}
+
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
+	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
+	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+		ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+		ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+	}
 	write_unlock_bh(&tcp_lock);
 
 	return 0;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a5e5726ec0c7..3620ecc095fd 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b8586360e519..45baeb0e30f9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,20 +1,9 @@
-/* This file contains all the functions required for the standalone
-   nf_conntrack module.
-
-   These are not required by the compatibility layer.
-*/
-
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- generalize L3 protocol dependent part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index bf23e489e4cd..8797e6953ef2 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
  *
  * (C) 2001 by Jay Schulist <jschlst@samba.org>,
  * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * Initial netfilter messages via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -28,10 +28,9 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <linux/init.h>
-#include <linux/spinlock.h>
 
-#include <linux/netfilter.h>
 #include <linux/netlink.h>
 #include <linux/netfilter/nfnetlink.h>
 
@@ -41,32 +40,34 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-#if 0
-#define DEBUGP(format, args...)	\
-		printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
-			__LINE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static struct sock *nfnl = NULL;
 static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
-DECLARE_MUTEX(nfnl_sem);
+static DEFINE_MUTEX(nfnl_mutex);
 
-void nfnl_lock(void)
+static void nfnl_lock(void)
 {
-	nfnl_shlock();
+	mutex_lock(&nfnl_mutex);
 }
 
-void nfnl_unlock(void)
+static int nfnl_trylock(void)
 {
-	nfnl_shunlock();
+	return !mutex_trylock(&nfnl_mutex);
 }
 
-int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+static void __nfnl_unlock(void)
 {
-	DEBUGP("registering subsystem ID %u\n", n->subsys_id);
+	mutex_unlock(&nfnl_mutex);
+}
+
+static void nfnl_unlock(void)
+{
+	mutex_unlock(&nfnl_mutex);
+	if (nfnl->sk_receive_queue.qlen)
+		nfnl->sk_data_ready(nfnl, 0);
+}
 
+int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
+{
 	nfnl_lock();
 	if (subsys_table[n->subsys_id]) {
 		nfnl_unlock();
@@ -77,24 +78,23 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
 
 int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
 {
-	DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
-
 	nfnl_lock();
 	subsys_table[n->subsys_id] = NULL;
 	nfnl_unlock();
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
 
 static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
 {
 	u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
 
-	if (subsys_id >= NFNL_SUBSYS_COUNT
-	    || subsys_table[subsys_id] == NULL)
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return NULL;
 
 	return subsys_table[subsys_id];
@@ -105,10 +105,8 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
 {
 	u_int8_t cb_id = NFNL_MSG_TYPE(type);
 
-	if (cb_id >= ss->cb_count) {
-		DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
+	if (cb_id >= ss->cb_count)
 		return NULL;
-	}
 
 	return &ss->cb[cb_id];
 }
@@ -125,6 +123,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
 	memcpy(NFA_DATA(nfa), data, attrlen);
 	memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
 }
+EXPORT_SYMBOL_GPL(__nfa_fill);
 
 void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
 {
@@ -137,6 +136,7 @@ void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
 		nfa = NFA_NEXT(nfa, len);
 	}
 }
+EXPORT_SYMBOL_GPL(nfattr_parse);
 
 /**
  * nfnetlink_check_attributes - check and parse nfnetlink attributes
@@ -150,37 +150,15 @@ static int
 nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 			   struct nlmsghdr *nlh, struct nfattr *cda[])
 {
-	int min_len;
-	u_int16_t attr_count;
+	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
-
-	if (unlikely(cb_id >= subsys->cb_count)) {
-		DEBUGP("msgtype %u >= %u, returning\n",
-			cb_id, subsys->cb_count);
-		return -EINVAL;
-	}
-
-	min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
-	if (unlikely(nlh->nlmsg_len < min_len))
-		return -EINVAL;
-
-	attr_count = subsys->cb[cb_id].attr_count;
-	memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+	u_int16_t attr_count = subsys->cb[cb_id].attr_count;
 
 	/* check attribute lengths. */
 	if (likely(nlh->nlmsg_len > min_len)) {
 		struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
-		while (NFA_OK(attr, attrlen)) {
-			unsigned flavor = NFA_TYPE(attr);
-			if (flavor) {
-				if (flavor > attr_count)
-					return -EINVAL;
-				cda[flavor - 1] = attr;
-			}
-			attr = NFA_NEXT(attr, attrlen);
-		}
+		nfattr_parse(cda, attr_count, attr, attrlen);
 	}
 
 	/* implicit: if nlmsg_len == min_len, we return 0, and an empty
@@ -208,62 +186,46 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_send);
 
 int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
 {
 	return netlink_unicast(nfnl, skb, pid, flags);
 }
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb,
-				    struct nlmsghdr *nlh, int *errp)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct nfnl_callback *nc;
 	struct nfnetlink_subsystem *ss;
-	int type, err = 0;
-
-	DEBUGP("entered; subsys=%u, msgtype=%u\n",
-		 NFNL_SUBSYS_ID(nlh->nlmsg_type),
-		 NFNL_MSG_TYPE(nlh->nlmsg_type));
-
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		DEBUGP("missing CAP_NET_ADMIN\n");
-		*errp = -EPERM;
-		return -1;
-	}
+	int type, err;
 
-	/* Only requests are handled by kernel now. */
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
-		DEBUGP("received non-request message\n");
-		return 0;
-	}
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	/* All the messages must at least contain nfgenmsg */
-	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
-		DEBUGP("received message was too short\n");
+	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
 		return 0;
-	}
 
 	type = nlh->nlmsg_type;
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_KMOD
-		/* don't call nfnl_shunlock, since it would reenter
+		/* don't call nfnl_unlock, since it would reenter
 		 * with further packet processing */
-		up(&nfnl_sem);
+		__nfnl_unlock();
 		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
-		nfnl_shlock();
+		nfnl_lock();
 		ss = nfnetlink_get_subsys(type);
 		if (!ss)
 #endif
-			goto err_inval;
+			return -EINVAL;
 	}
 
 	nc = nfnetlink_find_client(type, ss);
-	if (!nc) {
-		DEBUGP("unable to find client for type %d\n", type);
-		goto err_inval;
-	}
+	if (!nc)
+		return -EINVAL;
 
 	{
 		u_int16_t attr_count =
@@ -274,73 +236,21 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 
 		err = nfnetlink_check_attributes(ss, nlh, cda);
 		if (err < 0)
-			goto err_inval;
-
-		DEBUGP("calling handler\n");
-		err = nc->call(nfnl, skb, nlh, cda, errp);
-		*errp = err;
-		return err;
-	}
-
-err_inval:
-	DEBUGP("returning -EINVAL\n");
-	*errp = -EINVAL;
-	return -1;
-}
-
-/* Process one packet of messages. */
-static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
-{
-	int err;
-	struct nlmsghdr *nlh;
-
-	while (skb->len >= NLMSG_SPACE(0)) {
-		u32 rlen;
-
-		nlh = (struct nlmsghdr *)skb->data;
-		if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
-		    || skb->len < nlh->nlmsg_len)
-			return 0;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		if (nfnetlink_rcv_msg(skb, nlh, &err)) {
-			if (!err)
-				return -1;
-			netlink_ack(skb, nlh, err);
-		} else
-			if (nlh->nlmsg_flags & NLM_F_ACK)
-				netlink_ack(skb, nlh, 0);
-		skb_pull(skb, rlen);
+			return err;
+		return nc->call(nfnl, skb, nlh, cda);
 	}
-
-	return 0;
 }
 
 static void nfnetlink_rcv(struct sock *sk, int len)
 {
-	do {
-		struct sk_buff *skb;
+	unsigned int qlen = 0;
 
-		if (nfnl_shlock_nowait())
+	do {
+		if (nfnl_trylock())
 			return;
-
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			if (nfnetlink_rcv_skb(skb)) {
-				if (skb->len)
-					skb_queue_head(&sk->sk_receive_queue,
-						       skb);
-				else
-					kfree_skb(skb);
-				break;
-			}
-			kfree_skb(skb);
-		}
-
-		/* don't call nfnl_shunlock, since it would reenter
-		 * with further packet processing */
-		up(&nfnl_sem);
-	} while(nfnl && nfnl->sk_receive_queue.qlen);
+		netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg);
+		__nfnl_unlock();
+	} while (qlen);
 }
 
 static void __exit nfnetlink_exit(void)
@@ -355,7 +265,7 @@ static int __init nfnetlink_init(void)
 	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
 
 	nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
-				     nfnetlink_rcv, THIS_MODULE);
+				     nfnetlink_rcv, NULL, THIS_MODULE);
 	if (!nfnl) {
 		printk(KERN_ERR "cannot initialize nfnetlink!\n");
 		return -1;
@@ -366,10 +276,3 @@ static int __init nfnetlink_init(void)
 
 module_init(nfnetlink_init);
 module_exit(nfnetlink_exit);
-
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
-EXPORT_SYMBOL_GPL(nfnetlink_send);
-EXPORT_SYMBOL_GPL(nfnetlink_unicast);
-EXPORT_SYMBOL_GPL(nfattr_parse);
-EXPORT_SYMBOL_GPL(__nfa_fill);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5cb30ebba0f4..e32e30e7a17c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -10,11 +10,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2006-01-26 Harald Welte <laforge@netfilter.org>
- * 	- Add optional local and global sequence number to detect lost
- * 	  events from userspace
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -163,10 +158,7 @@ instance_create(u_int16_t group_num, int pid)
 	/* needs to be two, since we _put() after creation */
 	atomic_set(&inst->use, 2);
 
-	init_timer(&inst->timer);
-	inst->timer.function = nfulnl_timer;
-	inst->timer.data = (unsigned long)inst;
-	/* don't start timer yet. (re)start it  with every packet */
+	setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
 
 	inst->peer_pid = pid;
 	inst->group_num = group_num;
@@ -200,20 +192,14 @@ out_unlock:
 static int __nfulnl_send(struct nfulnl_instance *inst);
 
 static void
-_instance_destroy2(struct nfulnl_instance *inst, int lock)
+__instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	if (lock)
-		write_lock_bh(&instances_lock);
-
 	UDEBUG("removing instance %p (queuenum=%u) from hash\n",
 		inst, inst->group_num);
 
 	hlist_del(&inst->hlist);
 
-	if (lock)
-		write_unlock_bh(&instances_lock);
-
 	/* then flush all pending packets from skb */
 
 	spin_lock_bh(&inst->lock);
@@ -235,15 +221,11 @@ _instance_destroy2(struct nfulnl_instance *inst, int lock)
 }
 
 static inline void
-__instance_destroy(struct nfulnl_instance *inst)
-{
-	_instance_destroy2(inst, 0);
-}
-
-static inline void
 instance_destroy(struct nfulnl_instance *inst)
 {
-	_instance_destroy2(inst, 1);
+	write_lock_bh(&instances_lock);
+	__instance_destroy(inst);
+	write_unlock_bh(&instances_lock);
 }
 
 static int
@@ -365,9 +347,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
 {
 	int status;
 
-	if (!inst->skb)
-		return 0;
-
 	if (inst->qlen > 1)
 		inst->lastnlh->nlmsg_type = NLMSG_DONE;
 
@@ -391,7 +370,8 @@ static void nfulnl_timer(unsigned long data)
 	UDEBUG("timer function called, flushing buffer\n");
 
 	spin_lock_bh(&inst->lock);
-	__nfulnl_send(inst);
+	if (inst->skb)
+		__nfulnl_send(inst);
 	spin_unlock_bh(&inst->lock);
 	instance_put(inst);
 }
@@ -409,15 +389,14 @@ __build_packet_message(struct nfulnl_instance *inst,
 			const struct nf_loginfo *li,
 			const char *prefix, unsigned int plen)
 {
-	unsigned char *old_tail;
 	struct nfulnl_msg_packet_hdr pmsg;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	__be32 tmp_uint;
+	sk_buff_data_t old_tail = inst->skb->tail;
 
 	UDEBUG("entered\n");
 
-	old_tail = inst->skb->tail;
 	nlh = NLMSG_PUT(inst->skb, 0, 0,
 			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
@@ -509,11 +488,11 @@ __build_packet_message(struct nfulnl_instance *inst,
 		NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
 	}
 
-	if (skb->tstamp.off_sec) {
+	if (skb->tstamp.tv64) {
 		struct nfulnl_msg_packet_timestamp ts;
-
-		ts.sec = cpu_to_be64(skb->tstamp.off_sec);
-		ts.usec = cpu_to_be64(skb->tstamp.off_usec);
+		struct timeval tv = ktime_to_timeval(skb->tstamp);
+		ts.sec = cpu_to_be64(tv.tv_sec);
+		ts.usec = cpu_to_be64(tv.tv_usec);
 
 		NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
 	}
@@ -596,7 +575,6 @@ nfulnl_log_packet(unsigned int pf,
 	struct nfulnl_instance *inst;
 	const struct nf_loginfo *li;
 	unsigned int qthreshold;
-	unsigned int nlbufsiz;
 	unsigned int plen;
 
 	if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -606,12 +584,7 @@ nfulnl_log_packet(unsigned int pf,
 
 	inst = instance_lookup_get(li->u.ulog.group);
 	if (!inst)
-		inst = instance_lookup_get(0);
-	if (!inst) {
-		PRINTR("nfnetlink_log: trying to log packet, "
-			"but no instance for group %u\n", li->u.ulog.group);
 		return;
-	}
 
 	plen = 0;
 	if (prefix)
@@ -667,24 +640,11 @@ nfulnl_log_packet(unsigned int pf,
 		break;
 
 	default:
-		spin_unlock_bh(&inst->lock);
-		instance_put(inst);
-		return;
+		goto unlock_and_release;
 	}
 
-	if (size > inst->nlbufsiz)
-		nlbufsiz = size;
-	else
-		nlbufsiz = inst->nlbufsiz;
-
-	if (!inst->skb) {
-		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
-			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
-				inst->nlbufsiz, size);
-			goto alloc_failure;
-		}
-	} else if (inst->qlen >= qthreshold ||
-		   size > skb_tailroom(inst->skb)) {
+	if (inst->qlen >= qthreshold ||
+	    (inst->skb && size > skb_tailroom(inst->skb))) {
 		/* either the queue len is too high or we don't have
 		 * enough room in the skb left. flush to userspace. */
 		UDEBUG("flushing old skb\n");
@@ -693,12 +653,12 @@ nfulnl_log_packet(unsigned int pf,
 		if (del_timer(&inst->timer))
 			instance_put(inst);
 		__nfulnl_send(inst);
+	}
 
-		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
-			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
-				inst->nlbufsiz, size);
+	if (!inst->skb) {
+		inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+		if (!inst->skb)
 			goto alloc_failure;
-		}
 	}
 
 	UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
@@ -760,7 +720,7 @@ static struct notifier_block nfulnl_rtnl_notifier = {
 
 static int
 nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	return -ENOTSUPP;
 }
@@ -798,7 +758,7 @@ static const int nfula_cfg_min[NFULA_CFG_MAX] = {
 
 static int
 nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
-		   struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
+		   struct nlmsghdr *nlh, struct nfattr *nfula[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
@@ -830,13 +790,13 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 					       NETLINK_CB(skb).pid);
 			if (!inst) {
 				ret = -EINVAL;
-				goto out_put;
+				goto out;
 			}
 			break;
 		case NFULNL_CFG_CMD_UNBIND:
 			if (!inst) {
 				ret = -ENODEV;
-				goto out_put;
+				goto out;
 			}
 
 			if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -845,7 +805,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			}
 
 			instance_destroy(inst);
-			break;
+			goto out;
 		case NFULNL_CFG_CMD_PF_BIND:
 			UDEBUG("registering log handler for pf=%u\n", pf);
 			ret = nf_log_register(pf, &nfulnl_logger);
@@ -869,7 +829,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				"group=%u pid=%u =>ENOENT\n",
 				group_num, NETLINK_CB(skb).pid);
 			ret = -ENOENT;
-			goto out_put;
+			goto out;
 		}
 
 		if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -939,10 +899,8 @@ struct iter_state {
 	unsigned int bucket;
 };
 
-static struct hlist_node *get_first(struct seq_file *seq)
+static struct hlist_node *get_first(struct iter_state *st)
 {
-	struct iter_state *st = seq->private;
-
 	if (!st)
 		return NULL;
 
@@ -953,10 +911,8 @@ static struct hlist_node *get_first(struct seq_file *seq)
 	return NULL;
 }
 
-static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	struct iter_state *st = seq->private;
-
 	h = h->next;
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
@@ -967,13 +923,13 @@ static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
 	return h;
 }
 
-static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
 {
 	struct hlist_node *head;
-	head = get_first(seq);
+	head = get_first(st);
 
 	if (head)
-		while (pos && (head = get_next(seq, head)))
+		while (pos && (head = get_next(st, head)))
 			pos--;
 	return pos ? NULL : head;
 }
@@ -981,13 +937,13 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
 	read_lock_bh(&instances_lock);
-	return get_idx(seq, *pos);
+	return get_idx(seq->private, *pos);
 }
 
 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
-	return get_next(s, v);
+	return get_next(s->private, v);
 }
 
 static void seq_stop(struct seq_file *s, void *v)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce4a71d0f3..7a97bec67729 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -338,7 +338,7 @@ static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			   struct nfqnl_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size;
 	size_t data_len = 0;
 	struct sk_buff *skb;
@@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
 	}
 
-	if (entskb->tstamp.off_sec) {
+	if (entskb->tstamp.tv64) {
 		struct nfqnl_msg_packet_timestamp ts;
-
-		ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
-		ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
+		struct timeval tv = ktime_to_timeval(entskb->tstamp);
+		ts.sec = cpu_to_be64(tv.tv_sec);
+		ts.usec = cpu_to_be64(tv.tv_usec);
 
 		NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
 	}
@@ -648,7 +648,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, data, data_len);
+	skb_copy_to_linear_data(e->skb, data, data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 	return 0;
 }
@@ -783,7 +783,7 @@ static const int nfqa_verdict_min[NFQA_MAX] = {
 
 static int
 nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
-		   struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		   struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -848,7 +848,7 @@ err_out_put:
 
 static int
 nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	return -ENOTSUPP;
 }
@@ -865,7 +865,7 @@ static struct nf_queue_handler nfqh = {
 
 static int
 nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ec607a421a5a..0eb2504b89b5 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -56,8 +56,8 @@ enum {
 };
 
 static const char *xt_prefix[NPROTO] = {
-	[AF_INET] 	= "ip",
-	[AF_INET6] 	= "ip6",
+	[AF_INET]	= "ip",
+	[AF_INET6]	= "ip6",
 	[NF_ARP]	= "arp",
 };
 
@@ -651,12 +651,6 @@ void *xt_unregister_table(struct xt_table *table)
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
 #ifdef CONFIG_PROC_FS
-static char *xt_proto_prefix[NPROTO] = {
-	[AF_INET]	= "ip",
-	[AF_INET6]	= "ip6",
-	[NF_ARP]	= "arp",
-};
-
 static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
 {
 	struct list_head *head = list->next;
@@ -798,7 +792,7 @@ int xt_proto_init(int af)
 
 
 #ifdef CONFIG_PROC_FS
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
@@ -806,14 +800,14 @@ int xt_proto_init(int af)
 	proc->data = (void *) ((unsigned long) af | (TABLE << 16));
 
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
 		goto out_remove_tables;
 	proc->data = (void *) ((unsigned long) af | (MATCH << 16));
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
@@ -825,12 +819,12 @@ int xt_proto_init(int af)
 
 #ifdef CONFIG_PROC_FS
 out_remove_matches:
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc_net_remove(buf);
 
 out_remove_tables:
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc_net_remove(buf);
 out:
@@ -844,15 +838,15 @@ void xt_proto_fini(int af)
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc_net_remove(buf);
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
 	proc_net_remove(buf);
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc_net_remove(buf);
 #endif /*CONFIG_PROC_FS*/
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 795c058b16a5..b03ce009d0bf 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -30,10 +30,7 @@ MODULE_ALIAS("ipt_CONNMARK");
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack_ecache.h>
-#endif
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -44,40 +41,33 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
 	u_int32_t diff;
 	u_int32_t mark;
 	u_int32_t newmark;
-	u_int32_t ctinfo;
-	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
-	if (ctmark) {
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct) {
 		switch(markinfo->mode) {
 		case XT_CONNMARK_SET:
-			newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-			if (newmark != *ctmark) {
-				*ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-				ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+			if (newmark != ct->mark) {
+				ct->mark = newmark;
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
 			}
 			break;
 		case XT_CONNMARK_SAVE:
-			newmark = (*ctmark & ~markinfo->mask) |
+			newmark = (ct->mark & ~markinfo->mask) |
 				  ((*pskb)->mark & markinfo->mask);
-			if (*ctmark != newmark) {
-				*ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-				ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+			if (ct->mark != newmark) {
+				ct->mark = newmark;
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
 			mark = (*pskb)->mark;
-			diff = (*ctmark ^ mark) & markinfo->mask;
+			diff = (ct->mark ^ mark) & markinfo->mask;
 			(*pskb)->mark = mark ^ diff;
 			break;
 		}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 1ab0db641f96..81c0c58bab47 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -19,7 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CONNSECMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 #define PFX "CONNSECMARK: "
 
@@ -36,12 +36,12 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
 static void secmark_save(struct sk_buff *skb)
 {
 	if (skb->secmark) {
-		u32 *connsecmark;
+		struct nf_conn *ct;
 		enum ip_conntrack_info ctinfo;
 
-		connsecmark = nf_ct_get_secmark(skb, &ctinfo);
-		if (connsecmark && !*connsecmark)
-			*connsecmark = skb->secmark;
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct && !ct->secmark)
+			ct->secmark = skb->secmark;
 	}
 }
 
@@ -52,12 +52,12 @@ static void secmark_save(struct sk_buff *skb)
 static void secmark_restore(struct sk_buff *skb)
 {
 	if (!skb->secmark) {
-		u32 *connsecmark;
+		struct nf_conn *ct;
 		enum ip_conntrack_info ctinfo;
 
-		connsecmark = nf_ct_get_secmark(skb, &ctinfo);
-		if (connsecmark && *connsecmark)
-			skb->secmark = *connsecmark;
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct && ct->secmark)
+			skb->secmark = ct->secmark;
 	}
 }
 
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a7cc75aeb38d..9f2f2201f6ae 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -8,8 +8,6 @@
  * published by the Free Software Foundation.
  *
  * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
 */
 
 #include <linux/module.h>
@@ -35,13 +33,13 @@ static unsigned int target(struct sk_buff **pskb,
 			   const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
 
-		ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+		ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 
 	}
@@ -56,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
 			    const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
 		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
 			return NF_DROP;
 
-		ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+		ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b874a2008b2b..5085fb3d1e2d 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -5,7 +5,7 @@
 #include <linux/skbuff.h>
 
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_NOTRACK");
@@ -26,7 +26,7 @@ target(struct sk_buff **pskb,
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	nf_ct_untrack(*pskb);
+	(*pskb)->nfct = &nf_conntrack_untracked.ct_general;
 	(*pskb)->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get((*pskb)->nfct);
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de2..15fe8f649510 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 		return -1;
 
 	tcplen = (*pskb)->len - tcphoff;
-	tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+	tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 
 	/* Since it passed flags test in tcp match, we know it is is
 	   not a fragment, and has data >= tcp header length.  SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 			return -1;
 		kfree_skb(*pskb);
 		*pskb = newskb;
-		tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+		tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 	}
 
 	skb_put((*pskb), TCPOLEN_MSS);
@@ -145,7 +145,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	__be16 newlen;
 	int ret;
 
@@ -154,7 +154,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		newlen = htons(ntohs(iph->tot_len) + ret);
 		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
 		iph->tot_len = newlen;
@@ -171,7 +171,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
 	u8 nexthdr;
 	int tcphoff;
 	int ret;
@@ -187,7 +187,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		ipv6h = (*pskb)->nh.ipv6h;
+		ipv6h = ipv6_hdr(*pskb);
 		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e32dfa2668b..804afe55e141 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,20 +1,11 @@
 /* Kernel module to match connection tracking byte counter.
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * 	- reimplemented to use per-connection accounting counters
- * 	- add functionality to match number of packets
- * 	- add functionality to match average packet size
- * 	- add support to match directions seperately
- * 2005-10-16 Harald Welte <laforge@netfilter.org>
- * 	- Port to x_tables
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
 
 #include <asm/div64.h>
 #include <asm/bitops.h>
@@ -24,22 +15,6 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
 MODULE_ALIAS("ipt_connbytes");
 
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
-	u_int32_t d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	do_div(dividend, d);
-	return dividend;
-}
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -51,13 +26,17 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	u_int64_t bytes = 0;
 	u_int64_t pkts = 0;
 	const struct ip_conntrack_counter *counters;
 
-	if (!(counters = nf_ct_get_counters(skb)))
-		return 0; /* no match */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return 0;
+	counters = ct->counters;
 
 	switch (sinfo->what) {
 	case XT_CONNBYTES_PKTS:
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 36c2defff238..e1803256c792 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -21,16 +21,15 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
 MODULE_DESCRIPTION("IP tables connmark match module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connmark");
 
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -42,12 +41,14 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
-	u_int32_t ctinfo;
-	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
-	if (!ctmark)
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
 		return 0;
 
-	return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
+	return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
 }
 
 static int
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2885c378288e..f4ea8fe07a53 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -10,121 +10,15 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_conntrack.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 MODULE_ALIAS("ipt_conntrack");
 
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_conntrack_info *sinfo = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int statebit;
-
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
-	if (ct == &ip_conntrack_untracked)
-		statebit = XT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct)
-		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-	else
-		statebit = XT_CONNTRACK_STATE_INVALID;
-
-	if (sinfo->flags & XT_CONNTRACK_STATE) {
-		if (ct) {
-			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
-				statebit |= XT_CONNTRACK_STATE_SNAT;
-			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
-				statebit |= XT_CONNTRACK_STATE_DNAT;
-		}
-		if (FWINV((statebit & sinfo->statemask) == 0,
-			  XT_CONNTRACK_STATE))
-			return 0;
-	}
-
-	if (ct == NULL) {
-		if (sinfo->flags & ~XT_CONNTRACK_STATE)
-			return 0;
-		return 1;
-	}
-
-	if (sinfo->flags & XT_CONNTRACK_PROTO &&
-	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
-		  XT_CONNTRACK_PROTO))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
-		   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
-		  XT_CONNTRACK_ORIGSRC))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
-		   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
-		  XT_CONNTRACK_ORIGDST))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
-		   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
-		  XT_CONNTRACK_REPLSRC))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
-		   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
-		  XT_CONNTRACK_REPLDST))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_STATUS &&
-	    FWINV((ct->status & sinfo->statusmask) == 0,
-		  XT_CONNTRACK_STATUS))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires = timer_pending(&ct->timeout) ?
-					(ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min &&
-			    expires <= sinfo->expires_max),
-			  XT_CONNTRACK_EXPIRES))
-			return 0;
-	}
-	return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -220,8 +114,6 @@ match(const struct sk_buff *skb,
 	return 1;
 }
 
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
 static int
 checkentry(const char *tablename,
 	   const void *ip,
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26c7f4ad102a..56b247ecc283 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the value of the IPv4/IPv6 DSCP field
  *
- * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -34,7 +32,7 @@ static int match(const struct sk_buff *skb,
 		 int *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
-	u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
@@ -49,7 +47,7 @@ static int match6(const struct sk_buff *skb,
 		  int *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
-	u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9f37d593ca38..d3043fa32ebc 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -216,10 +216,8 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
 	hinfo->pde->proc_fops = &dl_file_ops;
 	hinfo->pde->data = hinfo;
 
-	init_timer(&hinfo->timer);
+	setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
-	hinfo->timer.data = (unsigned long )hinfo;
-	hinfo->timer.function = htable_gc;
 	add_timer(&hinfo->timer);
 
 	spin_lock_bh(&hashlimit_lock);
@@ -380,22 +378,22 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
 	switch (hinfo->family) {
 	case AF_INET:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
-			dst->addr.ip.dst = skb->nh.iph->daddr;
+			dst->addr.ip.dst = ip_hdr(skb)->daddr;
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
-			dst->addr.ip.src = skb->nh.iph->saddr;
+			dst->addr.ip.src = ip_hdr(skb)->saddr;
 
 		if (!(hinfo->cfg.mode &
 		      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
 			return 0;
-		nexthdr = skb->nh.iph->protocol;
+		nexthdr = ip_hdr(skb)->protocol;
 		break;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case AF_INET6:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
-			memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr,
+			memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
 			       sizeof(dst->addr.ip6.dst));
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
-			memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr,
+			memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
 			       sizeof(dst->addr.ip6.src));
 
 		if (!(hinfo->cfg.mode &
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 407d1d5da8a1..c139b2f43a10 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -5,26 +5,16 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- *   19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- *   		 - Port to newnat infrastructure
  */
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
-#endif
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_helper.h>
-#include <net/netfilter/nf_conntrack_compat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -38,55 +28,6 @@ MODULE_ALIAS("ip6t_helper");
 #define DEBUGP(format, args...)
 #endif
 
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_helper_info *info = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	int ret = info->invert;
-
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-	if (!ct) {
-		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
-		return ret;
-	}
-
-	if (!ct->master) {
-		DEBUGP("xt_helper: conntrack %p has no master\n", ct);
-		return ret;
-	}
-
-	read_lock_bh(&ip_conntrack_lock);
-	if (!ct->master->helper) {
-		DEBUGP("xt_helper: master ct %p has no helper\n",
-			exp->expectant);
-		goto out_unlock;
-	}
-
-	DEBUGP("master's name = %s , info->name = %s\n",
-		ct->master->helper->name, info->name);
-
-	if (info->name[0] == '\0')
-		ret ^= 1;
-	else
-		ret ^= !strncmp(ct->master->helper->name, info->name,
-				strlen(ct->master->helper->name));
-out_unlock:
-	read_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -134,7 +75,6 @@ out_unlock:
 	read_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
-#endif
 
 static int check(const char *tablename,
 		 const void *inf,
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 32fb998d9bac..77288c5ada78 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
@@ -47,7 +47,8 @@ match6(const struct sk_buff *skb,
        int *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+	const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
+				  sizeof(struct ipv6hdr));
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 6fd8347c0058..571a72ab89ad 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,3 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- *                   `limit', removed logging.  Did I mention that
- *                   Alexey is a fucking genius?
- *                   Rusty Russell (rusty@rustcorp.com.au).  */
-
 /* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
  * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
  *
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b26..1d3a1d98b885 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
     const struct xt_mac_info *info = matchinfo;
 
     /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
+    return (skb_mac_header(skb) >= skb->head &&
+	    (skb_mac_header(skb) + ETH_HLEN) <= skb->data
 	    /* If so, compare... */
 	    && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
 		^ info->invert));
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 16e7b0804287..e1409fc5c288 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
 	const struct xt_pkttype_info *info = matchinfo;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = (MULTICAST(skb->nh.iph->daddr)
+		type = (MULTICAST(ip_hdr(skb)->daddr)
 			? PACKET_MULTICAST
 			: PACKET_BROADCAST);
 	else
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 97ffc2fbc19d..c2017f8af9c4 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,7 +1,5 @@
 /* IP tables module for matching the routing realm
  *
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
- *
  * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index df37b912163a..149294f7df71 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -10,7 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_state.h>
 
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
 
 	if (nf_ct_is_untracked(skb))
 		statebit = XT_STATE_UNTRACKED;
-	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
+	else if (!nf_ct_get(skb, &ctinfo))
 		statebit = XT_STATE_INVALID;
 	else
 		statebit = XT_STATE_BIT(ctinfo);
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index e03a3282c551..f2535e7f2869 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -263,9 +263,6 @@ int netlbl_socket_setattr(const struct socket *sock,
 	int ret_val = -ENOENT;
 	struct netlbl_dom_map *dom_entry;
 
-	if ((secattr->flags & NETLBL_SECATTR_DOMAIN) == 0)
-		return -ENOENT;
-
 	rcu_read_lock();
 	dom_entry = netlbl_domhsh_getentry(secattr->domain);
 	if (dom_entry == NULL)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e73d8f546c6b..42d2fb94eff1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
 #include <linux/types.h>
 #include <linux/audit.h>
 #include <linux/selinux.h>
+#include <linux/mutex.h>
 
 #include <net/sock.h>
 #include <net/scm.h>
@@ -76,7 +77,8 @@ struct netlink_sock {
 	unsigned long		state;
 	wait_queue_head_t	wait;
 	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
+	struct mutex		*cb_mutex;
+	struct mutex		cb_def_mutex;
 	void			(*data_ready)(struct sock *sk, int bytes);
 	struct module		*module;
 };
@@ -108,6 +110,7 @@ struct netlink_table {
 	unsigned long *listeners;
 	unsigned int nl_nonroot;
 	unsigned int groups;
+	struct mutex *cb_mutex;
 	struct module *module;
 	int registered;
 };
@@ -118,6 +121,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
 static int netlink_dump(struct sock *sk);
 static void netlink_destroy_callback(struct netlink_callback *cb);
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
 
 static DEFINE_RWLOCK(nl_table_lock);
 static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -370,7 +374,8 @@ static struct proto netlink_proto = {
 	.obj_size = sizeof(struct netlink_sock),
 };
 
-static int __netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+			    int protocol)
 {
 	struct sock *sk;
 	struct netlink_sock *nlk;
@@ -384,7 +389,12 @@ static int __netlink_create(struct socket *sock, int protocol)
 	sock_init_data(sock, sk);
 
 	nlk = nlk_sk(sk);
-	spin_lock_init(&nlk->cb_lock);
+	if (cb_mutex)
+		nlk->cb_mutex = cb_mutex;
+	else {
+		nlk->cb_mutex = &nlk->cb_def_mutex;
+		mutex_init(nlk->cb_mutex);
+	}
 	init_waitqueue_head(&nlk->wait);
 
 	sk->sk_destruct = netlink_sock_destruct;
@@ -395,8 +405,8 @@ static int __netlink_create(struct socket *sock, int protocol)
 static int netlink_create(struct socket *sock, int protocol)
 {
 	struct module *module = NULL;
+	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
-	unsigned int groups;
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -418,10 +428,10 @@ static int netlink_create(struct socket *sock, int protocol)
 	if (nl_table[protocol].registered &&
 	    try_module_get(nl_table[protocol].module))
 		module = nl_table[protocol].module;
-	groups = nl_table[protocol].groups;
+	cb_mutex = nl_table[protocol].cb_mutex;
 	netlink_unlock_table();
 
-	if ((err = __netlink_create(sock, protocol)) < 0)
+	if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
 		goto out_module;
 
 	nlk = nlk_sk(sock->sk);
@@ -443,21 +453,21 @@ static int netlink_release(struct socket *sock)
 		return 0;
 
 	netlink_remove(sk);
+	sock_orphan(sk);
 	nlk = nlk_sk(sk);
 
-	spin_lock(&nlk->cb_lock);
+	mutex_lock(nlk->cb_mutex);
 	if (nlk->cb) {
 		if (nlk->cb->done)
 			nlk->cb->done(nlk->cb);
 		netlink_destroy_callback(nlk->cb);
 		nlk->cb = NULL;
 	}
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	/* OK. Socket is unlinked, and, therefore,
 	   no new packets will arrive */
 
-	sock_orphan(sk);
 	sock->sk = NULL;
 	wake_up_interruptible_all(&nlk->wait);
 
@@ -1215,7 +1225,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	if (msg->msg_name) {
@@ -1242,6 +1252,9 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 
 	scm_recv(sock, msg, siocb->scm, flags);
 
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+
 out:
 	netlink_rcv_wake(sk);
 	return err ? : copied;
@@ -1265,7 +1278,7 @@ static void netlink_data_ready(struct sock *sk, int len)
 struct sock *
 netlink_kernel_create(int unit, unsigned int groups,
 		      void (*input)(struct sock *sk, int len),
-		      struct module *module)
+		      struct mutex *cb_mutex, struct module *module)
 {
 	struct socket *sock;
 	struct sock *sk;
@@ -1280,7 +1293,7 @@ netlink_kernel_create(int unit, unsigned int groups,
 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
 		return NULL;
 
-	if (__netlink_create(sock, unit) < 0)
+	if (__netlink_create(sock, cb_mutex, unit) < 0)
 		goto out_sock_release;
 
 	if (groups < 32)
@@ -1304,6 +1317,7 @@ netlink_kernel_create(int unit, unsigned int groups,
 	netlink_table_grab();
 	nl_table[unit].groups = groups;
 	nl_table[unit].listeners = listeners;
+	nl_table[unit].cb_mutex = cb_mutex;
 	nl_table[unit].module = module;
 	nl_table[unit].registered = 1;
 	netlink_table_ungrab();
@@ -1346,7 +1360,7 @@ static int netlink_dump(struct sock *sk)
 	if (!skb)
 		goto errout;
 
-	spin_lock(&nlk->cb_lock);
+	mutex_lock(nlk->cb_mutex);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
@@ -1357,7 +1371,7 @@ static int netlink_dump(struct sock *sk)
 	len = cb->dump(skb, cb);
 
 	if (len > 0) {
-		spin_unlock(&nlk->cb_lock);
+		mutex_unlock(nlk->cb_mutex);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
 		sk->sk_data_ready(sk, len);
 		return 0;
@@ -1375,13 +1389,13 @@ static int netlink_dump(struct sock *sk)
 	if (cb->done)
 		cb->done(cb);
 	nlk->cb = NULL;
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	netlink_destroy_callback(cb);
 	return 0;
 
 errout_skb:
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 	kfree_skb(skb);
 errout:
 	return err;
@@ -1412,20 +1426,25 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 		return -ECONNREFUSED;
 	}
 	nlk = nlk_sk(sk);
-	/* A dump is in progress... */
-	spin_lock(&nlk->cb_lock);
-	if (nlk->cb) {
-		spin_unlock(&nlk->cb_lock);
+	/* A dump or destruction is in progress... */
+	mutex_lock(nlk->cb_mutex);
+	if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
+		mutex_unlock(nlk->cb_mutex);
 		netlink_destroy_callback(cb);
 		sock_put(sk);
 		return -EBUSY;
 	}
 	nlk->cb = cb;
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	netlink_dump(sk);
 	sock_put(sk);
-	return 0;
+
+	/* We successfully started a dump, by returning -EINTR we
+	 * signal the queue mangement to interrupt processing of
+	 * any netlink messages so userspace gets a chance to read
+	 * the results. */
+	return -EINTR;
 }
 
 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
@@ -1462,27 +1481,35 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 }
 
 static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
-						     struct nlmsghdr *, int *))
+						     struct nlmsghdr *))
 {
 	struct nlmsghdr *nlh;
 	int err;
 
 	while (skb->len >= nlmsg_total_size(0)) {
-		nlh = (struct nlmsghdr *) skb->data;
+		nlh = nlmsg_hdr(skb);
+		err = 0;
 
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
-		if (cb(skb, nlh, &err) < 0) {
-			/* Not an error, but we have to interrupt processing
-			 * here. Note: that in this case we do not pull
-			 * message from skb, it will be processed later.
-			 */
-			if (err == 0)
-				return -1;
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+			goto skip;
+
+		/* Skip control messages */
+		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+			goto skip;
+
+		err = cb(skb, nlh);
+		if (err == -EINTR) {
+			/* Not an error, but we interrupt processing */
+			netlink_queue_skip(nlh, skb);
+			return err;
+		}
+skip:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err)
 			netlink_ack(skb, nlh, err);
-		} else if (nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, 0);
 
 		netlink_queue_skip(nlh, skb);
 	}
@@ -1504,9 +1531,14 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
  *
  * qlen must be initialized to 0 before the initial entry, afterwards
  * the function may be called repeatedly until qlen reaches 0.
+ *
+ * The callback function may return -EINTR to signal that processing
+ * of netlink messages shall be interrupted. In this case the message
+ * currently being processed will NOT be requeued onto the receive
+ * queue.
  */
 void netlink_run_queue(struct sock *sk, unsigned int *qlen,
-		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
+		       int (*cb)(struct sk_buff *, struct nlmsghdr *))
 {
 	struct sk_buff *skb;
 
@@ -1537,7 +1569,7 @@ void netlink_run_queue(struct sock *sk, unsigned int *qlen,
  * Pulls the given netlink message off the socket buffer so the next
  * call to netlink_queue_run() will not reconsider the message.
  */
-void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
 {
 	int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 
@@ -1820,12 +1852,10 @@ core_initcall(netlink_proto_init);
 
 EXPORT_SYMBOL(netlink_ack);
 EXPORT_SYMBOL(netlink_run_queue);
-EXPORT_SYMBOL(netlink_queue_skip);
 EXPORT_SYMBOL(netlink_broadcast);
 EXPORT_SYMBOL(netlink_dump_start);
 EXPORT_SYMBOL(netlink_kernel_create);
 EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_err);
 EXPORT_SYMBOL(netlink_set_nonroot);
 EXPORT_SYMBOL(netlink_unicast);
 EXPORT_SYMBOL(netlink_unregister_notifier);
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 004139557e09..df5f820a4c32 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -67,6 +67,11 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 		}
 		break;
 
+	case NLA_BINARY:
+		if (pt->len && attrlen > pt->len)
+			return -ERANGE;
+		break;
+
 	default:
 		if (pt->len)
 			minlen = pt->len;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c2996794eb25..6e31234a4196 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -295,66 +295,46 @@ int genl_unregister_family(struct genl_family *family)
 	return -ENOENT;
 }
 
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       int *errp)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct genl_ops *ops;
 	struct genl_family *family;
 	struct genl_info info;
 	struct genlmsghdr *hdr = nlmsg_data(nlh);
-	int hdrlen, err = -EINVAL;
-
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		goto ignore;
-
-	if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
-		goto ignore;
+	int hdrlen, err;
 
 	family = genl_family_find_byid(nlh->nlmsg_type);
-	if (family == NULL) {
-		err = -ENOENT;
-		goto errout;
-	}
+	if (family == NULL)
+		return -ENOENT;
 
 	hdrlen = GENL_HDRLEN + family->hdrsize;
 	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
-		goto errout;
+		return -EINVAL;
 
 	ops = genl_get_cmd(hdr->cmd, family);
-	if (ops == NULL) {
-		err = -EOPNOTSUPP;
-		goto errout;
-	}
+	if (ops == NULL)
+		return -EOPNOTSUPP;
 
-	if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		err = -EPERM;
-		goto errout;
-	}
+	if ((ops->flags & GENL_ADMIN_PERM) &&
+	    security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		if (ops->dumpit == NULL) {
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
+		if (ops->dumpit == NULL)
+			return -EOPNOTSUPP;
 
-		*errp = err = netlink_dump_start(genl_sock, skb, nlh,
-						 ops->dumpit, ops->done);
-		if (err == 0)
-			skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
-					  skb->len));
-		return -1;
+		return netlink_dump_start(genl_sock, skb, nlh,
+					  ops->dumpit, ops->done);
 	}
 
-	if (ops->doit == NULL) {
-		err = -EOPNOTSUPP;
-		goto errout;
-	}
+	if (ops->doit == NULL)
+		return -EOPNOTSUPP;
 
 	if (family->attrbuf) {
 		err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
 				  ops->policy);
 		if (err < 0)
-			goto errout;
+			return err;
 	}
 
 	info.snd_seq = nlh->nlmsg_seq;
@@ -364,15 +344,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
 	info.attrs = family->attrbuf;
 
-	*errp = err = ops->doit(skb, &info);
-	return err;
-
-ignore:
-	return 0;
-
-errout:
-	*errp = err;
-	return -1;
+	return ops->doit(skb, &info);
 }
 
 static void genl_rcv(struct sock *sk, int len)
@@ -586,7 +558,7 @@ static int __init genl_init(void)
 
 	netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
 	genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
-					  genl_rcv, THIS_MODULE);
+					  genl_rcv, NULL, THIS_MODULE);
 	if (genl_sock == NULL)
 		panic("GENL: Cannot initialize generic netlink\n");
 
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index bf9837dd95c4..5d4a26c2aa0c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -625,42 +625,42 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 	ax25_address *source = NULL;
 	ax25_uid_assoc *user;
 	struct net_device *dev;
+	int err = 0;
 
 	lock_sock(sk);
 	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
 		sock->state = SS_CONNECTED;
-		release_sock(sk);
-		return 0;	/* Connect completed during a ERESTARTSYS event */
+		goto out_release;	/* Connect completed during a ERESTARTSYS event */
 	}
 
 	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
 		sock->state = SS_UNCONNECTED;
-		release_sock(sk);
-		return -ECONNREFUSED;
+		err = -ECONNREFUSED;
+		goto out_release;
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED) {
-		release_sock(sk);
-		return -EISCONN;	/* No reconnect on a seqpacket socket */
+		err = -EISCONN;	/* No reconnect on a seqpacket socket */
+		goto out_release;
 	}
 
 	sk->sk_state   = TCP_CLOSE;
 	sock->state = SS_UNCONNECTED;
 
 	if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
-		release_sock(sk);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out_release;
 	}
 	if (addr->sax25_family != AF_NETROM) {
-		release_sock(sk);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out_release;
 	}
 	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
 		sock_reset_flag(sk, SOCK_ZAPPED);
 
 		if ((dev = nr_dev_first()) == NULL) {
-			release_sock(sk);
-			return -ENETUNREACH;
+			err = -ENETUNREACH;
+			goto out_release;
 		}
 		source = (ax25_address *)dev->dev_addr;
 
@@ -671,8 +671,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 		} else {
 			if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
 				dev_put(dev);
-				release_sock(sk);
-				return -EPERM;
+				err = -EPERM;
+				goto out_release;
 			}
 			nr->user_addr   = *source;
 		}
@@ -707,8 +707,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	/* Now the loop */
 	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
-		release_sock(sk);
-		return -EINPROGRESS;
+		err = -EINPROGRESS;
+		goto out_release;
 	}
 
 	/*
@@ -716,46 +716,46 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 	 * closed.
 	 */
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		sock->state = SS_UNCONNECTED;
-		release_sock(sk);
-		return sock_error(sk);	/* Always set at this point */
+		err = sock_error(sk);	/* Always set at this point */
+		goto out_release;
 	}
 
 	sock->state = SS_CONNECTED;
+
+out_release:
 	release_sock(sk);
 
-	return 0;
+	return err;
 }
 
 static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -765,42 +765,40 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	lock_sock(sk);
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		err = -EOPNOTSUPP;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
-		goto out;
+		goto out_release;
 	}
 
 	/*
 	 *	The write queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		current->state = TASK_INTERRUPTIBLE;
-		release_sock(sk);
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+	if (err)
+		goto out_release;
 
 	newsk = skb->sk;
 	newsk->sk_socket = newsock;
@@ -811,8 +809,9 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	sk_acceptq_removed(sk);
 	newsock->sk = newsk;
 
-out:
+out_release:
 	release_sock(sk);
+
 	return err;
 }
 
@@ -878,7 +877,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	if (frametype == NR_PROTOEXT &&
 	    circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
 		skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		return nr_rx_ip(skb, dev);
 	}
@@ -904,7 +903,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (sk != NULL) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		if (frametype == NR_CONNACK && skb->len == 22)
 			nr_sk(sk)->bpqext = 1;
@@ -1074,6 +1073,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 
 	skb_reserve(skb, size - len);
+	skb_reset_transport_header(skb);
 
 	/*
 	 *	Push down the NET/ROM header
@@ -1094,14 +1094,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
 	/*
 	 *	Put the data on the end
 	 */
+	skb_put(skb, len);
 
-	skb->h.raw = skb_put(skb, len);
-
-	asmptr = skb->h.raw;
 	SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
 
 	/* User data follows immediately after the NET/ROM transport header */
-	if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) {
+	if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
 		kfree_skb(skb);
 		err = -EFAULT;
 		goto out;
@@ -1149,7 +1147,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return er;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	copied     = skb->len;
 
 	if (copied > size) {
@@ -1161,7 +1159,8 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (sax != NULL) {
 		sax->sax25_family = AF_NETROM;
-		memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN);
+		skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
+			      AX25_ADDR_LEN);
 	}
 
 	msg->msg_namelen = sizeof(*sax);
@@ -1209,6 +1208,12 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		release_sock(sk);
 		return ret;
 
+	case SIOCGSTAMPNS:
+		lock_sock(sk);
+		ret = sock_get_timestampns(sk, argp);
+		release_sock(sk);
+		return ret;
+
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 9a97ed6e6910..c7b5d930e732 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,8 +56,8 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
 
 	/* Spoof incoming device */
 	skb->dev      = dev;
-	skb->mac.raw  = skb->nh.raw;
-	skb->nh.raw   = skb->data;
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
 	skb->pkt_type = PACKET_HOST;
 
 	netif_rx(skb);
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 5560acbaaa95..68176483617f 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -51,10 +51,12 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 		if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
 			return 1;
 
-		skbn->h.raw = skbn->data;
+		skb_reset_transport_header(skbn);
 
 		while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
-			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			skb_copy_from_linear_data(skbo,
+						  skb_put(skbn, skbo->len),
+						  skbo->len);
 			kfree_skb(skbo);
 		}
 
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index e856ae1b360a..f324d5df4186 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -34,8 +34,8 @@ int nr_loopback_queue(struct sk_buff *skb)
 	struct sk_buff *skbn;
 
 	if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
-		memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
-		skbn->h.raw = skbn->data;
+		skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
+		skb_reset_transport_header(skbn);
 
 		skb_queue_tail(&loopback_queue, skbn);
 
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 0cbfb611465b..e3e6c44e1890 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -40,7 +40,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 
 	if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
 		/* Save a copy of the Transport Header */
-		memcpy(transport, skb->data, NR_TRANSPORT_LEN);
+		skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
 		skb_pull(skb, NR_TRANSPORT_LEN);
 
 		frontlen = skb_headroom(skb);
@@ -54,13 +54,13 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 			len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 			skb_pull(skb, len);
 
 			/* Duplicate the Transport Header */
 			skb_push(skbn, NR_TRANSPORT_LEN);
-			memcpy(skbn->data, transport, NR_TRANSPORT_LEN);
-
+			skb_copy_to_linear_data(skbn, transport,
+						NR_TRANSPORT_LEN);
 			if (skb->len > 0)
 				skbn->data[4] |= NR_MORE_FLAG;
 
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 07b694d18870..04e7d0d2fd8f 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -226,13 +226,13 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
 
 	dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
 
-	memcpy(dptr, skb->data + 7, AX25_ADDR_LEN);
+	skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
 	dptr[6] &= ~AX25_CBIT;
 	dptr[6] &= ~AX25_EBIT;
 	dptr[6] |= AX25_SSSID_SPARE;
 	dptr += AX25_ADDR_LEN;
 
-	memcpy(dptr, skb->data + 0, AX25_ADDR_LEN);
+	skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
 	dptr[6] &= ~AX25_CBIT;
 	dptr[6] |= AX25_EBIT;
 	dptr[6] |= AX25_SSSID_SPARE;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8f2873..02e401cd683f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -114,22 +114,22 @@ On receive:
 -----------
 
 Incoming, dev->hard_header!=NULL
-   mac.raw -> ll header
-   data    -> data
+   mac_header -> ll header
+   data       -> data
 
 Outgoing, dev->hard_header!=NULL
-   mac.raw -> ll header
-   data    -> ll header
+   mac_header -> ll header
+   data       -> ll header
 
 Incoming, dev->hard_header==NULL
-   mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
-	      PPP makes it, that is wrong, because introduce assymetry
-	      between rx and tx paths.
-   data    -> data
+   mac_header -> UNKNOWN position. It is very likely, that it points to ll
+		 header.  PPP does this, which is wrong, because it introduces
+                 asymmetry between rx and tx paths.
+   data       -> data
 
 Outgoing, dev->hard_header==NULL
-   mac.raw -> data. ll header is still not built!
-   data    -> data
+   mac_header -> data. ll header is still not built!
+   data       -> data
 
 Resume
   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
@@ -139,12 +139,12 @@ On transmit:
 ------------
 
 dev->hard_header != NULL
-   mac.raw -> ll header
-   data    -> ll header
+   mac_header -> ll header
+   data       -> ll header
 
 dev->hard_header == NULL (ll header is added by device, we cannot control it)
-   mac.raw -> data
-   data -> data
+   mac_header -> data
+   data       -> data
 
    We should set nh.raw on output to correct posistion,
    packet classifier depends on it.
@@ -201,7 +201,8 @@ struct packet_sock {
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
 	unsigned int		running:1,	/* prot_hook is attached*/
-				auxdata:1;
+				auxdata:1,
+				origdev:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 #ifdef CONFIG_PACKET_MULTICAST
@@ -284,7 +285,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	 *	Incoming packets have ll header pulled,
 	 *	push it back.
 	 *
-	 *	For outgoing ones skb->data == skb->mac.raw
+	 *	For outgoing ones skb->data == skb_mac_header(skb)
 	 *	so that this procedure is noop.
 	 */
 
@@ -303,7 +304,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 
 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
-	skb_push(skb, skb->data-skb->mac.raw);
+	skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/*
 	 *	The SOCK_PACKET socket receives _all_ frames.
@@ -401,14 +402,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	 * notable one here. This should really be fixed at the driver level.
 	 */
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Try to align data part correctly */
 	if (dev->hard_header) {
 		skb->data -= dev->hard_header_len;
 		skb->tail -= dev->hard_header_len;
 		if (len < dev->hard_header_len)
-			skb->nh.raw = skb->data;
+			skb_reset_network_header(skb);
 	}
 
 	/* Returns -EFAULT on error */
@@ -488,10 +489,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 		   never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
-			skb_pull(skb, skb->nh.raw - skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 		}
 	}
 
@@ -528,7 +529,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	sll->sll_hatype = dev->type;
 	sll->sll_protocol = skb->protocol;
 	sll->sll_pkttype = skb->pkt_type;
-	sll->sll_ifindex = dev->ifindex;
+	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+		sll->sll_ifindex = orig_dev->ifindex;
+	else
+		sll->sll_ifindex = dev->ifindex;
 	sll->sll_halen = 0;
 
 	if (dev->hard_header_parse)
@@ -582,6 +586,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
 	unsigned short macoff, netoff;
 	struct sk_buff *copy_skb = NULL;
+	struct timeval tv;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -591,10 +596,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
-			skb_pull(skb, skb->nh.raw - skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 		}
 	}
 
@@ -612,7 +617,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	if (sk->sk_type == SOCK_DGRAM) {
 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
 	} else {
-		unsigned maclen = skb->nh.raw - skb->data;
+		unsigned maclen = skb_network_offset(skb);
 		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
 		macoff = netoff - maclen;
 	}
@@ -656,12 +661,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	h->tp_snaplen = snaplen;
 	h->tp_mac = macoff;
 	h->tp_net = netoff;
-	if (skb->tstamp.off_sec == 0) {
+	if (skb->tstamp.tv64 == 0) {
 		__net_timestamp(skb);
 		sock_enable_timestamp(sk);
 	}
-	h->tp_sec = skb->tstamp.off_sec;
-	h->tp_usec = skb->tstamp.off_usec;
+	tv = ktime_to_timeval(skb->tstamp);
+	h->tp_sec = tv.tv_sec;
+	h->tp_usec = tv.tv_usec;
 
 	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
 	sll->sll_halen = 0;
@@ -671,7 +677,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	sll->sll_hatype = dev->type;
 	sll->sll_protocol = skb->protocol;
 	sll->sll_pkttype = skb->pkt_type;
-	sll->sll_ifindex = dev->ifindex;
+	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+		sll->sll_ifindex = orig_dev->ifindex;
+	else
+		sll->sll_ifindex = dev->ifindex;
 
 	h->tp_status = status;
 	smp_mb();
@@ -766,14 +775,14 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out_unlock;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (dev->hard_header) {
 		int res;
 		err = -EINVAL;
 		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (sock->type != SOCK_DGRAM) {
-			skb->tail = skb->data;
+			skb_reset_tail_pointer(skb);
 			skb->len = 0;
 		} else if (res < 0)
 			goto out_free;
@@ -1143,7 +1152,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
-		aux.tp_net = skb->nh.raw - skb->data;
+		aux.tp_net = skb_network_offset(skb);
 
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
@@ -1411,6 +1420,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->auxdata = !!val;
 		return 0;
 	}
+	case PACKET_ORIGDEV:
+	{
+		int val;
+
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->origdev = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1454,6 +1475,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 
 		data = &val;
 		break;
+	case PACKET_ORIGDEV:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->origdev;
+
+		data = &val;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1543,6 +1571,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 		}
 		case SIOCGSTAMP:
 			return sock_get_timestamp(sk, (struct timeval __user *)arg);
+		case SIOCGSTAMPNS:
+			return sock_get_timestampns(sk, (struct timespec __user *)arg);
 
 #ifdef CONFIG_INET
 		case SIOCADDRT:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f92d5310847b..d476c43d5216 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -812,26 +812,26 @@ rose_try_next_neigh:
 	 * closed.
 	 */
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
@@ -856,10 +856,9 @@ out_release:
 
 static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -869,42 +868,41 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 	lock_sock(sk);
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		err = -EOPNOTSUPP;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
-		goto out;
+		goto out_release;
 	}
 
 	/*
 	 *	The write queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		current->state = TASK_INTERRUPTIBLE;
-		release_sock(sk);
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+	if (err)
+		goto out_release;
 
 	newsk = skb->sk;
 	newsk->sk_socket = newsock;
@@ -916,7 +914,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 	sk->sk_ack_backlog--;
 	newsock->sk = newsk;
 
-out:
+out_release:
 	release_sock(sk);
 
 	return err;
@@ -1105,9 +1103,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 */
 	SOCK_DEBUG(sk, "ROSE: Appending user data\n");
 
-	asmptr = skb->h.raw = skb_put(skb, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
 
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1155,7 +1154,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 		int lg;
 
 		/* Save a copy of the Header */
-		memcpy(header, skb->data, ROSE_MIN_LEN);
+		skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
 		skb_pull(skb, ROSE_MIN_LEN);
 
 		frontlen = skb_headroom(skb);
@@ -1175,12 +1174,12 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 			lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, lg), skb->data, lg);
+			skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
 			skb_pull(skb, lg);
 
 			/* Duplicate the Header */
 			skb_push(skbn, ROSE_MIN_LEN);
-			memcpy(skbn->data, header, ROSE_MIN_LEN);
+			skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
 
 			if (skb->len > 0)
 				skbn->data[2] |= M_BIT;
@@ -1234,7 +1233,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 		*asmptr = qbit;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	copied     = skb->len;
 
 	if (copied > size) {
@@ -1296,6 +1295,9 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, (struct timeval __user *) argp);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *) argp);
+
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3e41bd93ab9f..cd01642f0491 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -77,7 +77,7 @@ static void rose_loopback_timer(unsigned long param)
 		dest      = (rose_address *)(skb->data + 4);
 		lci_o     = 0xFFF - lci_i;
 
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		sk = rose_find_socket(lci_o, &rose_loopback_neigh);
 		if (sk) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a1233e1b1ab6..1f9aefd95a99 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -906,7 +906,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 			}
 		}
 		else {
-			skb->h.raw = skb->data;
+			skb_reset_transport_header(skb);
 			res = rose_process_rx_frame(sk, skb);
 			goto out;
 		}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 000000000000..8750f6da6bc7
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,42 @@
+#
+# RxRPC session sockets
+#
+
+config AF_RXRPC
+	tristate "RxRPC session sockets"
+	depends on EXPERIMENTAL
+	help
+	  Say Y or M here to include support for RxRPC session sockets (just
+	  the transport part, not the presentation part: (un)marshalling is
+	  left to the application).
+
+	  These are used for AFS kernel filesystem and userspace utilities.
+
+	  This module at the moment only supports client operations and is
+	  currently incomplete.
+
+	  See Documentation/networking/rxrpc.txt.
+
+
+config AF_RXRPC_DEBUG
+	bool "RxRPC dynamic debugging"
+	depends on AF_RXRPC
+	help
+	  Say Y here to make runtime controllable debugging messages appear.
+
+	  See Documentation/networking/rxrpc.txt.
+
+
+config RXKAD
+	tristate "RxRPC Kerberos security"
+	depends on AF_RXRPC && KEYS
+	select CRYPTO
+	select CRYPTO_MANAGER
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_PCBC
+	select CRYPTO_FCRYPT
+	help
+	  Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC
+	  through the use of the key retention service.
+
+	  See Documentation/networking/rxrpc.txt.
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6efcb6f162a0..c46867c61c98 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,25 +1,29 @@
 #
-# Makefile for Linux kernel Rx RPC
+# Makefile for Linux kernel RxRPC
 #
 
-#CFLAGS += -finstrument-functions
-
-rxrpc-objs := \
-	call.o \
-	connection.o \
-	krxiod.o \
-	krxsecd.o \
-	krxtimod.o \
-	main.o \
-	peer.o \
-	rxrpc_syms.o \
-	transport.o
+af-rxrpc-objs := \
+	af_rxrpc.o \
+	ar-accept.o \
+	ar-ack.o \
+	ar-call.o \
+	ar-connection.o \
+	ar-connevent.o \
+	ar-error.o \
+	ar-input.o \
+	ar-key.o \
+	ar-local.o \
+	ar-output.o \
+	ar-peer.o \
+	ar-recvmsg.o \
+	ar-security.o \
+	ar-skbuff.o \
+	ar-transport.o
 
 ifeq ($(CONFIG_PROC_FS),y)
-rxrpc-objs += proc.o
-endif
-ifeq ($(CONFIG_SYSCTL),y)
-rxrpc-objs += sysctl.o
+af-rxrpc-objs += ar-proc.o
 endif
 
-obj-$(CONFIG_RXRPC) := rxrpc.o
+obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
+
+obj-$(CONFIG_RXKAD) += rxkad.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 000000000000..2c57df9c131b
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,879 @@
+/* AF_RXRPC implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+MODULE_DESCRIPTION("RxRPC network protocol");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_RXRPC);
+
+unsigned rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "RxRPC debugging mask"); /* keyed by the param name */
+
+static int sysctl_rxrpc_max_qlen __read_mostly = 10;
+
+static struct proto rxrpc_proto;
+static const struct proto_ops rxrpc_rpc_ops;
+
+/* local epoch for detecting local-end reset */
+__be32 rxrpc_epoch;
+
+/* current debugging ID */
+atomic_t rxrpc_debug_id;
+
+/* count of skbs currently in use */
+atomic_t rxrpc_n_skbs;
+
+struct workqueue_struct *rxrpc_workqueue;
+
+static void rxrpc_sock_destructor(struct sock *);
+
+/*
+ * an RxRPC socket is writable whilst sk_wmem_alloc is below sk_sndbuf
+ */
+static inline int rxrpc_writable(struct sock *sk)
+{
+	return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+}
+
+/*
+ * wake up anyone waiting for write space once the socket becomes writable
+ */
+static void rxrpc_write_space(struct sock *sk)
+{
+	_enter("%p", sk);
+	read_lock(&sk->sk_callback_lock);
+	if (rxrpc_writable(sk)) {
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+		sk_wake_async(sk, 2, POLL_OUT);
+	}
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * validate an RxRPC address (may zero trailing bytes of an AF_INET transport)
+ */
+static int rxrpc_validate_address(struct rxrpc_sock *rx,
+				  struct sockaddr_rxrpc *srx,
+				  int len)
+{
+	if (len < sizeof(struct sockaddr_rxrpc))
+		return -EINVAL;
+
+	if (srx->srx_family != AF_RXRPC)
+		return -EAFNOSUPPORT;
+
+	if (srx->transport_type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	len -= offsetof(struct sockaddr_rxrpc, transport); /* room for addr */
+	if (srx->transport_len < sizeof(sa_family_t) ||
+	    srx->transport_len > len)
+		return -EINVAL;
+
+	if (srx->transport.family != rx->proto)
+		return -EAFNOSUPPORT;
+
+	switch (srx->transport.family) {
+	case AF_INET:
+		_debug("INET: %x @ %u.%u.%u.%u",
+		       ntohs(srx->transport.sin.sin_port),
+		       NIPQUAD(srx->transport.sin.sin_addr));
+		if (srx->transport_len > 8) /* past 8 bytes: presumably padding */
+			memset((void *)&srx->transport + 8, 0,
+			       srx->transport_len - 8);
+		break;
+
+	case AF_INET6: /* rejected along with every other family */
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	return 0;
+}
+
+/*
+ * bind a local address, and a service ID if one is given, to an RxRPC socket
+ */
+static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr;
+	struct sock *sk = sock->sk;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+	__be16 service_id;
+	int ret;
+
+	_enter("%p,%p,%d", rx, saddr, len);
+
+	ret = rxrpc_validate_address(rx, srx, len);
+	if (ret < 0)
+		goto error;
+
+	lock_sock(&rx->sk);
+
+	if (rx->sk.sk_state != RXRPC_UNCONNECTED) { /* not a fresh socket */
+		ret = -EINVAL;
+		goto error_unlock;
+	}
+
+	memcpy(&rx->srx, srx, sizeof(rx->srx));
+
+	/* find a local transport endpoint if we don't have one already */
+	local = rxrpc_lookup_local(&rx->srx);
+	if (IS_ERR(local)) {
+		ret = PTR_ERR(local);
+		goto error_unlock;
+	}
+
+	rx->local = local;
+	if (srx->srx_service) { /* non-zero service ID: bind as a server */
+		service_id = htons(srx->srx_service);
+		write_lock_bh(&local->services_lock);
+		list_for_each_entry(prx, &local->services, listen_link) {
+			if (prx->service_id == service_id)
+				goto service_in_use; /* ID already bound */
+		}
+
+		rx->service_id = service_id;
+		list_add_tail(&rx->listen_link, &local->services);
+		write_unlock_bh(&local->services_lock);
+
+		rx->sk.sk_state = RXRPC_SERVER_BOUND;
+	} else {
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = 0");
+	return 0;
+
+service_in_use:
+	ret = -EADDRINUSE;
+	write_unlock_bh(&local->services_lock);
+error_unlock:
+	release_sock(&rx->sk);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set the number of pending calls permitted on a listening socket
+ * - only a socket bound to a service can be moved into the listening state
+ * - returns 0 on success or a negative error code
+ */
+static int rxrpc_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret;
+
+	_enter("%p,%d", rx, backlog);
+
+	/* the state check and change are made under the socket lock */
+	lock_sock(&rx->sk);
+
+	if (rx->sk.sk_state == RXRPC_SERVER_BOUND) {
+		/* bound to a service, so listening can begin */
+		ASSERT(rx->local != NULL);
+		sk->sk_max_ack_backlog = backlog;
+		rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+		ret = 0;
+	} else if (rx->sk.sk_state == RXRPC_UNCONNECTED) {
+		/* nothing to listen on in the unconnected state */
+		ret = -EADDRNOTAVAIL;
+	} else {
+		/* client sockets and every other state */
+		ret = -EBUSY;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * find a transport by address (addr_len and flags are only used for tracing)
+ */
+static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
+						       struct sockaddr *addr,
+						       int addr_len, int flags,
+						       gfp_t gfp)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+	struct rxrpc_transport *trans;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct rxrpc_peer *peer;
+
+	_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+	ASSERT(rx->local != NULL);
+	ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
+
+	if (rx->srx.transport_type != srx->transport_type)
+		return ERR_PTR(-ESOCKTNOSUPPORT);
+	if (rx->srx.transport.family != srx->transport.family)
+		return ERR_PTR(-EAFNOSUPPORT);
+
+	/* find a remote transport endpoint from the local one */
+	peer = rxrpc_get_peer(srx, gfp);
+	if (IS_ERR(peer))
+		return ERR_PTR(PTR_ERR(peer));
+
+	/* find a transport; the peer is put again whatever the outcome */
+	trans = rxrpc_get_transport(rx->local, peer, gfp);
+	rxrpc_put_peer(peer);
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/**
+ * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
+ * @sock: The socket on which to make the call
+ * @srx: The address of the peer to contact (defaults to socket setting)
+ * @key: The security context to use (defaults to socket setting)
+ * @user_call_ID: The ID to use
+ * @gfp: The allocation constraints
+ *
+ * Allow a kernel service to begin a call on the nominated socket.  This just
+ * sets up all the internal tracking structures and allocates connection and
+ * call IDs as appropriate.  Returns the call to use or an ERR_PTR() on error.
+ *
+ * The default socket destination address and security may be overridden by
+ * supplying @srx and @key.
+ */
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+					   struct sockaddr_rxrpc *srx,
+					   struct key *key,
+					   unsigned long user_call_ID,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle;
+	struct rxrpc_transport *trans;
+	struct rxrpc_call *call;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	__be16 service_id;
+
+	_enter(",,%x,%lx", key_serial(key), user_call_ID);
+
+	lock_sock(&rx->sk);
+
+	if (srx) {
+		trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
+						sizeof(*srx), 0, gfp);
+		if (IS_ERR(trans)) {
+			call = ERR_PTR(PTR_ERR(trans));
+			goto out_notrans;
+		}
+	} else {
+		trans = rx->trans;
+		if (!trans) {
+			call = ERR_PTR(-ENOTCONN);
+			goto out_notrans;
+		}
+		atomic_inc(&trans->usage);
+	}
+
+	service_id = srx ? htons(srx->srx_service) : rx->service_id;
+
+	if (!key)
+		key = rx->key;
+	if (key && !key->payload.data)
+		key = NULL; /* a no-security key */
+
+	bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
+	if (IS_ERR(bundle)) {
+		call = ERR_PTR(PTR_ERR(bundle));
+		goto out;
+	}
+
+	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
+				     gfp);
+	rxrpc_put_bundle(trans, bundle);
+out:
+	rxrpc_put_transport(trans);
+out_notrans:
+	/* reached directly when no transport ref was obtained, so none is put */
+	release_sock(&rx->sk);
+	_leave(" = %p", call);
+	return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+
+/**
+ * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @call: The call to end
+ *
+ * Allow a kernel service to end a call it was using.  The call must be
+ * complete before this is called (the caller should abort it if necessary).
+ */
+void rxrpc_kernel_end_call(struct rxrpc_call *call)
+{
+	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+	rxrpc_remove_user_ID(call->socket, call); /* untag it on its socket */
+	rxrpc_put_call(call); /* hand back the ref; don't use @call after */
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_end_call);
+
+/**
+ * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * @sock: The socket to intercept received messages on
+ * @interceptor: The function to pass the messages to
+ *
+ * Allow a kernel service to intercept messages heading for the Rx queue on an
+ * RxRPC socket.  They get passed to the specified function instead.
+ * @interceptor should free the socket buffers it is given.  @interceptor is
+ * called with the socket receive queue spinlock held and softirqs disabled -
+ * this ensures that the messages will be delivered in the right order.
+ */
+void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					rxrpc_interceptor_t interceptor)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+
+	_enter("");
+	rx->interceptor = interceptor; /* NOTE(review): plain store, no lock */
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+
+/*
+ * connect an RxRPC socket
+ * - this just targets it at a specific destination; no actual connection
+ *   negotiation takes place (an unbound socket is bound as a client first)
+ */
+static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
+			 int addr_len, int flags)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+	struct sock *sk = sock->sk;
+	struct rxrpc_transport *trans;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret;
+
+	_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+	ret = rxrpc_validate_address(rx, srx, addr_len);
+	if (ret < 0) {
+		_leave(" = %d [bad addr]", ret);
+		return ret;
+	}
+
+	lock_sock(&rx->sk);
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_UNCONNECTED:
+		/* find a local transport endpoint if we don't have one already */
+		ASSERTCMP(rx->local, ==, NULL);
+		rx->srx.srx_family = AF_RXRPC;
+		rx->srx.srx_service = 0;
+		rx->srx.transport_type = srx->transport_type;
+		rx->srx.transport_len = sizeof(sa_family_t);
+		rx->srx.transport.family = srx->transport.family;
+		local = rxrpc_lookup_local(&rx->srx);
+		if (IS_ERR(local)) {
+			release_sock(&rx->sk);
+			return PTR_ERR(local);
+		}
+		rx->local = local;
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND; /* fall through */
+	case RXRPC_CLIENT_BOUND:
+		break;
+	case RXRPC_CLIENT_CONNECTED:
+		release_sock(&rx->sk);
+		return -EISCONN;
+	default:
+		release_sock(&rx->sk);
+		return -EBUSY; /* server sockets can't connect as well */
+	}
+
+	trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
+					GFP_KERNEL);
+	if (IS_ERR(trans)) {
+		release_sock(&rx->sk);
+		_leave(" = %ld", PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	rx->trans = trans;
+	rx->service_id = htons(srx->srx_service); /* kept in network order */
+	rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+
+	release_sock(&rx->sk);
+	return 0;
+}
+
+/*
+ * send a message through an RxRPC socket
+ * - in a client this does a number of things:
+ *   - finds/sets up a connection for the security specified (if any)
+ *   - initiates a call (ID in control data)
+ *   - ends the request phase of a call (if MSG_MORE is not set)
+ *   - sends a call data packet
+ *   - may send an abort (abort code in control data)
+ */
+static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
+			 struct msghdr *m, size_t len)
+{
+	struct rxrpc_transport *trans;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	int ret;
+
+	_enter(",{%d},,%zu", rx->sk.sk_state, len);
+
+	if (m->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (m->msg_name) {
+		ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
+		if (ret < 0) {
+			_leave(" = %d [bad addr]", ret);
+			return ret;
+		}
+	}
+
+	trans = NULL;
+	lock_sock(&rx->sk);
+
+	if (m->msg_name) {
+		ret = -EISCONN; /* NOTE(review): never read before reassignment */
+		trans = rxrpc_name_to_transport(sock, m->msg_name,
+						m->msg_namelen, 0, GFP_KERNEL);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			goto out;
+		}
+	} else {
+		trans = rx->trans;
+		if (trans)
+			atomic_inc(&trans->usage);
+	}
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_SERVER_LISTENING:
+		if (!m->msg_name) {
+			ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+			break;
+		} /* address given: fall through and send as a client */
+	case RXRPC_SERVER_BOUND:
+	case RXRPC_CLIENT_BOUND:
+		if (!m->msg_name) {
+			ret = -ENOTCONN;
+			break;
+		} /* address given: fall through */
+	case RXRPC_CLIENT_CONNECTED:
+		ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+		break;
+	default:
+		ret = -ENOTCONN;
+		break;
+	}
+
+out:
+	release_sock(&rx->sk);
+	if (trans) /* NULL on lookup failure or if no default transport */
+		rxrpc_put_transport(trans);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set RxRPC socket options
+ * - returns -EOPNOTSUPP for a level or option that isn't recognised
+ */
+static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
+			    char __user *optval, int optlen)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	unsigned min_sec_level;
+	int ret = -EOPNOTSUPP;
+
+	_enter(",%d,%d,,%d", level, optname, optlen);
+
+	lock_sock(&rx->sk);
+	if (level != SOL_RXRPC)
+		goto error;
+
+	switch (optname) {
+	case RXRPC_EXCLUSIVE_CONNECTION:
+		ret = -EINVAL;
+		if (optlen != 0)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
+		goto success;
+
+	case RXRPC_SECURITY_KEY:
+		ret = -EINVAL;
+		if (rx->key)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		ret = rxrpc_request_key(rx, optval, optlen);
+		goto error;
+
+	case RXRPC_SECURITY_KEYRING:
+		ret = -EINVAL;
+		if (rx->key)
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		ret = rxrpc_server_keyring(rx, optval, optlen);
+		goto error;
+
+	case RXRPC_MIN_SECURITY_LEVEL:
+		ret = -EINVAL;
+		if (optlen != sizeof(unsigned))
+			goto error;
+		ret = -EISCONN;
+		if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+			goto error;
+		ret = get_user(min_sec_level,
+			       (unsigned __user *) optval);
+		if (ret < 0)
+			goto error;
+		ret = -EINVAL;
+		if (min_sec_level > RXRPC_SECURITY_MAX)
+			goto error;
+		rx->min_sec_level = min_sec_level;
+		goto success;
+
+	default:
+		goto error;
+	}
+
+success:
+	ret = 0;
+error:
+	release_sock(&rx->sk);
+	return ret;
+}
+
+/*
+ * permit an RxRPC socket to be polled for readability and writability
+ */
+static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int events = 0;
+
+	/* register on the socket's wait queue before sampling its state */
+	poll_wait(file, sk->sk_sleep, wait);
+
+	/* the socket is readable whenever something is sitting on the Rx
+	 * queue */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		events |= POLLIN | POLLRDNORM;
+
+	/* writability is judged for the socket as a whole; an individual call
+	 * in progress may still find its Tx ACK window full */
+	if (rxrpc_writable(sk))
+		events |= POLLOUT | POLLWRNORM;
+
+	/* hand back the accumulated readiness bits (zero if neither holds) */
+	return events;
+}
+
+/*
+ * create an RxRPC socket; only PF_INET transport and SOCK_DGRAM are accepted
+ */
+static int rxrpc_create(struct socket *sock, int protocol)
+{
+	struct rxrpc_sock *rx;
+	struct sock *sk;
+
+	_enter("%p,%d", sock, protocol);
+
+	/* we support transport protocol UDP only */
+	if (protocol != PF_INET)
+		return -EPROTONOSUPPORT;
+
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &rxrpc_rpc_ops;
+	sock->state = SS_UNCONNECTED;
+
+	sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+	sk->sk_state		= RXRPC_UNCONNECTED;
+	sk->sk_write_space	= rxrpc_write_space;
+	sk->sk_max_ack_backlog	= sysctl_rxrpc_max_qlen;
+	sk->sk_destruct		= rxrpc_sock_destructor;
+
+	rx = rxrpc_sk(sk);
+	rx->proto = protocol; /* the transport family requested by the caller */
+	rx->calls = RB_ROOT;
+
+	INIT_LIST_HEAD(&rx->listen_link);
+	INIT_LIST_HEAD(&rx->secureq);
+	INIT_LIST_HEAD(&rx->acceptq);
+	rwlock_init(&rx->call_lock);
+	memset(&rx->srx, 0, sizeof(rx->srx));
+
+	_leave(" = 0 [%p]", rx);
+	return 0;
+}
+
+/*
+ * RxRPC socket destructor - purge the Rx queue and sanity check the socket
+ */
+static void rxrpc_sock_destructor(struct sock *sk)
+{
+	_enter("%p", sk);
+
+	rxrpc_purge_queue(&sk->sk_receive_queue);
+
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(sk_unhashed(sk));
+	BUG_TRAP(!sk->sk_socket);
+
+	if (!sock_flag(sk, SOCK_DEAD)) { /* only complains; nothing is undone */
+		printk("Attempt to release alive rxrpc socket: %p\n", sk);
+		return;
+	}
+}
+
+/*
+ * release an RxRPC socket: mark it closed, flush its calls and drop its refs
+ */
+static int rxrpc_release_sock(struct sock *sk)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+
+	_enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+
+	/* declare the socket closed for business */
+	sock_orphan(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	sk->sk_state = RXRPC_CLOSE;
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
+
+	if (!list_empty(&rx->listen_link)) { /* on a local's services list */
+		write_lock_bh(&rx->local->services_lock);
+		list_del(&rx->listen_link);
+		write_unlock_bh(&rx->local->services_lock);
+	}
+
+	/* try to flush out this socket */
+	rxrpc_release_calls_on_socket(rx);
+	flush_workqueue(rxrpc_workqueue);
+	rxrpc_purge_queue(&sk->sk_receive_queue);
+
+	if (rx->conn) {
+		rxrpc_put_connection(rx->conn);
+		rx->conn = NULL;
+	}
+
+	if (rx->bundle) { /* put before rx->trans, which the put call needs */
+		rxrpc_put_bundle(rx->trans, rx->bundle);
+		rx->bundle = NULL;
+	}
+	if (rx->trans) {
+		rxrpc_put_transport(rx->trans);
+		rx->trans = NULL;
+	}
+	if (rx->local) {
+		rxrpc_put_local(rx->local);
+		rx->local = NULL;
+	}
+
+	key_put(rx->key);
+	rx->key = NULL;
+	key_put(rx->securities);
+	rx->securities = NULL;
+	sock_put(sk);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * release an RxRPC BSD socket on close() or equivalent
+ */
+static int rxrpc_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	_enter("%p{%p}", sock, sk);
+
+	if (!sk) /* nothing attached, so nothing to release */
+		return 0;
+
+	sock->sk = NULL; /* detach before the sock is torn down */
+
+	return rxrpc_release_sock(sk);
+}
+
+/*
+ * RxRPC socket operations (the family must match what sock_register() gets)
+ */
+static const struct proto_ops rxrpc_rpc_ops = {
+	.family		= PF_RXRPC,
+	.owner		= THIS_MODULE,
+	.release	= rxrpc_release,
+	.bind		= rxrpc_bind,
+	.connect	= rxrpc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= sock_no_getname,
+	.poll		= rxrpc_poll,
+	.ioctl		= sock_no_ioctl,
+	.listen		= rxrpc_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= rxrpc_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.sendmsg	= rxrpc_sendmsg,
+	.recvmsg	= rxrpc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+static struct proto rxrpc_proto = { /* for sk_alloc()/proto_register() */
+	.name		= "RXRPC",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct rxrpc_sock),
+	.max_header	= sizeof(struct rxrpc_header),
+};
+
+static struct net_proto_family rxrpc_family_ops = { /* for sock_register() */
+	.family	= PF_RXRPC,
+	.create = rxrpc_create,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * initialise and register the RxRPC protocol, unwinding on any failure
+ */
+static int __init af_rxrpc_init(void)
+{
+	struct sk_buff *dummy_skb; /* only used for the sizeof() check below */
+	int ret;
+
+	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
+
+	rxrpc_epoch = htonl(xtime.tv_sec);
+
+	ret = -ENOMEM;
+	rxrpc_call_jar = kmem_cache_create(
+		"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
+		SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!rxrpc_call_jar) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+		goto error_call_jar;
+	}
+
+	rxrpc_workqueue = create_workqueue("krxrpcd");
+	if (!rxrpc_workqueue) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+		goto error_work_queue;
+	}
+
+	ret = proto_register(&rxrpc_proto, 1);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+		goto error_proto;
+	}
+
+	ret = sock_register(&rxrpc_family_ops);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+		goto error_sock;
+	}
+
+	ret = register_key_type(&key_type_rxrpc);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+		goto error_key_type;
+	}
+
+	ret = register_key_type(&key_type_rxrpc_s);
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+		goto error_key_type_s;
+	}
+
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+	proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+#endif
+	return 0;
+
+error_key_type_s:
+	unregister_key_type(&key_type_rxrpc);
+error_key_type:
+	sock_unregister(PF_RXRPC);
+error_sock:
+	proto_unregister(&rxrpc_proto);
+error_proto:
+	destroy_workqueue(rxrpc_workqueue);
+error_work_queue:
+	kmem_cache_destroy(rxrpc_call_jar);
+error_call_jar:
+	return ret;
+}
+
+/*
+ * unregister the RxRPC protocol, reversing what af_rxrpc_init() set up
+ */
+static void __exit af_rxrpc_exit(void)
+{
+	_enter("");
+	unregister_key_type(&key_type_rxrpc_s);
+	unregister_key_type(&key_type_rxrpc);
+	sock_unregister(PF_RXRPC);
+	proto_unregister(&rxrpc_proto);
+	rxrpc_destroy_all_calls();
+	rxrpc_destroy_all_connections();
+	rxrpc_destroy_all_transports();
+	rxrpc_destroy_all_peers();
+	rxrpc_destroy_all_locals();
+
+	ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+
+	_debug("flush scheduled work");
+	flush_workqueue(rxrpc_workqueue);
+	proc_net_remove("rxrpc_conns"); /* NOTE(review): not #ifdef'd as in init */
+	proc_net_remove("rxrpc_calls");
+	destroy_workqueue(rxrpc_workqueue);
+	kmem_cache_destroy(rxrpc_call_jar);
+	_leave("");
+}
+
+module_init(af_rxrpc_init);
+module_exit(af_rxrpc_exit);
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
new file mode 100644
index 000000000000..92a87fde8bfe
--- /dev/null
+++ b/net/rxrpc/ar-accept.c
@@ -0,0 +1,504 @@
+/* incoming call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * generate a BUSY packet from @hdr and send it to the peer at @srx
+ */
+static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
+		      struct rxrpc_header *hdr)
+{
+	struct msghdr msg;
+	struct kvec iov[1];
+	size_t len;
+	int ret;
+
+	_enter("%d,,", local->debug_id);
+
+	msg.msg_name	= &srx->transport.sin;
+	msg.msg_namelen	= sizeof(srx->transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr->seq	= 0; /* @hdr is rewritten in place and sent as-is */
+	hdr->type	= RXRPC_PACKET_TYPE_BUSY;
+	hdr->flags	= 0;
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+
+	len = iov[0].iov_len;
+
+	hdr->serial = htonl(1);
+	_proto("Tx BUSY %%%u", ntohl(hdr->serial));
+
+	ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
+	if (ret < 0) {
+		_leave(" = -EAGAIN [sendmsg failed: %d]", ret);
+		return -EAGAIN; /* the underlying error is only logged */
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * accept an incoming call that needs peer, transport and/or connection setup
+ * - returns 0, -EBUSY, -ECONNREFUSED or the connection/call setup error
+ */
+static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
+				      struct rxrpc_sock *rx,
+				      struct sk_buff *skb,
+				      struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	struct rxrpc_skb_priv *sp, *nsp;
+	struct rxrpc_peer *peer;
+	struct rxrpc_call *call;
+	struct sk_buff *notification;
+	int ret;
+
+	_enter("");
+
+	sp = rxrpc_skb(skb);
+
+	/* get a notification message to send to the server app */
+	ret = -EBUSY; /* what any failure down to the transport lookup gives */
+	notification = alloc_skb(0, GFP_NOFS);
+	if (!notification)
+		goto error;
+	rxrpc_new_skb(notification);
+	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
+
+	peer = rxrpc_get_peer(srx, GFP_NOIO);
+	if (IS_ERR(peer)) {
+		_debug("no peer");
+		goto error;
+	}
+
+	trans = rxrpc_get_transport(local, peer, GFP_NOIO);
+	rxrpc_put_peer(peer);
+	if (IS_ERR(trans)) {
+		_debug("no trans");
+		goto error;
+	}
+
+	conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO);
+	rxrpc_put_transport(trans);
+	if (IS_ERR(conn)) {
+		_debug("no conn");
+		ret = PTR_ERR(conn);
+		goto error;
+	}
+
+	call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO);
+	rxrpc_put_connection(conn);
+	if (IS_ERR(call)) {
+		_debug("no call");
+		ret = PTR_ERR(call);
+		goto error;
+	}
+
+	/* attach the call to the socket */
+	read_lock_bh(&local->services_lock);
+	if (rx->sk.sk_state == RXRPC_CLOSE)
+		goto invalid_service;
+
+	write_lock(&rx->call_lock);
+	if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
+		rxrpc_get_call(call);
+
+		spin_lock(&call->conn->state_lock);
+		if (sp->hdr.securityIndex > 0 &&
+		    call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+			_debug("await conn sec");
+			list_add_tail(&call->accept_link, &rx->secureq);
+			call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
+			atomic_inc(&call->conn->usage);
+			set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+			rxrpc_queue_conn(call->conn);
+		} else {
+			_debug("conn ready");
+			call->state = RXRPC_CALL_SERVER_ACCEPTING;
+			list_add_tail(&call->accept_link, &rx->acceptq);
+			rxrpc_get_call(call);
+			nsp = rxrpc_skb(notification);
+			nsp->call = call;
+
+			ASSERTCMP(atomic_read(&call->usage), >=, 3);
+
+			_debug("notify");
+			spin_lock(&call->lock);
+			ret = rxrpc_queue_rcv_skb(call, notification, true,
+						  false);
+			spin_unlock(&call->lock);
+			notification = NULL;
+			BUG_ON(ret < 0);
+		}
+		spin_unlock(&call->conn->state_lock);
+
+		_debug("queued");
+	}
+	write_unlock(&rx->call_lock);
+
+	_debug("process");
+	rxrpc_fast_process_packet(call, skb);
+
+	_debug("done");
+	read_unlock_bh(&local->services_lock);
+	rxrpc_free_skb(notification);
+	rxrpc_put_call(call);
+	_leave(" = 0");
+	return 0;
+
+invalid_service:
+	_debug("invalid");
+	read_unlock_bh(&local->services_lock);
+
+	read_lock_bh(&call->state_lock);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
+		rxrpc_get_call(call);
+		rxrpc_queue_call(call);
+	}
+	read_unlock_bh(&call->state_lock);
+	rxrpc_put_call(call);
+	ret = -ECONNREFUSED;
+error:
+	rxrpc_free_skb(notification); /* may be NULL, as on the success path */
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * accept incoming calls that need peer, transport and/or connection setting up
+ * - the packets we get are all incoming client DATA packets that have seq == 1
+ */
+void rxrpc_accept_incoming_calls(struct work_struct *work)
+{
+	struct rxrpc_local *local =
+		container_of(work, struct rxrpc_local, acceptor);
+	struct rxrpc_skb_priv *sp;
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_sock *rx;
+	struct sk_buff *skb;
+	__be16 service_id;
+	int ret;
+
+	_enter("%d", local->debug_id);
+
+	read_lock_bh(&rxrpc_local_lock);
+	if (atomic_read(&local->usage) > 0) /* don't revive a dead endpoint */
+		rxrpc_get_local(local);
+	else
+		local = NULL;
+	read_unlock_bh(&rxrpc_local_lock);
+	if (!local) {
+		_leave(" [local dead]");
+		return;
+	}
+
+process_next_packet:
+	skb = skb_dequeue(&local->accept_queue);
+	if (!skb) { /* queue drained: drop the ref taken above and finish */
+		rxrpc_put_local(local);
+		_leave("\n");
+		return;
+	}
+
+	_net("incoming call skb %p", skb);
+
+	sp = rxrpc_skb(skb);
+
+	/* determine the remote address */
+	memset(&srx, 0, sizeof(srx));
+	srx.srx_family = AF_RXRPC;
+	srx.transport.family = local->srx.transport.family;
+	srx.transport_type = local->srx.transport_type;
+	switch (srx.transport.family) {
+	case AF_INET:
+		srx.transport_len = sizeof(struct sockaddr_in);
+		srx.transport.sin.sin_port = udp_hdr(skb)->source;
+		srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		break;
+	default:
+		goto busy; /* no other transport family is handled here */
+	}
+
+	/* get the socket providing the service */
+	service_id = sp->hdr.serviceId;
+	read_lock_bh(&local->services_lock);
+	list_for_each_entry(rx, &local->services, listen_link) {
+		if (rx->service_id == service_id &&
+		    rx->sk.sk_state != RXRPC_CLOSE)
+			goto found_service;
+	}
+	read_unlock_bh(&local->services_lock);
+	goto invalid_service;
+
+found_service:
+	_debug("found service %hd", ntohs(rx->service_id));
+	if (sk_acceptq_is_full(&rx->sk))
+		goto backlog_full;
+	sk_acceptq_added(&rx->sk);
+	sock_hold(&rx->sk); /* keep the socket while the lock is dropped */
+	read_unlock_bh(&local->services_lock);
+
+	ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
+	if (ret < 0)
+		sk_acceptq_removed(&rx->sk); /* undo the accounting above */
+	sock_put(&rx->sk);
+	switch (ret) {
+	case -ECONNRESET: /* old calls are ignored */
+	case -ECONNABORTED: /* aborted calls are reaborted or ignored */
+	case 0:
+		goto process_next_packet;
+	case -ECONNREFUSED:
+		goto invalid_service;
+	case -EBUSY:
+		goto busy;
+	case -EKEYREJECTED:
+		goto security_mismatch;
+	default:
+		BUG(); /* any other error code is treated as a kernel bug */
+	}
+
+backlog_full:
+	read_unlock_bh(&local->services_lock);
+busy:
+	rxrpc_busy(local, &srx, &sp->hdr); /* result deliberately ignored */
+	rxrpc_free_skb(skb);
+	goto process_next_packet;
+
+invalid_service:
+	skb->priority = RX_INVALID_OPERATION; /* code passed on via the skb */
+	rxrpc_reject_packet(local, skb);
+	goto process_next_packet;
+
+	/* can't change connection security type mid-flow */
+security_mismatch:
+	skb->priority = RX_PROTOCOL_ERROR;
+	rxrpc_reject_packet(local, skb);
+	goto process_next_packet;
+}
+
+/*
+ * handle acceptance of a call by userspace: give the call at the front of the
+ * accept queue the user call ID; returns it with a ref taken, or an ERR_PTR()
+ */
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
+				     unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+	struct rb_node *parent, **pp;
+	int ret;
+
+	_enter(",%lx", user_call_ID);
+
+	ASSERT(!irqs_disabled());
+
+	write_lock(&rx->call_lock);
+
+	ret = -ENODATA; /* nothing is waiting to be accepted */
+	if (list_empty(&rx->acceptq))
+		goto out;
+
+	/* check the user ID isn't already in use */
+	ret = -EBADSLT;
+	pp = &rx->calls.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			pp = &(*pp)->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			pp = &(*pp)->rb_right;
+		else
+			goto out;
+	}
+
+	/* dequeue the first call and check it's still valid */
+	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	list_del_init(&call->accept_link);
+	sk_acceptq_removed(&rx->sk);
+
+	write_lock_bh(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_SERVER_ACCEPTING:
+		call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+		break;
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		ret = -ECONNABORTED;
+		goto out_release;
+	case RXRPC_CALL_NETWORK_ERROR:
+		ret = call->conn->error;
+		goto out_release;
+	case RXRPC_CALL_DEAD:
+		ret = -ETIME;
+		goto out_discard;
+	default:
+		BUG();
+	}
+
+	/* formalise the acceptance */
+	call->user_call_ID = user_call_ID;
+	rb_link_node(&call->sock_node, parent, pp); /* slot found by the walk */
+	rb_insert_color(&call->sock_node, &rx->calls);
+	if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
+		BUG();
+	if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
+		BUG();
+	rxrpc_queue_call(call);
+
+	rxrpc_get_call(call);
+	write_unlock_bh(&call->state_lock);
+	write_unlock(&rx->call_lock);
+	_leave(" = %p{%d}", call, call->debug_id);
+	return call;
+
+	/* if the call is already dying or dead, then we leave the socket's ref
+	 * on it to be released by rxrpc_dead_call_expired() as induced by
+	 * rxrpc_release_call() */
+out_release:
+	_debug("release %p", call);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		rxrpc_queue_call(call);
+out_discard:
+	write_unlock_bh(&call->state_lock);
+	_debug("discard %p", call);
+out:
+	write_unlock(&rx->call_lock);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * handle rejection of a call by userspace
+ * - reject the call at the front of the queue; returns 0 or a negative error
+ */
+int rxrpc_reject_call(struct rxrpc_sock *rx)
+{
+	struct rxrpc_call *call;
+	int ret;
+
+	_enter("");
+
+	ASSERT(!irqs_disabled());
+
+	write_lock(&rx->call_lock);
+
+	ret = -ENODATA;
+	if (list_empty(&rx->acceptq))
+		goto out;
+
+	/* dequeue the first call and check it's still valid */
+	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	list_del_init(&call->accept_link);
+	sk_acceptq_removed(&rx->sk);
+
+	write_lock_bh(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_SERVER_ACCEPTING:
+		call->state = RXRPC_CALL_SERVER_BUSY;
+		if (!test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events))
+			rxrpc_queue_call(call);
+		ret = 0;
+		goto out_release;
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		ret = -ECONNABORTED;
+		goto out_release;
+	case RXRPC_CALL_NETWORK_ERROR:
+		ret = call->conn->error;
+		goto out_release;
+	case RXRPC_CALL_DEAD:
+		ret = -ETIME;
+		goto out_discard;
+	default:
+		BUG();
+	}
+
+	/* if the call is already dying or dead, then we leave the socket's ref
+	 * on it to be released by rxrpc_dead_call_expired() as induced by
+	 * rxrpc_release_call() */
+out_release:
+	_debug("release %p", call);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		rxrpc_queue_call(call);
+out_discard:
+	write_unlock_bh(&call->state_lock);
+	_debug("discard %p", call);
+out:
+	write_unlock(&rx->call_lock);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
+ * @sock: The socket on which the impending call is waiting
+ * @user_call_ID: The tag to attach to the call
+ *
+ * Allow a kernel service to accept the call at the front of the socket's
+ * accept queue.  Returns the call, or an ERR_PTR() if that was not possible.
+ */
+struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
+					    unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+
+	_enter(",%lx", user_call_ID);
+	call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
+	_leave(" = %p", call);
+	return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_accept_call);
+
+/**
+ * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
+ * @sock: The socket on which the impending call is waiting
+ *
+ * Allow a kernel service to reject the call at the front of the accept queue
+ * with a BUSY message.  Returns 0 on success or a negative error code.
+ */
+int rxrpc_kernel_reject_call(struct socket *sock)
+{
+	int ret;
+
+	_enter("");
+	ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_reject_call);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
new file mode 100644
index 000000000000..fc07a926df56
--- /dev/null
+++ b/net/rxrpc/ar-ack.c
@@ -0,0 +1,1250 @@
+/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static unsigned rxrpc_ack_defer = 1;	/* jiffies added to the current time
+					 * when a REQUESTED ACK is deferred;
+					 * zero means never defer */
+
+static const char *rxrpc_acks[] = {	/* ACK reason names for trace output,
+					 * indexed by ACK reason code; the
+					 * last entry presumably stands for
+					 * an unrecognised reason */
+	"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
+	"-?-"
+};
+
+static const s8 rxrpc_ack_priority[] = {	/* rank of each ACK reason: a
+						 * proposed ACK displaces the
+						 * pending one only if it
+						 * ranks higher; reasons not
+						 * listed rank 0 and may not
+						 * be proposed */
+	[0]				= 0,
+	[RXRPC_ACK_DELAY]		= 1,
+	[RXRPC_ACK_REQUESTED]		= 2,
+	[RXRPC_ACK_IDLE]		= 3,
+	[RXRPC_ACK_PING_RESPONSE]	= 4,
+	[RXRPC_ACK_DUPLICATE]		= 5,
+	[RXRPC_ACK_OUT_OF_SEQUENCE]	= 6,
+	[RXRPC_ACK_EXCEEDS_WINDOW]	= 7,
+	[RXRPC_ACK_NOSPACE]		= 8,
+};
+
+/*
+ * propose an ACK be sent
+ *
+ * @ack_reason is ranked through rxrpc_ack_priority[] against the reason
+ * already recorded in call->ackr_reason:
+ * - a lower-ranked proposal is dropped, and an equal-ranked one at most
+ *   refreshes call->ackr_serial (priorities 1-4 only); in both cases an
+ *   @immediate request still cancels the ACK timer and raises the ACK event
+ * - a higher-ranked proposal replaces the recorded reason and serial and
+ *   then either arms call->ack_timer or raises RXRPC_CALL_ACK at once,
+ *   depending on the reason and on @immediate
+ *
+ * The ACK event is only raised (and the call queued) while the call state
+ * is no later than RXRPC_CALL_COMPLETE.
+ *
+ * rxrpc_propose_ACK() calls this with call->lock held.
+ * NOTE(review): other callers are presumably expected to hold it too.
+ */
+void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+			 __be32 serial, bool immediate)
+{
+	unsigned long expiry;
+	s8 prior = rxrpc_ack_priority[ack_reason];
+
+	ASSERTCMP(prior, >, 0);	/* priority-0 reasons may not be proposed */
+
+	_enter("{%d},%s,%%%x,%u",
+	       call->debug_id, rxrpc_acks[ack_reason], ntohl(serial),
+	       immediate);
+
+	if (prior < rxrpc_ack_priority[call->ackr_reason]) {
+		if (immediate)
+			goto cancel_timer;
+		return;
+	}
+
+	/* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+	 * numbers */
+	if (prior == rxrpc_ack_priority[call->ackr_reason]) {
+		if (prior <= 4)
+			call->ackr_serial = serial;
+		if (immediate)
+			goto cancel_timer;
+		return;
+	}
+
+	call->ackr_reason = ack_reason;
+	call->ackr_serial = serial;
+
+	switch (ack_reason) {
+	case RXRPC_ACK_DELAY:
+		_debug("run delay timer");
+		call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
+		add_timer(&call->ack_timer);
+		return;
+
+	case RXRPC_ACK_IDLE:
+		if (!immediate) {
+			_debug("run defer timer");
+			expiry = 1;
+			goto run_timer;
+		}
+		goto cancel_timer;
+
+	case RXRPC_ACK_REQUESTED:
+		if (!rxrpc_ack_defer)
+			goto cancel_timer;
+		if (!immediate || serial == cpu_to_be32(1)) {
+			_debug("run defer timer");
+			expiry = rxrpc_ack_defer;
+			goto run_timer;
+		}	/* else fall through to an immediate ACK */
+
+	default:
+		_debug("immediate ACK");
+		goto cancel_timer;
+	}
+
+run_timer:
+	expiry += jiffies;	/* expiry was relative up to here */
+	if (!timer_pending(&call->ack_timer) ||
+	    time_after(call->ack_timer.expires, expiry))
+		mod_timer(&call->ack_timer, expiry);
+	return;
+
+cancel_timer:
+	_debug("cancel timer %%%u", ntohl(serial));
+	try_to_del_timer_sync(&call->ack_timer);
+	read_lock_bh(&call->state_lock);
+	if (call->state <= RXRPC_CALL_COMPLETE &&
+	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+		rxrpc_queue_call(call);
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * Locked front end to __rxrpc_propose_ACK().
+ *
+ * The proposal is only forwarded (under call->lock with BHs disabled) if
+ * its priority is strictly higher than that of the ACK reason currently
+ * recorded on the call; that comparison is made before the lock is taken.
+ */
+void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+		       __be32 serial, bool immediate)
+{
+	s8 proposed = rxrpc_ack_priority[ack_reason];
+	s8 pending = rxrpc_ack_priority[call->ackr_reason];
+
+	if (proposed <= pending)
+		return;
+
+	spin_lock_bh(&call->lock);
+	__rxrpc_propose_ACK(call, ack_reason, serial, immediate);
+	spin_unlock_bh(&call->lock);
+}
+
+/*
+ * set the resend timer
+ *
+ * @resend is a bit mask: bit 0 asks for the RXRPC_CALL_RESEND event bit to
+ * be set, bit 1 asks for the resend timer to be (re)armed to expire at
+ * @resend_at (an absolute jiffies value).  If bit 1 is clear, the timer is
+ * deleted and the resend-timer event and run-timer flag are cleared.
+ *
+ * A call whose state is RXRPC_CALL_COMPLETE or later is treated as if
+ * @resend were zero, so its timer is always killed.
+ *
+ * Everything is done under a read lock on call->state_lock.
+ */
+static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
+			     unsigned long resend_at)
+{
+	read_lock_bh(&call->state_lock);
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		resend = 0;
+
+	if (resend & 1) {
+		_debug("SET RESEND");
+		set_bit(RXRPC_CALL_RESEND, &call->events);
+	}
+
+	if (resend & 2) {
+		_debug("MODIFY RESEND TIMER");
+		set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		mod_timer(&call->resend_timer, resend_at);
+	} else {
+		_debug("KILL RESEND TIMER");
+		del_timer_sync(&call->resend_timer);
+		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	}
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * resend packets
+ *
+ * Walk the Tx window from acks_tail to acks_head, skipping slots whose
+ * stored pointer has bit 0 set (the marker rxrpc_process_soft_ACKs() puts
+ * on soft-ACK'd packets).  Any packet flagged need_resend is given a fresh
+ * serial number and retransmitted; its next resend time is 3 jiffies away
+ * if the transmission failed, otherwise rxrpc_resend_timeout seconds away.
+ *
+ * The same walk works out whether any packet is due again now or within
+ * one jiffy (bit 0 of the local "resend" mask) and the earliest resend time
+ * among the others (bit 1 plus "resend_at"); both are then passed to
+ * rxrpc_set_resend().
+ *
+ * NOTE(review): "stop" is only ever assigned 0, so "|| stop" in the loop
+ * condition has no effect; the intent was presumably to abandon the walk
+ * after a transmission failure - confirm before changing.
+ */
+static void rxrpc_resend(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_header *hdr;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop, stop;
+	u8 resend;
+
+	_enter("{%d,%d,%d,%d},",
+	       call->acks_hard, call->acks_unacked,
+	       atomic_read(&call->sequence),
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+	stop = 0;
+	resend = 0;
+	resend_at = 0;
+
+	for (loop = call->acks_tail;
+	     loop != call->acks_head || stop;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		if (*p_txb & 1)
+			continue;
+
+		txb = (struct sk_buff *) *p_txb;
+		sp = rxrpc_skb(txb);
+
+		if (sp->need_resend) {
+			sp->need_resend = 0;
+
+			/* each Tx packet has a new serial number */
+			sp->hdr.serial =
+				htonl(atomic_inc_return(&call->conn->serial));
+
+			hdr = (struct rxrpc_header *) txb->head;
+			hdr->serial = sp->hdr.serial;
+
+			_proto("Tx DATA %%%u { #%d }",
+			       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+			if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
+				stop = 0;
+				sp->resend_at = jiffies + 3;
+			} else {
+				sp->resend_at =
+					jiffies + rxrpc_resend_timeout * HZ;
+			}
+		}
+
+		if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave("");
+}
+
+/*
+ * handle resend timer expiry
+ *
+ * Scan the Tx window slots from acks_unacked to acks_head; none of them may
+ * carry the soft-ACK marker in bit 0 of the stored pointer (asserted).
+ * Packets not already flagged need_resend whose resend time has arrived, or
+ * will within one jiffy, are flagged; for the remaining unflagged packets
+ * the earliest resend time is tracked.  The outcome is handed to
+ * rxrpc_set_resend() as a bit mask (bit 0: something needs resending,
+ * bit 1: resend_at is valid) together with that earliest time.
+ */
+static void rxrpc_resend_timer(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop;
+	u8 resend;
+
+	_enter("%d,%d,%d",
+	       call->acks_tail, call->acks_unacked, call->acks_head);
+
+	resend = 0;
+	resend_at = 0;
+
+	for (loop = call->acks_unacked;
+	     loop != call->acks_head;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		ASSERT(!(*p_txb & 1));
+
+		if (sp->need_resend) {
+			;
+		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave("");
+}
+
+/*
+ * process soft ACKs of our transmitted packets
+ * - these indicate packets the peer has or has not received, but hasn't yet
+ *   given to the consumer, and so can still be discarded and re-requested
+ * - the first ack->nAcks bytes of @skb's data are read as one ACK/NACK
+ *   marker per packet and applied to the Tx window slots starting at
+ *   acks_tail
+ * - an ACK'd slot has bit 0 of its stored skb pointer set; a NACK'd slot
+ *   has that bit cleared and its packet flagged need_resend
+ * - acks_unacked is then moved to the first slot beyond the markers, and
+ *   every slot from there to acks_head is rescanned to decide what needs
+ *   resending and when the resend timer should next fire
+ * - returns 0 on success, or -EPROTO if the markers cannot be copied out of
+ *   @skb or one of them is neither an ACK nor a NACK
+ */
+static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
+				   struct rxrpc_ackpacket *ack,
+				   struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop;
+	u8 sacks[RXRPC_MAXACKS], resend;
+
+	_enter("{%d,%d},{%d},",
+	       call->acks_hard,
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
+	       ack->nAcks);
+
+	if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
+		goto protocol_error;
+
+	resend = 0;
+	resend_at = 0;
+	for (loop = 0; loop < ack->nAcks; loop++) {
+		p_txb = call->acks_window;
+		p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		switch (sacks[loop]) {
+		case RXRPC_ACK_TYPE_ACK:
+			sp->need_resend = 0;
+			*p_txb |= 1;
+			break;
+		case RXRPC_ACK_TYPE_NACK:
+			sp->need_resend = 1;
+			*p_txb &= ~1;
+			resend = 1;
+			break;
+		default:
+			_debug("Unsupported ACK type %d", sacks[loop]);
+			goto protocol_error;
+		}
+	}
+
+	smp_mb();
+	call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
+
+	/* anything not explicitly ACK'd is implicitly NACK'd, but may just not
+	 * have been received or processed yet by the far end */
+	for (loop = call->acks_unacked;
+	     loop != call->acks_head;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		if (*p_txb & 1) {
+			/* packet must have been discarded */
+			sp->need_resend = 1;
+			*p_txb &= ~1;
+			resend |= 1;
+		} else if (sp->need_resend) {
+			;
+		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave(" = 0");
+	return 0;
+
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+}
+
+/*
+ * discard hard-ACK'd packets from the Tx window
+ *
+ * Free the packets at the tail of the Tx window one at a time, advancing
+ * acks_tail (and acks_unacked, if it was pointing at the freed slot) until
+ * call->acks_hard has caught up with @hard, then wake anyone waiting on
+ * call->tx_waitq.  It is asserted on entry that the number of packets to
+ * discard does not exceed the number currently held in the window.
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
+{
+	unsigned long _skb;
+	int tail = call->acks_tail, old_tail;
+	int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+
+	_enter("{%u,%u},%u", call->acks_hard, win, hard);
+
+	ASSERTCMP(hard - call->acks_hard, <=, win);
+
+	while (call->acks_hard < hard) {
+		smp_read_barrier_depends();
+		/* bit 0 of a slot is the soft-ACK marker rather than part of
+		 * the skb pointer, so mask it off before freeing */
+		_skb = call->acks_window[tail] & ~1;
+		rxrpc_free_skb((struct sk_buff *) _skb);
+		old_tail = tail;
+		tail = (tail + 1) & (call->acks_winsz - 1);
+		call->acks_tail = tail;
+		if (call->acks_unacked == old_tail)
+			call->acks_unacked = tail;
+		call->acks_hard++;
+	}
+
+	wake_up(&call->tx_waitq);
+}
+
+/*
+ * Empty the Tx window (used when a call fails): rotate the window all the
+ * way up to the call's current sequence counter so that every packet still
+ * held in it is discarded.
+ */
+static void rxrpc_clear_tx_window(struct rxrpc_call *call)
+{
+	u32 top = atomic_read(&call->sequence);
+
+	rxrpc_rotate_tx_window(call, top);
+}
+
+/*
+ * drain the out of sequence received packet queue into the packet Rx queue
+ *
+ * At most one packet is moved per invocation, all under call->lock.  The
+ * packet at the front of rx_oos_queue is passed to rxrpc_queue_rcv_skb()
+ * only if its sequence number equals call->rx_first_oos; otherwise it is
+ * put back at the head and rx_first_oos is corrected to match it.  After a
+ * successful move, rx_data_post is advanced and rx_first_oos is reloaded
+ * from the new front of the queue (0 if the queue is now empty).
+ *
+ * Returns 0, or -ECONNRESET if the call has already been released.  A
+ * failure from rxrpc_queue_rcv_skb() is treated as a bug.
+ */
+static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	bool terminal;
+	int ret;
+
+	_enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
+
+	spin_lock_bh(&call->lock);
+
+	ret = -ECONNRESET;
+	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
+		goto socket_unavailable;
+
+	skb = skb_dequeue(&call->rx_oos_queue);
+	if (skb) {
+		sp = rxrpc_skb(skb);
+
+		_debug("drain OOS packet %d [%d]",
+		       ntohl(sp->hdr.seq), call->rx_first_oos);
+
+		if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
+			skb_queue_head(&call->rx_oos_queue, skb);
+			call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
+			_debug("requeue %p {%u}", skb, call->rx_first_oos);
+		} else {
+			skb->mark = RXRPC_SKB_MARK_DATA;
+			terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+				!(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+			ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
+			BUG_ON(ret < 0);
+			_debug("drain #%u", call->rx_data_post);
+			call->rx_data_post++;
+
+			/* find out what the next packet is */
+			skb = skb_peek(&call->rx_oos_queue);
+			if (skb)
+				call->rx_first_oos =
+					ntohl(rxrpc_skb(skb)->hdr.seq);
+			else
+				call->rx_first_oos = 0;
+			_debug("peek %p {%u}", skb, call->rx_first_oos);
+		}
+	}
+
+	ret = 0;
+socket_unavailable:
+	spin_unlock_bh(&call->lock);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * insert an out of sequence packet into the buffer
+ *
+ * The skb is first bound to the call: rxrpc_packet_destructor is installed
+ * as its destructor, sp->call is pointed at the call (it must have been
+ * NULL) and a ref is taken on the call.  Under call->lock it is then linked
+ * into rx_oos_queue ahead of the first queued packet with a higher sequence
+ * number, or at the tail if there is none.
+ *
+ * rx_first_oos is set to the new packet's sequence number if it was 0 or
+ * higher than that number.  If rx_first_oos then equals rx_data_post and
+ * the call state is still below RXRPC_CALL_COMPLETE, the DRAIN_RX_OOS event
+ * bit is set; the call is not queued for processing from here.
+ */
+static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
+				    struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp, *psp;
+	struct sk_buff *p;
+	u32 seq;
+
+	sp = rxrpc_skb(skb);
+	seq = ntohl(sp->hdr.seq);
+	_enter(",,{%u}", seq);
+
+	skb->destructor = rxrpc_packet_destructor;
+	ASSERTCMP(sp->call, ==, NULL);
+	sp->call = call;
+	rxrpc_get_call(call);
+
+	/* insert into the buffer in sequence order */
+	spin_lock_bh(&call->lock);
+
+	skb_queue_walk(&call->rx_oos_queue, p) {
+		psp = rxrpc_skb(p);
+		if (ntohl(psp->hdr.seq) > seq) {
+			_debug("insert oos #%u before #%u",
+			       seq, ntohl(psp->hdr.seq));
+			skb_insert(p, skb, &call->rx_oos_queue);
+			goto inserted;
+		}
+	}
+
+	_debug("append oos #%u", seq);
+	skb_queue_tail(&call->rx_oos_queue, skb);
+inserted:
+
+	/* we might now have a new front to the queue */
+	if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
+		call->rx_first_oos = seq;
+
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    call->rx_data_post == call->rx_first_oos) {
+		_debug("drain rx oos now");
+		set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+	}
+	read_unlock(&call->state_lock);
+
+	spin_unlock_bh(&call->lock);
+	_leave(" [stored #%u]", call->rx_first_oos);
+}
+
+/*
+ * clear the Tx window on final ACK reception
+ *
+ * The window array is detached from the call first (call->acks_window is
+ * set to NULL).  Every packet still between acks_tail and acks_head is then
+ * freed, with acks_tail advanced past each slot before its packet is
+ * released, and finally the array itself is freed.  Bit 0 of each slot is
+ * masked off before the value is used as an skb pointer.
+ */
+static void rxrpc_zap_tx_window(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	unsigned long _skb, *acks_window;
+	uint8_t winsz = call->acks_winsz;
+	int tail;
+
+	acks_window = call->acks_window;
+	call->acks_window = NULL;
+
+	while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
+		tail = call->acks_tail;
+		smp_read_barrier_depends();
+		_skb = acks_window[tail] & ~1;
+		smp_mb();
+		call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
+
+		skb = (struct sk_buff *) _skb;
+		sp = rxrpc_skb(skb);
+		_debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+		rxrpc_free_skb(skb);
+	}
+
+	kfree(acks_window);
+}
+
+/*
+ * process packets in the reception queue
+ *
+ * Packets are dequeued from call->rx_queue one at a time until it is empty:
+ * - DATA packets are verified (and possibly decrypted) and then inserted
+ *   into the out-of-sequence queue
+ * - ACK packets may trigger a ping response, rotation of the Tx window up
+ *   to the hard-ACK point and soft-ACK processing
+ * - ACKALL packets, and ACKs that hard-ACK everything that was sent, end
+ *   the current transmission phase and zap the Tx window
+ *
+ * @_abort_code is passed on to rxrpc_verify_packet(), which presumably
+ * fills it in when verification fails.
+ *
+ * Returns -EAGAIN once the queue has been emptied, or -EPROTO if a packet
+ * was malformed, failed verification or cannot occur in the call's current
+ * state; the offending packet is freed in that case.
+ */
+static int rxrpc_process_rx_queue(struct rxrpc_call *call,
+				  u32 *_abort_code)
+{
+	struct rxrpc_ackpacket ack;
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	unsigned reason_ix;
+	bool post_ACK;
+	int latest;
+	u32 hard, tx;
+
+	_enter("");
+
+process_further:
+	skb = skb_dequeue(&call->rx_queue);
+	if (!skb)
+		return -EAGAIN;
+
+	_net("deferred skb %p", skb);
+
+	sp = rxrpc_skb(skb);
+
+	_debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
+
+	post_ACK = false;
+
+	switch (sp->hdr.type) {
+		/* data packets that wind up here have been received out of
+		 * order, need security processing or are jumbo packets */
+	case RXRPC_PACKET_TYPE_DATA:
+		_proto("OOSQ DATA %%%u { #%u }",
+		       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+		/* secured packets must be verified and possibly decrypted */
+		if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
+			goto protocol_error;
+
+		rxrpc_insert_oos_packet(call, skb);
+		goto process_further;
+
+		/* partial ACK to process */
+	case RXRPC_PACKET_TYPE_ACK:
+		if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
+			_debug("extraction failure");
+			goto protocol_error;
+		}
+		if (!skb_pull(skb, sizeof(ack)))
+			BUG();
+
+		latest = ntohl(sp->hdr.serial);
+		hard = ntohl(ack.firstPacket);
+		tx = atomic_read(&call->sequence);
+
+		/* the reason code comes straight off the wire: fold anything
+		 * without a name onto the final "-?-" entry rather than
+		 * indexing beyond the end of rxrpc_acks[] */
+		reason_ix = ack.reason;
+		if (reason_ix >= ARRAY_SIZE(rxrpc_acks))
+			reason_ix = ARRAY_SIZE(rxrpc_acks) - 1;
+
+		_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+		       latest,
+		       ntohs(ack.maxSkew),
+		       hard,
+		       ntohl(ack.previousPacket),
+		       ntohl(ack.serial),
+		       rxrpc_acks[reason_ix],
+		       ack.nAcks);
+
+		if (ack.reason == RXRPC_ACK_PING) {
+			_proto("Rx ACK %%%u PING Request", latest);
+			rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+					  sp->hdr.serial, true);
+		}
+
+		/* discard any out-of-order or duplicate ACKs */
+		if (latest - call->acks_latest <= 0) {
+			_debug("discard ACK %d <= %d",
+			       latest, call->acks_latest);
+			goto discard;
+		}
+		call->acks_latest = latest;
+
+		/* ACKs only matter while we have, or may still have, packets
+		 * in the Tx window */
+		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		    call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
+		    call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
+		    call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
+			goto discard;
+
+		_debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
+
+		if (hard > 0) {
+			/* firstPacket is one more than the last hard-ACK'd
+			 * sequence number */
+			if (hard - 1 > tx) {
+				_debug("hard-ACK'd packet %d not transmitted"
+				       " (%d top)",
+				       hard - 1, tx);
+				goto protocol_error;
+			}
+
+			if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
+			     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
+			    hard > tx)
+				goto all_acked;
+
+			smp_rmb();
+			rxrpc_rotate_tx_window(call, hard - 1);
+		}
+
+		if (ack.nAcks > 0) {
+			if (hard - 1 + ack.nAcks > tx) {
+				_debug("soft-ACK'd packet %d+%d not"
+				       " transmitted (%d top)",
+				       hard - 1, ack.nAcks, tx);
+				goto protocol_error;
+			}
+
+			if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
+				goto protocol_error;
+		}
+		goto discard;
+
+		/* complete ACK to process */
+	case RXRPC_PACKET_TYPE_ACKALL:
+		goto all_acked;
+
+		/* abort and busy are handled elsewhere */
+	case RXRPC_PACKET_TYPE_BUSY:
+	case RXRPC_PACKET_TYPE_ABORT:
+		BUG();
+
+		/* connection level events - also handled elsewhere */
+	case RXRPC_PACKET_TYPE_CHALLENGE:
+	case RXRPC_PACKET_TYPE_RESPONSE:
+	case RXRPC_PACKET_TYPE_DEBUG:
+		BUG();
+	}
+
+	/* if we've had a hard ACK that covers all the packets we've sent, then
+	 * that ends that phase of the operation */
+all_acked:
+	write_lock_bh(&call->state_lock);
+	_debug("ack all %d", call->state);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+		break;
+	case RXRPC_CALL_SERVER_AWAIT_ACK:
+		_debug("srv complete");
+		call->state = RXRPC_CALL_COMPLETE;
+		post_ACK = true;
+		break;
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+		goto protocol_error_unlock; /* can't occur yet */
+	default:
+		write_unlock_bh(&call->state_lock);
+		goto discard; /* assume packet left over from earlier phase */
+	}
+
+	write_unlock_bh(&call->state_lock);
+
+	/* if all the packets we sent are hard-ACK'd, then we can discard
+	 * whatever we've got left */
+	_debug("clear Tx %d",
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+	del_timer_sync(&call->resend_timer);
+	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+
+	if (call->acks_window)
+		rxrpc_zap_tx_window(call);
+
+	if (post_ACK) {
+		/* post the final ACK message for userspace to pick up; the
+		 * skb is handed over to the receive queue rather than freed */
+		_debug("post ACK");
+		skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
+		sp->call = call;
+		rxrpc_get_call(call);
+		spin_lock_bh(&call->lock);
+		if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
+			BUG();
+		spin_unlock_bh(&call->lock);
+		goto process_further;
+	}
+
+discard:
+	rxrpc_free_skb(skb);
+	goto process_further;
+
+protocol_error_unlock:
+	write_unlock_bh(&call->state_lock);
+protocol_error:
+	rxrpc_free_skb(skb);
+	_leave(" = -EPROTO");
+	return -EPROTO;
+}
+
+/*
+ * Queue a data-less notification skb on the call for recvmsg() to collect.
+ *
+ * @mark is stored in skb->mark and @error in the skb's rxrpc private data.
+ * A @fatal message first stops the resend and ACK timers and is queued as a
+ * terminal message.  Nothing is posted if the call has no user ID yet
+ * (unless this is the new-call notification) or if a terminal message has
+ * already been flagged on the call.
+ *
+ * Returns 0, or -ENOMEM if the skb could not be allocated.
+ */
+static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
+			      bool fatal)
+{
+	struct rxrpc_skb_priv *priv;
+	struct sk_buff *note;
+	int queued;
+
+	_enter("{%d,%lx},%u,%u,%d",
+	       call->debug_id, call->flags, mark, error, fatal);
+
+	/* a fatal message ends the call, so quiesce its timers first */
+	if (fatal) {
+		del_timer_sync(&call->resend_timer);
+		del_timer_sync(&call->ack_timer);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	}
+
+	if (mark != RXRPC_SKB_MARK_NEW_CALL &&
+	    !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		_leave("[no userid]");
+		return 0;
+	}
+
+	/* nothing more may be posted after a terminal message */
+	if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags))
+		return 0;
+
+	note = alloc_skb(0, GFP_NOFS);
+	if (!note)
+		return -ENOMEM;
+
+	rxrpc_new_skb(note);
+	note->mark = mark;
+
+	priv = rxrpc_skb(note);
+	memset(priv, 0, sizeof(*priv));
+	priv->error = error;
+	priv->call = call;
+	rxrpc_get_call(call);
+
+	spin_lock_bh(&call->lock);
+	queued = rxrpc_queue_rcv_skb(call, note, true, fatal);
+	spin_unlock_bh(&call->lock);
+	BUG_ON(queued < 0);
+
+	return 0;
+}
+
+/*
+ * handle background processing of incoming call packets and ACK / abort
+ * generation
+ *
+ * Work item handler for call->processor.  RXRPC_CALL_PROC_BUSY in
+ * call->flags keeps the handler from running for one call on two CPUs at
+ * once: if the flag is already set, this invocation returns immediately.
+ *
+ * Each pass looks at the pending bits in call->events in a fixed order:
+ * - release, received network error, connection abort, busy rejection,
+ *   local abort, final ACK, received busy/abort, implicit ACKALL and
+ *   lifetime expiry
+ * - packets waiting on call->rx_queue
+ * - Tx resend timer expiry and resending
+ * - generation of an ordinary ACK packet
+ * - security completion, new-call notification and call acceptance
+ * - draining of the out-of-sequence Rx queue
+ * Most of these end the pass once handled; at the end the call is requeued
+ * if events or queued packets remain and its state is below
+ * RXRPC_CALL_DEAD.
+ *
+ * Outgoing packets (ACK, ACKALL, BUSY, ABORT) are assembled from the
+ * on-stack header plus up to four further iovec entries and sent with
+ * kernel_sendmsg() on the local endpoint's socket; "genbit" records which
+ * event the packet is being sent for so it can be cleared after a
+ * successful send.
+ *
+ * NOTE(review): the soft-ACK table is sized from ackr_win_top -
+ * rx_data_eaten, read before call->lock is taken, while the fill loop
+ * indexes it by bit position in ackr_window[] - confirm that no set bit can
+ * lie beyond the allocated size.
+ */
+void rxrpc_process_call(struct work_struct *work)
+{
+	struct rxrpc_call *call =
+		container_of(work, struct rxrpc_call, processor);
+	struct rxrpc_ackpacket ack;
+	struct rxrpc_ackinfo ackinfo;
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[5];
+	unsigned long bits;
+	__be32 data;
+	size_t len;
+	int genbit, loop, nbit, ioc, ret;
+	u32 abort_code = RX_PROTOCOL_ERROR;
+	u8 *acks = NULL;	/* soft-ACK table; kfree'd at "error:" */
+
+	//printk("\n--------------------\n");
+	_enter("{%d,%s,%lx} [%lu]",
+	       call->debug_id, rxrpc_call_states[call->state], call->events,
+	       (jiffies - call->creation_jif) / (HZ / 10));
+
+	if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
+		_debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
+		return;
+	}
+
+	/* there's a good chance we're going to have to send a message, so set
+	 * one up in advance */
+	msg.msg_name	= &call->conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(call->conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr.epoch	= call->conn->epoch;
+	hdr.cid		= call->cid;
+	hdr.callNumber	= call->call_id;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_ACK;
+	hdr.flags	= call->conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = call->conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= call->conn->service_id;
+
+	memset(iov, 0, sizeof(iov));
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+
+	/* deal with events of a final nature */
+	if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+		rxrpc_release_call(call);
+		clear_bit(RXRPC_CALL_RELEASE, &call->events);
+	}
+
+	if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
+		int error;
+
+		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+		error = call->conn->trans->peer->net_error;
+		_debug("post net error %d", error);
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
+				       error, true) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
+		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+		_debug("post conn abort");
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       call->conn->error, true) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
+		hdr.type = RXRPC_PACKET_TYPE_BUSY;
+		genbit = RXRPC_CALL_REJECT_BUSY;
+		goto send_message;
+	}
+
+	if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
+		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       ECONNABORTED, true) < 0)
+			goto no_mem;
+		hdr.type = RXRPC_PACKET_TYPE_ABORT;
+		data = htonl(call->abort_code);
+		iov[1].iov_base = &data;
+		iov[1].iov_len = sizeof(data);
+		genbit = RXRPC_CALL_ABORT;
+		goto send_message;
+	}
+
+	if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
+		hdr.type = RXRPC_PACKET_TYPE_ACKALL;
+		genbit = RXRPC_CALL_ACK_FINAL;
+		goto send_message;
+	}
+
+	if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
+			    (1 << RXRPC_CALL_RCVD_ABORT))
+	    ) {
+		u32 mark;
+
+		if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
+			mark = RXRPC_SKB_MARK_REMOTE_ABORT;
+		else
+			mark = RXRPC_SKB_MARK_BUSY;
+
+		_debug("post abort/busy");
+		rxrpc_clear_tx_window(call);
+		if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
+			goto no_mem;
+
+		clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
+		_debug("do implicit ackall");
+		rxrpc_clear_tx_window(call);
+	}
+
+	if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
+		write_lock_bh(&call->state_lock);
+		if (call->state <= RXRPC_CALL_COMPLETE) {
+			call->state = RXRPC_CALL_LOCALLY_ABORTED;
+			call->abort_code = RX_CALL_TIMEOUT;
+			set_bit(RXRPC_CALL_ABORT, &call->events);
+		}
+		write_unlock_bh(&call->state_lock);
+
+		_debug("post timeout");
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       ETIME, true) < 0)
+			goto no_mem;
+
+		clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+		goto kill_ACKs;
+	}
+
+	/* deal with assorted inbound messages */
+	if (!skb_queue_empty(&call->rx_queue)) {
+		switch (rxrpc_process_rx_queue(call, &abort_code)) {
+		case 0:
+		case -EAGAIN:
+			break;
+		case -ENOMEM:
+			goto no_mem;
+		case -EKEYEXPIRED:
+		case -EKEYREJECTED:
+		case -EPROTO:
+			rxrpc_abort_call(call, abort_code);
+			goto kill_ACKs;
+		}
+	}
+
+	/* handle resending */
+	if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+		rxrpc_resend_timer(call);
+	if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
+		rxrpc_resend(call);
+
+	/* consider sending an ordinary ACK */
+	if (test_bit(RXRPC_CALL_ACK, &call->events)) {
+		__be32 pad;
+
+		_debug("send ACK: window: %d - %d { %lx }",
+		       call->rx_data_eaten, call->ackr_win_top,
+		       call->ackr_window[0]);
+
+		if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
+		    call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
+			/* ACK by sending reply DATA packet in this state */
+			clear_bit(RXRPC_CALL_ACK, &call->events);
+			goto maybe_reschedule;
+		}
+
+		genbit = RXRPC_CALL_ACK;
+
+		acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
+			       GFP_NOFS);
+		if (!acks)
+			goto no_mem;
+
+		//hdr.flags	= RXRPC_SLOW_START_OK;
+		ack.bufferSpace	= htons(8);
+		ack.maxSkew	= 0;
+		ack.serial	= 0;
+		ack.reason	= 0;
+
+		ackinfo.rxMTU	= htonl(5692);
+//		ackinfo.rxMTU	= htonl(call->conn->trans->peer->maxdata);
+		ackinfo.maxMTU	= htonl(call->conn->trans->peer->maxdata);
+		ackinfo.rwind	= htonl(32);
+		ackinfo.jumbo_max = htonl(4);
+
+		spin_lock_bh(&call->lock);
+		ack.reason = call->ackr_reason;
+		ack.serial = call->ackr_serial;
+		ack.previousPacket = call->ackr_prev_seq;
+		ack.firstPacket = htonl(call->rx_data_eaten + 1);
+
+		ack.nAcks = 0;
+		for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+			nbit = loop * BITS_PER_LONG;
+			for (bits = call->ackr_window[loop]; bits; bits >>= 1
+			     ) {
+				_debug("- l=%d n=%d b=%lx", loop, nbit, bits);
+				if (bits & 1) {
+					acks[nbit] = RXRPC_ACK_TYPE_ACK;
+					ack.nAcks = nbit + 1;
+				}
+				nbit++;
+			}
+		}
+		call->ackr_reason = 0;
+		spin_unlock_bh(&call->lock);
+
+		pad = 0;
+
+		iov[1].iov_base = &ack;
+		iov[1].iov_len	= sizeof(ack);
+		iov[2].iov_base = acks;
+		iov[2].iov_len	= ack.nAcks;
+		iov[3].iov_base = &pad;
+		iov[3].iov_len	= 3;
+		iov[4].iov_base = &ackinfo;
+		iov[4].iov_len	= sizeof(ackinfo);
+
+		switch (ack.reason) {
+		case RXRPC_ACK_REQUESTED:
+		case RXRPC_ACK_DUPLICATE:
+		case RXRPC_ACK_OUT_OF_SEQUENCE:
+		case RXRPC_ACK_EXCEEDS_WINDOW:
+		case RXRPC_ACK_NOSPACE:
+		case RXRPC_ACK_PING:
+		case RXRPC_ACK_PING_RESPONSE:
+			goto send_ACK_with_skew;
+		case RXRPC_ACK_DELAY:
+		case RXRPC_ACK_IDLE:
+			goto send_ACK;
+		}
+	}
+
+	/* handle completion of security negotiations on an incoming
+	 * connection */
+	if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
+		_debug("secured");
+		spin_lock_bh(&call->lock);
+
+		if (call->state == RXRPC_CALL_SERVER_SECURING) {
+			_debug("securing");
+			write_lock(&call->conn->lock);
+			if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+			    !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+				_debug("not released");
+				call->state = RXRPC_CALL_SERVER_ACCEPTING;
+				list_move_tail(&call->accept_link,
+					       &call->socket->acceptq);
+			}
+			write_unlock(&call->conn->lock);
+			read_lock(&call->state_lock);
+			if (call->state < RXRPC_CALL_COMPLETE)
+				set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+			read_unlock(&call->state_lock);
+		}
+
+		spin_unlock_bh(&call->lock);
+		if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
+			goto maybe_reschedule;
+	}
+
+	/* post a notification of an acceptable connection to the app */
+	if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
+		_debug("post accept");
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
+				       0, false) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+		goto maybe_reschedule;
+	}
+
+	/* handle incoming call acceptance */
+	if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
+		_debug("accepted");
+		ASSERTCMP(call->rx_data_post, ==, 0);
+		call->rx_data_post = 1;
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE)
+			set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+		read_unlock_bh(&call->state_lock);
+	}
+
+	/* drain the out of sequence received packet queue into the packet Rx
+	 * queue */
+	if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
+		while (call->rx_data_post == call->rx_first_oos)
+			if (rxrpc_drain_rx_oos_queue(call) < 0)
+				break;
+		goto maybe_reschedule;
+	}
+
+	/* other events may have been raised since we started checking */
+	goto maybe_reschedule;
+
+send_ACK_with_skew:
+	ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
+			    ntohl(ack.serial));
+send_ACK:
+	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+	_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+	       ntohl(hdr.serial),
+	       ntohs(ack.maxSkew),
+	       ntohl(ack.firstPacket),
+	       ntohl(ack.previousPacket),
+	       ntohl(ack.serial),
+	       rxrpc_acks[ack.reason],
+	       ack.nAcks);
+
+	del_timer_sync(&call->ack_timer);
+	if (ack.nAcks > 0)
+		set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
+	goto send_message_2;
+
+send_message:
+	_debug("send message");
+
+	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+	_proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
+send_message_2:
+
+	len = iov[0].iov_len;	/* the highest iovec in use fixes the count */
+	ioc = 1;
+	if (iov[4].iov_len) {
+		ioc = 5;
+		len += iov[4].iov_len;
+		len += iov[3].iov_len;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[3].iov_len) {
+		ioc = 4;
+		len += iov[3].iov_len;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[2].iov_len) {
+		ioc = 3;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[1].iov_len) {
+		ioc = 2;
+		len += iov[1].iov_len;
+	}
+
+	ret = kernel_sendmsg(call->conn->trans->local->socket,
+			     &msg, iov, ioc, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD)
+			rxrpc_queue_call(call);
+		read_unlock_bh(&call->state_lock);
+		goto error;
+	}
+
+	switch (genbit) {
+	case RXRPC_CALL_ABORT:
+		clear_bit(genbit, &call->events);
+		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+		goto kill_ACKs;
+
+	case RXRPC_CALL_ACK_FINAL:
+		write_lock_bh(&call->state_lock);
+		if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
+			call->state = RXRPC_CALL_COMPLETE;
+		write_unlock_bh(&call->state_lock);
+		goto kill_ACKs;
+
+	default:
+		clear_bit(genbit, &call->events);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		case RXRPC_CALL_CLIENT_RECV_REPLY:
+		case RXRPC_CALL_SERVER_RECV_REQUEST:
+		case RXRPC_CALL_SERVER_ACK_REQUEST:
+			_debug("start ACK timer");
+			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
+					  call->ackr_serial, false);
+		default:	/* the cases above fall through to here */
+			break;
+		}
+		goto maybe_reschedule;
+	}
+
+kill_ACKs:
+	del_timer_sync(&call->ack_timer);
+	if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
+		rxrpc_put_call(call);
+	clear_bit(RXRPC_CALL_ACK, &call->events);
+
+maybe_reschedule:
+	if (call->events || !skb_queue_empty(&call->rx_queue)) {
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD)
+			rxrpc_queue_call(call);
+		read_unlock_bh(&call->state_lock);
+	}
+
+	/* don't leave aborted connections on the accept queue */
+	if (call->state >= RXRPC_CALL_COMPLETE &&
+	    !list_empty(&call->accept_link)) {
+		_debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
+		       call, call->events, call->flags,
+		       ntohl(call->conn->cid));
+
+		read_lock_bh(&call->state_lock);
+		if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+		    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+			rxrpc_queue_call(call);
+		read_unlock_bh(&call->state_lock);
+	}
+
+error:
+	clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
+	kfree(acks);
+
+	/* because we don't want two CPUs both processing the work item for one
+	 * call at the same time, we use a flag to note when it's busy; however
+	 * this means there's a race between clearing the flag and setting the
+	 * work pending bit and the work item being processed again */
+	if (call->events && !work_pending(&call->processor)) {
+		_debug("jumpstart %x", ntohl(call->conn->cid));
+		rxrpc_queue_call(call);
+	}
+
+	_leave("");
+	return;
+
+no_mem:
+	_debug("out of memory");
+	goto maybe_reschedule;
+}
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
new file mode 100644
index 000000000000..4d92d88ff1fc
--- /dev/null
+++ b/net/rxrpc/ar-call.c
@@ -0,0 +1,804 @@
+/* RxRPC individual remote procedure call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/* slab cache that call records are allocated from and returned to */
+struct kmem_cache *rxrpc_call_jar;
+/* list of call records (linked through call->link); additions and removals
+ * in this file are made with rxrpc_call_lock write-locked */
+LIST_HEAD(rxrpc_calls);
+DEFINE_RWLOCK(rxrpc_call_lock);
+/* multiplied by HZ when a call's lifetimer is armed */
+static unsigned rxrpc_call_max_lifetime = 60;
+/* multiplied by HZ when a released call's deadspan timer is armed */
+static unsigned rxrpc_dead_call_timeout = 2;
+
+/* work item and timer handlers that rxrpc_alloc_call() installs on each call */
+static void rxrpc_destroy_call(struct work_struct *work);
+static void rxrpc_call_life_expired(unsigned long _call);
+static void rxrpc_dead_call_expired(unsigned long _call);
+static void rxrpc_ack_time_expired(unsigned long _call);
+static void rxrpc_resend_time_expired(unsigned long _call);
+
+/*
+ * Allocate a call record and set up everything that doesn't depend on whether
+ * it will end up as a client call or an incoming call.
+ * - returns NULL if either the record or its ACK window can't be allocated
+ * - the new call carries one usage ref and starts out in the
+ *   RXRPC_CALL_CLIENT_SEND_REQUEST state
+ */
+static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+{
+	struct rxrpc_call *call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
+
+	if (call == NULL)
+		return NULL;
+
+	/* the ACK window array starts out with 16 slots */
+	call->acks_winsz = 16;
+	call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+				    gfp);
+	if (call->acks_window == NULL)
+		goto no_window;
+
+	/* locks, wait queue, packet queues and list linkage */
+	spin_lock_init(&call->lock);
+	rwlock_init(&call->state_lock);
+	init_waitqueue_head(&call->tx_waitq);
+	skb_queue_head_init(&call->rx_queue);
+	skb_queue_head_init(&call->rx_oos_queue);
+	INIT_LIST_HEAD(&call->accept_link);
+
+	/* deferred work items */
+	INIT_WORK(&call->processor, &rxrpc_process_call);
+	INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+
+	/* the four per-call timers all get the call as their argument */
+	setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
+		    (unsigned long) call);
+	setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
+		    (unsigned long) call);
+	setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
+		    (unsigned long) call);
+	setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
+		    (unsigned long) call);
+
+	/* fill the socket tree node with a recognisable pattern (presumably a
+	 * debugging poison for a node that isn't in any tree yet) */
+	memset(&call->sock_node, 0xed, sizeof(call->sock_node));
+
+	atomic_set(&call->usage, 1);
+	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+
+	/* receive-side sequence tracking */
+	call->rx_data_expect = 1;
+	call->rx_data_eaten = 0;
+	call->rx_first_oos = 0;
+	call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+	call->creation_jif = jiffies;
+	return call;
+
+no_window:
+	kmem_cache_free(rxrpc_call_jar, call);
+	return NULL;
+}
+
+/*
+ * allocate a new client call and attempt to get a connection slot for it
+ * - takes a ref on the socket on behalf of the call
+ * - links the call onto its peer's error_targets list and arms the call's
+ *   lifetimer
+ * - returns the new call, ERR_PTR(-ENOMEM) if the record can't be allocated or
+ *   ERR_PTR() of whatever error rxrpc_connect_call() reports
+ */
+static struct rxrpc_call *rxrpc_alloc_client_call(
+	struct rxrpc_sock *rx,
+	struct rxrpc_transport *trans,
+	struct rxrpc_conn_bundle *bundle,
+	gfp_t gfp)
+{
+	struct rxrpc_call *call;
+	int ret;
+
+	_enter("");
+
+	ASSERT(rx != NULL);
+	ASSERT(trans != NULL);
+	ASSERT(bundle != NULL);
+
+	call = rxrpc_alloc_call(gfp);
+	if (!call) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	sock_hold(&rx->sk);
+	call->socket = rx;
+	call->rx_data_post = 1;
+
+	ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
+	if (ret < 0) {
+		/* the call record isn't the only thing we own at this point:
+		 * rxrpc_alloc_call() also allocated the ACK window array and
+		 * we pinned the socket above, so both must be released too */
+		kfree(call->acks_window);
+		sock_put(&rx->sk);
+		kmem_cache_free(rxrpc_call_jar, call);
+		_leave(" = %d", ret);
+		return ERR_PTR(ret);
+	}
+
+	/* make the call visible to whatever walks the peer's error targets */
+	spin_lock(&call->conn->trans->peer->lock);
+	list_add(&call->error_link, &call->conn->trans->peer->error_targets);
+	spin_unlock(&call->conn->trans->peer->lock);
+
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+
+	_leave(" = %p", call);
+	return call;
+}
+
+/*
+ * set up a call for the given data
+ * - called in process context with IRQs enabled
+ * - looks the call up in the socket's tree by user_call_ID; if it isn't there
+ *   and both create and trans are set, a new client call is allocated,
+ *   inserted into the tree and added to the rxrpc_calls list
+ * - returns the call with a ref taken for the caller, ERR_PTR(-EBADSLT) if
+ *   there's no such call and one may not be created, or the error from
+ *   rxrpc_alloc_client_call()
+ */
+struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
+					 struct rxrpc_transport *trans,
+					 struct rxrpc_conn_bundle *bundle,
+					 unsigned long user_call_ID,
+					 int create,
+					 gfp_t gfp)
+{
+	struct rxrpc_call *call, *candidate;
+	struct rb_node *p, *parent, **pp;
+
+	_enter("%p,%d,%d,%lx,%d",
+	       rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
+	       user_call_ID, create);
+
+	/* search the extant calls first for one that matches the specified
+	 * user ID */
+	read_lock(&rx->call_lock);
+
+	p = rx->calls.rb_node;
+	while (p) {
+		call = rb_entry(p, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			p = p->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			p = p->rb_right;
+		else
+			goto found_extant_call;
+	}
+
+	read_unlock(&rx->call_lock);
+
+	if (!create || !trans)
+		return ERR_PTR(-EBADSLT);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
+	if (IS_ERR(candidate)) {
+		_leave(" = %ld", PTR_ERR(candidate));
+		return candidate;
+	}
+
+	/* nothing else can see the candidate's flags yet, so the non-atomic
+	 * __set_bit() suffices */
+	candidate->user_call_ID = user_call_ID;
+	__set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+
+	/* the lock was dropped for the allocation, so search again under the
+	 * write lock, this time remembering where the new node would go */
+	write_lock(&rx->call_lock);
+
+	pp = &rx->calls.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			pp = &(*pp)->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* second search also failed; add the new call */
+	call = candidate;
+	candidate = NULL;
+	/* extra ref on top of the one rxrpc_alloc_call() set up: one is
+	 * returned to the caller; the other presumably stays with the socket's
+	 * tree (cf. the comment in rxrpc_release_call()) */
+	rxrpc_get_call(call);
+
+	rb_link_node(&call->sock_node, parent, pp);
+	rb_insert_color(&call->sock_node, &rx->calls);
+	write_unlock(&rx->call_lock);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	_leave(" = %p [new]", call);
+	return call;
+
+	/* we found the call in the list immediately */
+found_extant_call:
+	rxrpc_get_call(call);
+	read_unlock(&rx->call_lock);
+	_leave(" = %p [extant %d]", call, atomic_read(&call->usage));
+	return call;
+
+	/* we found the call on the second time through the list */
+found_extant_second:
+	rxrpc_get_call(call);
+	write_unlock(&rx->call_lock);
+	/* NOTE(review): the candidate is a connected call that is not in the
+	 * RXRPC_CALL_DEAD state, whereas __rxrpc_put_call() asserts that state
+	 * when the last ref goes - confirm that discarding it this way is
+	 * safe */
+	rxrpc_put_call(candidate);
+	_leave(" = %p [second %d]", call, atomic_read(&call->usage));
+	return call;
+}
+
+/*
+ * set up an incoming call
+ * - called in process context with IRQs enabled
+ * - binds a new call to the channel and call number given in the packet
+ *   header, adds it to the connection's call tree, the peer's error targets
+ *   and the rxrpc_calls list, and arms its lifetimer
+ * - returns:
+ *   - the new call, or the extant call (with a ref taken) if the packet
+ *     duplicates the call currently bound to the channel
+ *   - ERR_PTR(-EBUSY) if no call record could be allocated or the channel is
+ *     still occupied by an incomplete call
+ *   - ERR_PTR(-ECONNABORTED) if the extant call has been aborted
+ *   - ERR_PTR(-ECONNRESET) if the call number is already in the connection's
+ *     call tree
+ */
+struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
+				       struct rxrpc_connection *conn,
+				       struct rxrpc_header *hdr,
+				       gfp_t gfp)
+{
+	struct rxrpc_call *call, *candidate;
+	struct rb_node **p, *parent;
+	__be32 call_id;
+
+	_enter(",%d,,%x", conn->debug_id, gfp);
+
+	ASSERT(rx != NULL);
+
+	candidate = rxrpc_alloc_call(gfp);
+	if (!candidate)
+		return ERR_PTR(-EBUSY);
+
+	candidate->socket = rx;
+	candidate->conn = conn;
+	candidate->cid = hdr->cid;
+	candidate->call_id = hdr->callNumber;
+	candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK;
+	candidate->rx_data_post = 0;
+	candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+	if (conn->security_ix > 0)
+		candidate->state = RXRPC_CALL_SERVER_SECURING;
+
+	write_lock_bh(&conn->lock);
+
+	/* set the channel for this call */
+	call = conn->channels[candidate->channel];
+	_debug("channel[%u] is %p", candidate->channel, call);
+	if (call && call->call_id == hdr->callNumber) {
+		/* already set; must've been a duplicate packet */
+		_debug("extant call [%d]", call->state);
+		ASSERTCMP(call->conn, ==, conn);
+
+		read_lock(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_LOCALLY_ABORTED:
+			if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+				rxrpc_queue_call(call);
+			/* fall through */
+		case RXRPC_CALL_REMOTELY_ABORTED:
+			read_unlock(&call->state_lock);
+			goto aborted_call;
+		default:
+			rxrpc_get_call(call);
+			read_unlock(&call->state_lock);
+			goto extant_call;
+		}
+	}
+
+	if (call) {
+		/* it seems the channel is still in use from the previous call
+		 * - ditch the old binding if its call is now complete */
+		_debug("CALL: %u { %s }",
+		       call->debug_id, rxrpc_call_states[call->state]);
+
+		if (call->state >= RXRPC_CALL_COMPLETE) {
+			conn->channels[call->channel] = NULL;
+		} else {
+			write_unlock_bh(&conn->lock);
+			/* the ACK window array was allocated separately from
+			 * the call record and must be freed separately */
+			kfree(candidate->acks_window);
+			kmem_cache_free(rxrpc_call_jar, candidate);
+			_leave(" = -EBUSY");
+			return ERR_PTR(-EBUSY);
+		}
+	}
+
+	/* check the call number isn't duplicate */
+	_debug("check dup");
+	call_id = hdr->callNumber;
+	p = &conn->calls.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		call = rb_entry(parent, struct rxrpc_call, conn_node);
+
+		if (call_id < call->call_id)
+			p = &(*p)->rb_left;
+		else if (call_id > call->call_id)
+			p = &(*p)->rb_right;
+		else
+			goto old_call;
+	}
+
+	/* make the call available */
+	_debug("new call");
+	call = candidate;
+	candidate = NULL;
+	rb_link_node(&call->conn_node, parent, p);
+	rb_insert_color(&call->conn_node, &conn->calls);
+	conn->channels[call->channel] = call;
+	/* the call now pins both its socket and its connection */
+	sock_hold(&rx->sk);
+	atomic_inc(&conn->usage);
+	write_unlock_bh(&conn->lock);
+
+	spin_lock(&conn->trans->peer->lock);
+	list_add(&call->error_link, &conn->trans->peer->error_targets);
+	spin_unlock(&conn->trans->peer->lock);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+	_leave(" = %p {%d} [new]", call, call->debug_id);
+	return call;
+
+	/* on each of the paths below the candidate was never published, so it
+	 * can be freed directly - along with its ACK window array */
+extant_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
+	return call;
+
+aborted_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNABORTED");
+	return ERR_PTR(-ECONNABORTED);
+
+old_call:
+	write_unlock_bh(&conn->lock);
+	kfree(candidate->acks_window);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNRESET [old]");
+	return ERR_PTR(-ECONNRESET);
+}
+
+/*
+ * look up a call on a socket by its user call ID
+ * - called in process context with IRQs enabled
+ * - returns the matching call with an extra ref taken on it, or NULL if the
+ *   socket's tree holds no call with that ID
+ */
+struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
+					  unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+	struct rb_node *node;
+
+	_enter("%p,%lx", rx, user_call_ID);
+
+	read_lock(&rx->call_lock);
+
+	/* descend the socket's tree of calls, which is keyed on the user ID */
+	for (node = rx->calls.rb_node; node; ) {
+		call = rb_entry(node, struct rxrpc_call, sock_node);
+
+		if (user_call_ID == call->user_call_ID) {
+			/* pin the call before letting go of the tree lock */
+			rxrpc_get_call(call);
+			read_unlock(&rx->call_lock);
+			_leave(" = %p [%d]", call, atomic_read(&call->usage));
+			return call;
+		}
+
+		if (user_call_ID < call->user_call_ID)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+
+	read_unlock(&rx->call_lock);
+	_leave(" = NULL");
+	return NULL;
+}
+
+/*
+ * detach a call from a socket and set up for release
+ * - marks the call RXRPC_CALL_RELEASED (it is a BUG to release a call twice)
+ * - unlinks the call from the socket's accept queue or call tree
+ * - gives the call's channel back to the connection
+ * - aborts the call if it is still in progress
+ * - discards anything left on the Rx queues
+ * - stops the resend, ACK and life timers and starts the deadspan timer
+ */
+void rxrpc_release_call(struct rxrpc_call *call)
+{
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_sock *rx = call->socket;
+
+	_enter("{%d,%d,%d,%d}",
+	       call->debug_id, atomic_read(&call->usage),
+	       atomic_read(&call->ackr_not_idle),
+	       call->rx_first_oos);
+
+	spin_lock_bh(&call->lock);
+	if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
+		BUG();
+	spin_unlock_bh(&call->lock);
+
+	/* dissociate from the socket
+	 * - the socket's ref on the call is passed to the death timer
+	 */
+	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+
+	/* a call is either still waiting on an accept queue (in which case it
+	 * must not have a user ID yet) or it is in the socket's call tree */
+	write_lock_bh(&rx->call_lock);
+	if (!list_empty(&call->accept_link)) {
+		_debug("unlinking once-pending call %p { e=%lx f=%lx }",
+		       call, call->events, call->flags);
+		ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+		list_del_init(&call->accept_link);
+		sk_acceptq_removed(&rx->sk);
+	} else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		rb_erase(&call->sock_node, &rx->calls);
+		/* overwrite the erased node with a recognisable pattern */
+		memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+	}
+	write_unlock_bh(&rx->call_lock);
+
+	/* free up the channel for reuse */
+	/* locks are nested: transport client lock, then connection lock, then
+	 * call state lock; the client lock is dropped first, further down */
+	spin_lock(&conn->trans->client_lock);
+	write_lock_bh(&conn->lock);
+	write_lock(&call->state_lock);
+
+	if (conn->channels[call->channel] == call)
+		conn->channels[call->channel] = NULL;
+
+	/* for a bundled client connection, account for the freed call slot and
+	 * move the connection to the bundle list matching its new occupancy */
+	if (conn->out_clientflag && conn->bundle) {
+		conn->avail_calls++;
+		switch (conn->avail_calls) {
+		case 1:
+			/* was full; now has a slot available - deliberately
+			 * falls through to the sanity check below */
+			list_move_tail(&conn->bundle_link,
+				       &conn->bundle->avail_conns);
+		case 2 ... RXRPC_MAXCALLS - 1:
+			/* at least one channel must be vacant */
+			ASSERT(conn->channels[0] == NULL ||
+			       conn->channels[1] == NULL ||
+			       conn->channels[2] == NULL ||
+			       conn->channels[3] == NULL);
+			break;
+		case RXRPC_MAXCALLS:
+			/* every slot is free, so all channels must be vacant */
+			list_move_tail(&conn->bundle_link,
+				       &conn->bundle->unused_conns);
+			ASSERT(conn->channels[0] == NULL &&
+			       conn->channels[1] == NULL &&
+			       conn->channels[2] == NULL &&
+			       conn->channels[3] == NULL);
+			break;
+		default:
+			printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
+			       conn->avail_calls);
+			BUG();
+		}
+	}
+
+	spin_unlock(&conn->trans->client_lock);
+
+	/* a call released before completing is aborted locally; a client call
+	 * that only has its final ACK outstanding is left alone */
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
+		_debug("+++ ABORTING STATE %d +++\n", call->state);
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_CALL_DEAD;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		rxrpc_queue_call(call);
+	}
+	write_unlock(&call->state_lock);
+	write_unlock_bh(&conn->lock);
+
+	/* clean up the Rx queue */
+	if (!skb_queue_empty(&call->rx_queue) ||
+	    !skb_queue_empty(&call->rx_oos_queue)) {
+		struct rxrpc_skb_priv *sp;
+		struct sk_buff *skb;
+
+		_debug("purge Rx queues");
+
+		/* the call lock is held whilst a packet is dequeued and
+		 * detached from the call, but dropped around the actual
+		 * freeing of each packet */
+		spin_lock_bh(&call->lock);
+		while ((skb = skb_dequeue(&call->rx_queue)) ||
+		       (skb = skb_dequeue(&call->rx_oos_queue))) {
+			sp = rxrpc_skb(skb);
+			if (sp->call) {
+				/* drop the ref the packet held on the call */
+				ASSERTCMP(sp->call, ==, call);
+				rxrpc_put_call(call);
+				sp->call = NULL;
+			}
+			skb->destructor = NULL;
+			spin_unlock_bh(&call->lock);
+
+			_debug("- zap %s %%%u #%u",
+			       rxrpc_pkts[sp->hdr.type],
+			       ntohl(sp->hdr.serial),
+			       ntohl(sp->hdr.seq));
+			rxrpc_free_skb(skb);
+			spin_lock_bh(&call->lock);
+		}
+		spin_unlock_bh(&call->lock);
+
+		ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+	}
+
+	/* stop the active timers and start the countdown after which
+	 * rxrpc_dead_call_expired() marks the call dead */
+	del_timer_sync(&call->resend_timer);
+	del_timer_sync(&call->ack_timer);
+	del_timer_sync(&call->lifetimer);
+	call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+	add_timer(&call->deadspan);
+
+	_leave("");
+}
+
+/*
+ * deadspan timer handler: the grace period after release has run out
+ * - moves the call to the RXRPC_CALL_DEAD state and drops a ref on it
+ */
+static void rxrpc_dead_call_expired(unsigned long _call)
+{
+	struct rxrpc_call *dead;
+
+	dead = (struct rxrpc_call *) _call;
+	_enter("{%d}", dead->debug_id);
+
+	/* the state must read as dead before the put below: putting the last
+	 * ref asserts RXRPC_CALL_DEAD */
+	write_lock_bh(&dead->state_lock);
+	dead->state = RXRPC_CALL_DEAD;
+	write_unlock_bh(&dead->state_lock);
+
+	rxrpc_put_call(dead);
+}
+
+/*
+ * flag a call for release, aborting it first if it hasn't yet completed
+ * - called with softirqs disabled
+ * - the call is queued for processing only if one of the event bits was newly
+ *   set here
+ * - calls already in the RXRPC_CALL_DEAD state are left untouched
+ */
+static void rxrpc_mark_call_released(struct rxrpc_call *call)
+{
+	bool need_queue = false;
+
+	write_lock(&call->state_lock);
+
+	if (call->state >= RXRPC_CALL_DEAD)
+		goto out_unlock;
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		_debug("abort call %p", call);
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_CALL_DEAD;
+		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+			need_queue = true;
+	}
+
+	if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		need_queue = true;
+
+	if (need_queue)
+		rxrpc_queue_call(call);
+
+out_unlock:
+	write_unlock(&call->state_lock);
+}
+
+/*
+ * mark every call attached to a socket for release
+ * - covers the calls in the socket's call tree as well as those still sitting
+ *   on its secureq and acceptq lists
+ */
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
+{
+	struct rxrpc_call *call;
+	struct rb_node *node;
+
+	_enter("%p", rx);
+
+	read_lock_bh(&rx->call_lock);
+
+	/* mark all the calls as no longer wanting incoming packets */
+	node = rb_first(&rx->calls);
+	while (node) {
+		call = rb_entry(node, struct rxrpc_call, sock_node);
+		rxrpc_mark_call_released(call);
+		node = rb_next(node);
+	}
+
+	/* kill the not-yet-accepted incoming calls */
+	list_for_each_entry(call, &rx->secureq, accept_link)
+		rxrpc_mark_call_released(call);
+
+	list_for_each_entry(call, &rx->acceptq, accept_link)
+		rxrpc_mark_call_released(call);
+
+	read_unlock_bh(&rx->call_lock);
+	_leave("");
+}
+
+/*
+ * drop a ref on a call
+ * - when the last ref goes, the call must already be in the RXRPC_CALL_DEAD
+ *   state and its destroyer work item is queued to dispose of it
+ */
+void __rxrpc_put_call(struct rxrpc_call *call)
+{
+	ASSERT(call != NULL);
+
+	_enter("%p{u=%d}", call, atomic_read(&call->usage));
+
+	ASSERTCMP(atomic_read(&call->usage), >, 0);
+
+	if (!atomic_dec_and_test(&call->usage)) {
+		/* other refs remain */
+		_leave("");
+		return;
+	}
+
+	_debug("call %d dead", call->debug_id);
+	ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+	rxrpc_queue_work(&call->destroyer);
+	_leave("");
+}
+
+/*
+ * clean up a call
+ * - stops all four timers, detaches the call from its connection and peer,
+ *   frees whatever packets it still holds and finally frees the record itself
+ * - if the call's processor work item is still pending, the destroyer is
+ *   requeued instead and the cleanup is retried later
+ */
+static void rxrpc_cleanup_call(struct rxrpc_call *call)
+{
+	_net("DESTROY CALL %d", call->debug_id);
+
+	ASSERT(call->socket);
+
+	/* overwrite the socket tree node with a recognisable pattern */
+	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
+
+	del_timer_sync(&call->lifetimer);
+	del_timer_sync(&call->deadspan);
+	del_timer_sync(&call->ack_timer);
+	del_timer_sync(&call->resend_timer);
+
+	/* the call must have been released and have no events outstanding */
+	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+	ASSERTCMP(call->events, ==, 0);
+	if (work_pending(&call->processor)) {
+		_debug("defer destroy");
+		rxrpc_queue_work(&call->destroyer);
+		return;
+	}
+
+	if (call->conn) {
+		/* remove from the peer's error targets and from the
+		 * connection's call tree, then drop the connection */
+		spin_lock(&call->conn->trans->peer->lock);
+		list_del(&call->error_link);
+		spin_unlock(&call->conn->trans->peer->lock);
+
+		write_lock_bh(&call->conn->lock);
+		rb_erase(&call->conn_node, &call->conn->calls);
+		write_unlock_bh(&call->conn->lock);
+		rxrpc_put_connection(call->conn);
+	}
+
+	if (call->acks_window) {
+		_debug("kill Tx window %d",
+		       CIRC_CNT(call->acks_head, call->acks_tail,
+				call->acks_winsz));
+		smp_mb();
+		/* walk the circular window from tail to head, freeing each
+		 * packet still held in it */
+		while (CIRC_CNT(call->acks_head, call->acks_tail,
+				call->acks_winsz) > 0) {
+			struct rxrpc_skb_priv *sp;
+			unsigned long _skb;
+
+			/* each slot is an skb pointer whose bottom bit is used
+			 * as a flag and has to be masked off */
+			_skb = call->acks_window[call->acks_tail] & ~1;
+			sp = rxrpc_skb((struct sk_buff *) _skb);
+			_debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+			rxrpc_free_skb((struct sk_buff *) _skb);
+			call->acks_tail =
+				(call->acks_tail + 1) & (call->acks_winsz - 1);
+		}
+
+		kfree(call->acks_window);
+	}
+
+	/* NOTE(review): tx_pending is passed on without a NULL check -
+	 * presumably rxrpc_free_skb() tolerates NULL; confirm */
+	rxrpc_free_skb(call->tx_pending);
+
+	rxrpc_purge_queue(&call->rx_queue);
+	ASSERT(skb_queue_empty(&call->rx_oos_queue));
+	/* drop the call's ref on its socket and free the record */
+	sock_put(&call->socket->sk);
+	kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
+ * work item handler that disposes of a dead call
+ * - takes the call off the rxrpc_calls list and passes it to
+ *   rxrpc_cleanup_call()
+ */
+static void rxrpc_destroy_call(struct work_struct *work)
+{
+	struct rxrpc_call *call;
+
+	/* the work item is embedded in the call as its destroyer member */
+	call = container_of(work, struct rxrpc_call, destroyer);
+
+	_enter("%p{%d,%d,%p}",
+	       call, atomic_read(&call->usage), call->channel, call->conn);
+
+	ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_del_init(&call->link);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	rxrpc_cleanup_call(call);
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the call records from a transport endpoint rather
+ * than waiting for them to time out
+ * - empties the rxrpc_calls list one entry at a time, dropping the list lock
+ *   between entries to allow a reschedule
+ * - complains about any call that still appears to be in use
+ */
+void __exit rxrpc_destroy_all_calls(void)
+{
+	struct rxrpc_call *call;
+
+	_enter("");
+	write_lock_bh(&rxrpc_call_lock);
+
+	while (!list_empty(&rxrpc_calls)) {
+		call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+		_debug("Zapping call %p", call);
+
+		list_del_init(&call->link);
+
+		switch (atomic_read(&call->usage)) {
+		case 0:
+			/* no refs left: the call must already be dead */
+			ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+			break;
+		case 1:
+			/* if the deadspan timer was still pending, run its
+			 * handler by hand rather than waiting for it */
+			if (del_timer_sync(&call->deadspan) != 0 &&
+			    call->state != RXRPC_CALL_DEAD)
+				rxrpc_dead_call_expired((unsigned long) call);
+			/* NOTE(review): a call that is still not dead is
+			 * skipped silently here, whereas one that is dead
+			 * falls through into the "still in use" report below -
+			 * confirm that this isn't inverted */
+			if (call->state != RXRPC_CALL_DEAD)
+				break;
+		default:
+			printk(KERN_ERR "RXRPC:"
+			       " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+			       call, atomic_read(&call->usage),
+			       atomic_read(&call->ackr_not_idle),
+			       rxrpc_call_states[call->state],
+			       call->flags, call->events);
+			if (!skb_queue_empty(&call->rx_queue))
+				printk(KERN_ERR"RXRPC: Rx queue occupied\n");
+			if (!skb_queue_empty(&call->rx_oos_queue))
+				printk(KERN_ERR"RXRPC: OOS queue occupied\n");
+			break;
+		}
+
+		/* give other tasks a chance to run between entries */
+		write_unlock_bh(&rxrpc_call_lock);
+		cond_resched();
+		write_lock_bh(&rxrpc_call_lock);
+	}
+
+	write_unlock_bh(&rxrpc_call_lock);
+	_leave("");
+}
+
+/*
+ * lifetimer handler: the call has outlived its maximum permitted lifetime
+ * - raises the RXRPC_CALL_LIFE_TIMER event and queues the call for
+ *   processing, unless the call has already completed
+ */
+static void rxrpc_call_life_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	/* unlocked peek first; the state is rechecked under the lock */
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		_enter("{%d}", call->debug_id);
+
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE) {
+			set_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+			rxrpc_queue_call(call);
+		}
+		read_unlock_bh(&call->state_lock);
+	}
+}
+
+/*
+ * resend timer handler
+ * - notes that the timer is no longer running, then raises the
+ *   RXRPC_CALL_RESEND_TIMER event and queues the call if the event wasn't
+ *   already pending
+ * - does nothing for a call that has already completed
+ */
+static void rxrpc_resend_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", call->debug_id);
+
+	/* unlocked peek first; the state is rechecked under the lock */
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	read_lock_bh(&call->state_lock);
+
+	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+			rxrpc_queue_call(call);
+	}
+
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * ACK timer handler
+ * - raises the RXRPC_CALL_ACK event and queues the call for processing if the
+ *   event wasn't already pending
+ * - does nothing for a call that has already completed
+ */
+static void rxrpc_ack_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", call->debug_id);
+
+	/* unlocked peek first; the state is rechecked under the lock */
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	read_lock_bh(&call->state_lock);
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+			rxrpc_queue_call(call);
+	}
+
+	read_unlock_bh(&call->state_lock);
+}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
new file mode 100644
index 000000000000..43cb3e051ece
--- /dev/null
+++ b/net/rxrpc/ar-connection.c
@@ -0,0 +1,911 @@
+/* RxRPC virtual connection handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/* handler run by the rxrpc_connection_reap delayed work item declared below */
+static void rxrpc_connection_reaper(struct work_struct *work);
+
+/* list of connection records (linked through conn->link); additions in this
+ * file are made with rxrpc_connection_lock write-locked */
+LIST_HEAD(rxrpc_connections);
+DEFINE_RWLOCK(rxrpc_connection_lock);
+/* NOTE(review): presumably in seconds (i.e. ten minutes) - confirm against
+ * the reaper */
+static unsigned long rxrpc_connection_timeout = 10 * 60;
+static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+
+/*
+ * allocate a blank client connection bundle
+ * - the bundle comes back zeroed with its three connection lists empty and a
+ *   single usage ref held
+ * - returns NULL if the allocation fails
+ */
+static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle;
+
+	_enter("");
+
+	bundle = kzalloc(sizeof(*bundle), gfp);
+	if (!bundle)
+		goto out;
+
+	atomic_set(&bundle->usage, 1);
+	init_waitqueue_head(&bundle->chanwait);
+	INIT_LIST_HEAD(&bundle->busy_conns);
+	INIT_LIST_HEAD(&bundle->avail_conns);
+	INIT_LIST_HEAD(&bundle->unused_conns);
+
+out:
+	_leave(" = %p", bundle);
+	return bundle;
+}
+
+/*
+ * compare bundle parameters with what we're looking for
+ * - return -ve, 0 or +ve
+ * - bundles are ordered by service ID first and then by the address of the
+ *   key; the ordering only has to be self-consistent as it is used solely to
+ *   place bundles in, and find them in, a transport's bundle tree
+ */
+static inline
+int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
+		     struct key *key, __be16 service_id)
+{
+	unsigned long have = (unsigned long) bundle->key;
+	unsigned long want = (unsigned long) key;
+
+	/* 16-bit values, so the difference can't overflow an int */
+	if (bundle->service_id != service_id)
+		return bundle->service_id - service_id;
+
+	/* compare the key addresses explicitly: returning their difference
+	 * would truncate an unsigned long to an int, which on 64-bit machines
+	 * can make distinct keys compare equal or flip the sign */
+	if (have != want)
+		return have < want ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * get bundle of client connections that a client socket can make use of
+ * - the socket's cached bundle is used if it belongs to this transport
+ * - otherwise the transport's bundle tree is searched for a bundle matching
+ *   the key and service ID, and one is created and inserted if none is found
+ * - the bundle is also cached on the socket (with its own ref) if the socket
+ *   is in the RXRPC_CLIENT_CONNECTED state and has no bundle yet
+ * - returns the bundle with a ref for the caller, or ERR_PTR(-ENOMEM)
+ */
+struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
+					   struct rxrpc_transport *trans,
+					   struct key *key,
+					   __be16 service_id,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle, *candidate;
+	struct rb_node *p, *parent, **pp;
+
+	/* the service ID is a 16-bit quantity, so it needs ntohs() */
+	_enter("%p{%x},%x,%hx,",
+	       rx, key_serial(key), trans->debug_id, ntohs(service_id));
+
+	if (rx->trans == trans && rx->bundle) {
+		atomic_inc(&rx->bundle->usage);
+		_leave(" = %p [cached]", rx->bundle);
+		return rx->bundle;
+	}
+
+	/* search the extant bundles first for one that matches the specified
+	 * key and service ID */
+	spin_lock(&trans->client_lock);
+
+	p = trans->bundles.rb_node;
+	while (p) {
+		bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			p = p->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			p = p->rb_right;
+		else
+			goto found_extant_bundle;
+	}
+
+	spin_unlock(&trans->client_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_bundle(gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	candidate->key = key_get(key);
+	candidate->service_id = service_id;
+
+	/* the lock was dropped for the allocation, so search again, this time
+	 * remembering where the new node would go */
+	spin_lock(&trans->client_lock);
+
+	pp = &trans->bundles.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			pp = &(*pp)->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* second search also failed; add the new bundle */
+	bundle = candidate;
+	candidate = NULL;
+
+	rb_link_node(&bundle->node, parent, pp);
+	rb_insert_color(&bundle->node, &trans->bundles);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE new on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [new]", bundle);
+	return bundle;
+
+	/* we found the bundle in the list immediately */
+found_extant_bundle:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE old on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+
+	/* we found the bundle on the second time through the list */
+found_extant_second:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	/* the unused candidate holds a ref on the key, which has to be
+	 * dropped before the candidate is freed */
+	key_put(candidate->key);
+	kfree(candidate);
+	_net("BUNDLE old2 on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+}
+
+/*
+ * drop a ref on a bundle
+ * - when the last ref goes, the bundle is removed from the transport's bundle
+ *   tree, its key is released and the bundle is freed
+ * - a bundle being destroyed must not have any connections left on it
+ */
+void rxrpc_put_bundle(struct rxrpc_transport *trans,
+		      struct rxrpc_conn_bundle *bundle)
+{
+	_enter("%p,%p{%d}",trans, bundle, atomic_read(&bundle->usage));
+
+	/* the client lock is only taken if the count actually hits zero */
+	if (!atomic_dec_and_lock(&bundle->usage, &trans->client_lock)) {
+		_leave("");
+		return;
+	}
+
+	_debug("Destroy bundle");
+	rb_erase(&bundle->node, &trans->bundles);
+	spin_unlock(&trans->client_lock);
+
+	ASSERT(list_empty(&bundle->unused_conns));
+	ASSERT(list_empty(&bundle->avail_conns));
+	ASSERT(list_empty(&bundle->busy_conns));
+	ASSERTCMP(bundle->num_conns, ==, 0);
+
+	key_put(bundle->key);
+	kfree(bundle);
+
+	_leave("");
+}
+
+/*
+ * allocate a new connection
+ * - the record comes back zeroed with one usage ref, all RXRPC_MAXCALLS call
+ *   slots available and the default alignment and header size set
+ * - returns NULL if the allocation fails
+ */
+static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+
+	_enter("");
+
+	conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+	if (conn) {
+		INIT_WORK(&conn->processor, &rxrpc_process_connection);
+		INIT_LIST_HEAD(&conn->bundle_link);
+		conn->calls = RB_ROOT;
+		skb_queue_head_init(&conn->rx_queue);
+		rwlock_init(&conn->lock);
+		spin_lock_init(&conn->state_lock);
+		atomic_set(&conn->usage, 1);
+		conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		conn->avail_calls = RXRPC_MAXCALLS;
+		conn->size_align = 4;
+		conn->header_size = sizeof(struct rxrpc_header);
+	}
+
+	/* conn is NULL if the allocation failed, so it mustn't be dereferenced
+	 * unconditionally for the trace */
+	_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
+	return conn;
+}
+
+/*
+ * assign a connection ID to a connection and add it to the transport's
+ * connection lookup tree
+ * - called with transport client lock held
+ * - the tree is ordered by epoch and then by connection ID
+ * - IDs are handed out in steps of RXRPC_CID_INC from the transport's counter;
+ *   if the proposed ID is already in the tree, the tree is walked to find the
+ *   next one that is free
+ */
+static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
+{
+	struct rxrpc_connection *xconn;
+	struct rb_node *parent, **p;
+	__be32 epoch;
+	u32 real_conn_id;
+
+	_enter("");
+
+	epoch = conn->epoch;
+
+	write_lock_bh(&conn->trans->conn_lock);
+
+	/* advance the counter; if it wrapped round to something smaller than
+	 * one increment, restart it at RXRPC_CID_INC so that ID 0 is never
+	 * handed out */
+	conn->trans->conn_idcounter += RXRPC_CID_INC;
+	if (conn->trans->conn_idcounter < RXRPC_CID_INC)
+		conn->trans->conn_idcounter = RXRPC_CID_INC;
+	real_conn_id = conn->trans->conn_idcounter;
+
+attempt_insertion:
+	parent = NULL;
+	p = &conn->trans->client_conns.rb_node;
+
+	/* look for the insertion point for (epoch, real_conn_id) */
+	while (*p) {
+		parent = *p;
+		xconn = rb_entry(parent, struct rxrpc_connection, node);
+
+		if (epoch < xconn->epoch)
+			p = &(*p)->rb_left;
+		else if (epoch > xconn->epoch)
+			p = &(*p)->rb_right;
+		else if (real_conn_id < xconn->real_conn_id)
+			p = &(*p)->rb_left;
+		else if (real_conn_id > xconn->real_conn_id)
+			p = &(*p)->rb_right;
+		else
+			goto id_exists;
+	}
+
+	/* we've found a suitable hole - arrange for this connection to occupy
+	 * it */
+	rb_link_node(&conn->node, parent, p);
+	rb_insert_color(&conn->node, &conn->trans->client_conns);
+
+	/* the ID is kept in host order for tree comparisons and in network
+	 * order as the connection's cid */
+	conn->real_conn_id = real_conn_id;
+	conn->cid = htonl(real_conn_id);
+	write_unlock_bh(&conn->trans->conn_lock);
+	_leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid));
+	return;
+
+	/* we found a connection with the proposed ID - walk the tree from that
+	 * point looking for the next unused ID */
+id_exists:
+	for (;;) {
+		/* propose the next ID up; if that wraps, reset both the
+		 * proposal and the transport's counter and start over */
+		real_conn_id += RXRPC_CID_INC;
+		if (real_conn_id < RXRPC_CID_INC) {
+			real_conn_id = RXRPC_CID_INC;
+			conn->trans->conn_idcounter = real_conn_id;
+			goto attempt_insertion;
+		}
+
+		/* ran off the end of the tree, so the proposal can't clash
+		 * with a later node */
+		parent = rb_next(parent);
+		if (!parent)
+			goto attempt_insertion;
+
+		/* if the next connection in order sorts after the proposal,
+		 * retry the insertion with it; otherwise keep walking */
+		xconn = rb_entry(parent, struct rxrpc_connection, node);
+		if (epoch < xconn->epoch ||
+		    real_conn_id < xconn->real_conn_id)
+			goto attempt_insertion;
+	}
+}
+
+/*
+ * insert a call into its connection's tree of calls, keyed on call ID
+ * - it is a BUG for the tree to already hold a call with the same ID
+ */
+static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
+				      struct rxrpc_call *call)
+{
+	struct rb_node **link, *parent = NULL;
+	struct rxrpc_call *other;
+	__be32 call_id;
+
+	write_lock_bh(&conn->lock);
+
+	call_id = call->call_id;
+
+	/* descend to the leaf position at which the new node belongs */
+	link = &conn->calls.rb_node;
+	while (*link) {
+		parent = *link;
+		other = rb_entry(parent, struct rxrpc_call, conn_node);
+
+		if (call_id == other->call_id)
+			BUG();
+
+		if (call_id < other->call_id)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	rb_link_node(&call->conn_node, parent, link);
+	rb_insert_color(&call->conn_node, &conn->calls);
+
+	write_unlock_bh(&conn->lock);
+}
+
+/*
+ * connect a call on an exclusive connection
+ * - uses the socket's own connection (rx->conn), creating it and assigning it
+ *   a connection ID the first time round
+ * - on success the call is bound to a free channel, given a call ID and added
+ *   to the connection's call tree, and holds a ref on the connection
+ * - returns 0 on success, -ENOMEM if a connection can't be allocated, the
+ *   error from rxrpc_init_client_conn_security(), or -ENOSR if every channel
+ *   on the connection is in use
+ */
+static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
+				   struct rxrpc_transport *trans,
+				   __be16 service_id,
+				   struct rxrpc_call *call,
+				   gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+	int chan, ret;
+
+	_enter("");
+
+	conn = rx->conn;
+	if (!conn) {
+		/* not yet present - create a new connection for the socket;
+		 * note that rxrpc_alloc_connection() reports failure by
+		 * returning NULL rather than an error pointer */
+		conn = rxrpc_alloc_connection(gfp);
+		if (!conn) {
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+		conn->trans = trans;
+		conn->bundle = NULL;
+		conn->service_id = service_id;
+		conn->epoch = rxrpc_epoch;
+		conn->in_clientflag = 0;
+		conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+		conn->cid = 0;
+		conn->state = RXRPC_CONN_CLIENT;
+		conn->avail_calls = RXRPC_MAXCALLS - 1;
+		conn->security_level = rx->min_sec_level;
+		conn->key = key_get(rx->key);
+
+		ret = rxrpc_init_client_conn_security(conn);
+		if (ret < 0) {
+			key_put(conn->key);
+			kfree(conn);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&conn->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+		/* the connection pins its transport */
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT EXCL new %d on TRANS %d",
+		     conn->debug_id, conn->trans->debug_id);
+
+		rxrpc_assign_connection_id(conn);
+		rx->conn = conn;
+	} else {
+		/* the channel search below and both exit paths expect the
+		 * transport's client lock to be held */
+		spin_lock(&trans->client_lock);
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	goto no_free_channels;
+
+found_channel:
+	atomic_inc(&conn->usage);
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	/* the channel number goes into the call's copy of the connection ID */
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+	_leave(" = 0");
+	return 0;
+
+no_free_channels:
+	spin_unlock(&trans->client_lock);
+	_leave(" = -ENOSR");
+	return -ENOSR;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ * - sockets flagged RXRPC_SOCK_EXCLUSIVE_CONN are handed off to
+ *   rxrpc_connect_exclusive(); otherwise a connection is taken from the
+ *   bundle's available or unused lists, a new one being created and parked on
+ *   the unused list if neither has anything to offer
+ * - once the bundle holds 20 connections no more are created: the caller
+ *   sleeps on bundle->chanwait if gfp permits waiting, else gets -EAGAIN
+ * - returns 0 with the call attached to a channel, -ENOMEM if a connection
+ *   record can't be allocated, the error from
+ *   rxrpc_init_client_conn_security() if that fails, or -ERESTARTSYS if a
+ *   signal is pending
+ */
+int rxrpc_connect_call(struct rxrpc_sock *rx,
+		       struct rxrpc_transport *trans,
+		       struct rxrpc_conn_bundle *bundle,
+		       struct rxrpc_call *call,
+		       gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate;
+	int chan, ret;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("%p,%lx,", rx, call->user_call_ID);
+
+	if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
+		return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
+					       call, gfp);
+
+	spin_lock(&trans->client_lock);
+	for (;;) {
+		/* see if the bundle has a call slot available */
+		if (!list_empty(&bundle->avail_conns)) {
+			_debug("avail");
+			conn = list_entry(bundle->avail_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			/* taking the last free slot makes the connection busy */
+			if (--conn->avail_calls == 0)
+				list_move(&conn->bundle_link,
+					  &bundle->busy_conns);
+			ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+			ASSERT(conn->channels[0] == NULL ||
+			       conn->channels[1] == NULL ||
+			       conn->channels[2] == NULL ||
+			       conn->channels[3] == NULL);
+			atomic_inc(&conn->usage);
+			break;
+		}
+
+		/* otherwise press an idle connection back into service */
+		if (!list_empty(&bundle->unused_conns)) {
+			_debug("unused");
+			conn = list_entry(bundle->unused_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
+			conn->avail_calls = RXRPC_MAXCALLS - 1;
+			ASSERT(conn->channels[0] == NULL &&
+			       conn->channels[1] == NULL &&
+			       conn->channels[2] == NULL &&
+			       conn->channels[3] == NULL);
+			atomic_inc(&conn->usage);
+			list_move(&conn->bundle_link, &bundle->avail_conns);
+			break;
+		}
+
+		/* need to allocate a new connection */
+		_debug("get new conn [%d]", bundle->num_conns);
+
+		/* the client lock is dropped from here until the candidate is
+		 * ready, so every return below is made without it held */
+		spin_unlock(&trans->client_lock);
+
+		if (signal_pending(current))
+			goto interrupted;
+
+		if (bundle->num_conns >= 20) {
+			_debug("too many conns");
+
+			if (!(gfp & __GFP_WAIT)) {
+				_leave(" = -EAGAIN");
+				return -EAGAIN;
+			}
+
+			/* wait for a slot to come free or for the bundle to
+			 * shrink, then go round and look again */
+			add_wait_queue(&bundle->chanwait, &myself);
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (bundle->num_conns < 20 ||
+				    !list_empty(&bundle->unused_conns) ||
+				    !list_empty(&bundle->avail_conns))
+					break;
+				if (signal_pending(current))
+					goto interrupted_dequeue;
+				schedule();
+			}
+			remove_wait_queue(&bundle->chanwait, &myself);
+			__set_current_state(TASK_RUNNING);
+			spin_lock(&trans->client_lock);
+			continue;
+		}
+
+		/* not yet present - create a candidate for a new connection and then
+		 * redo the check */
+		candidate = rxrpc_alloc_connection(gfp);
+		if (!candidate) {
+			/* allocation failure is reported as NULL, as the check
+			 * in rxrpc_incoming_connection() also expects; testing
+			 * with IS_ERR() would let a NULL pointer through to be
+			 * dereferenced below */
+			_leave(" = -ENOMEM");
+			return -ENOMEM;
+		}
+
+		candidate->trans = trans;
+		candidate->bundle = bundle;
+		candidate->service_id = bundle->service_id;
+		candidate->epoch = rxrpc_epoch;
+		candidate->in_clientflag = 0;
+		candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
+		candidate->cid = 0;
+		candidate->state = RXRPC_CONN_CLIENT;
+		candidate->avail_calls = RXRPC_MAXCALLS;
+		candidate->security_level = rx->min_sec_level;
+		candidate->key = key_get(bundle->key);
+
+		ret = rxrpc_init_client_conn_security(candidate);
+		if (ret < 0) {
+			key_put(candidate->key);
+			kfree(candidate);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&candidate->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+
+		list_add(&candidate->bundle_link, &bundle->unused_conns);
+		bundle->num_conns++;
+		atomic_inc(&bundle->usage);
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT new %d on TRANS %d",
+		     candidate->debug_id, candidate->trans->debug_id);
+
+		rxrpc_assign_connection_id(candidate);
+		if (candidate->security)
+			candidate->security->prime_packet_security(candidate);
+
+		/* leave the candidate lurking in zombie mode attached to the
+		 * bundle until we're ready for it - the next pass round the
+		 * loop will find it on the unused list */
+		rxrpc_put_connection(candidate);
+		candidate = NULL;
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 * - we're holding a reference on the bundle
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	ASSERT(conn->channels[0] == NULL ||
+	       conn->channels[1] == NULL ||
+	       conn->channels[2] == NULL ||
+	       conn->channels[3] == NULL);
+	BUG();
+
+found_channel:
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+
+	_leave(" = 0");
+	return 0;
+
+interrupted_dequeue:
+	remove_wait_queue(&bundle->chanwait, &myself);
+	__set_current_state(TASK_RUNNING);
+interrupted:
+	_leave(" = -ERESTARTSYS");
+	return -ERESTARTSYS;
+}
+
+/*
+ * get a record of an incoming connection
+ * - the packet header must have RXRPC_CLIENT_INITIATED set (asserted)
+ * - the transport's server connection tree is searched on the epoch and the
+ *   connection ID extracted from the header; if nothing matches, a candidate
+ *   record is allocated with no lock held and the tree is searched again under
+ *   the write lock before the candidate is inserted
+ * - an existing record is returned with its usage count incremented, provided
+ *   its security index matches the one in the header
+ * - returns ERR_PTR(-ENOMEM) if the candidate can't be allocated and
+ *   ERR_PTR(-EKEYREJECTED) if the security index doesn't match
+ */
+struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *trans,
+			  struct rxrpc_header *hdr,
+			  gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate = NULL;
+	struct rb_node *p, **pp;
+	const char *new = "old";	/* only used in the _net() trace below */
+	__be32 epoch;
+	u32 conn_id;
+
+	_enter("");
+
+	ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
+
+	epoch = hdr->epoch;
+	conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+
+	/* search the connection list first */
+	read_lock_bh(&trans->conn_lock);
+
+	p = trans->server_conns.rb_node;
+	while (p) {
+		conn = rb_entry(p, struct rxrpc_connection, node);
+
+		_debug("maybe %x", conn->real_conn_id);
+
+		/* the tree is ordered by epoch first and then by conn ID */
+		if (epoch < conn->epoch)
+			p = p->rb_left;
+		else if (epoch > conn->epoch)
+			p = p->rb_right;
+		else if (conn_id < conn->real_conn_id)
+			p = p->rb_left;
+		else if (conn_id > conn->real_conn_id)
+			p = p->rb_right;
+		else
+			goto found_extant_connection;
+	}
+	read_unlock_bh(&trans->conn_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_connection(gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	candidate->trans = trans;
+	candidate->epoch = hdr->epoch;
+	candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
+	candidate->service_id = hdr->serviceId;
+	candidate->security_ix = hdr->securityIndex;
+	candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
+	candidate->out_clientflag = 0;
+	candidate->real_conn_id = conn_id;
+	/* a non-zero service ID starts the record off in the unsecured state */
+	candidate->state = RXRPC_CONN_SERVER;
+	if (candidate->service_id)
+		candidate->state = RXRPC_CONN_SERVER_UNSECURED;
+
+	write_lock_bh(&trans->conn_lock);
+
+	/* search again, this time remembering where a new node would go, as
+	 * the tree was unlocked whilst the candidate was being set up */
+	pp = &trans->server_conns.rb_node;
+	p = NULL;
+	while (*pp) {
+		p = *pp;
+		conn = rb_entry(p, struct rxrpc_connection, node);
+
+		if (epoch < conn->epoch)
+			pp = &(*pp)->rb_left;
+		else if (epoch > conn->epoch)
+			pp = &(*pp)->rb_right;
+		else if (conn_id < conn->real_conn_id)
+			pp = &(*pp)->rb_left;
+		else if (conn_id > conn->real_conn_id)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	/* NOTE(review): no usage increment is made for a new record here -
+	 * presumably rxrpc_alloc_connection() hands it back already holding
+	 * the caller's reference; confirm */
+	conn = candidate;
+	candidate = NULL;
+	rb_link_node(&conn->node, p, pp);
+	rb_insert_color(&conn->node, &trans->server_conns);
+	atomic_inc(&conn->trans->usage);
+
+	write_unlock_bh(&trans->conn_lock);
+
+	write_lock_bh(&rxrpc_connection_lock);
+	list_add_tail(&conn->link, &rxrpc_connections);
+	write_unlock_bh(&rxrpc_connection_lock);
+
+	new = "new";
+
+success:
+	_net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id);
+
+	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	return conn;
+
+	/* we found the connection in the list immediately */
+found_extant_connection:
+	if (hdr->securityIndex != conn->security_ix) {
+		read_unlock_bh(&trans->conn_lock);
+		goto security_mismatch;
+	}
+	atomic_inc(&conn->usage);
+	read_unlock_bh(&trans->conn_lock);
+	goto success;
+
+	/* we found the connection on the second time through the list */
+found_extant_second:
+	if (hdr->securityIndex != conn->security_ix) {
+		write_unlock_bh(&trans->conn_lock);
+		goto security_mismatch;
+	}
+	atomic_inc(&conn->usage);
+	write_unlock_bh(&trans->conn_lock);
+	/* someone else got there first, so the candidate isn't needed */
+	kfree(candidate);
+	goto success;
+
+security_mismatch:
+	/* candidate is still NULL if we came from the first search */
+	kfree(candidate);
+	_leave(" = -EKEYREJECTED");
+	return ERR_PTR(-EKEYREJECTED);
+}
+
+/*
+ * look up the connection record to which an incoming packet belongs
+ * - packets flagged as client-initiated are looked up in the transport's
+ *   server connection tree, all others in its client connection tree
+ * - both trees are ordered by epoch and then by connection ID
+ * - returns the record with its usage count incremented, or NULL if there's
+ *   no match
+ */
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
+					       struct rxrpc_header *hdr)
+{
+	struct rxrpc_connection *found = NULL, *cursor;
+	struct rb_root *tree;
+	struct rb_node *node;
+	__be32 epoch;
+	u32 conn_id;
+
+	_enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags);
+
+	read_lock_bh(&trans->conn_lock);
+
+	conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+	epoch = hdr->epoch;
+
+	tree = (hdr->flags & RXRPC_CLIENT_INITIATED) ?
+		&trans->server_conns : &trans->client_conns;
+
+	node = tree->rb_node;
+	while (node) {
+		cursor = rb_entry(node, struct rxrpc_connection, node);
+
+		_debug("maybe %x", cursor->real_conn_id);
+
+		if (epoch != cursor->epoch) {
+			node = (epoch < cursor->epoch) ?
+				node->rb_left : node->rb_right;
+		} else if (conn_id != cursor->real_conn_id) {
+			node = (conn_id < cursor->real_conn_id) ?
+				node->rb_left : node->rb_right;
+		} else {
+			/* pin the record before the lock is let go */
+			atomic_inc(&cursor->usage);
+			found = cursor;
+			break;
+		}
+	}
+
+	read_unlock_bh(&trans->conn_lock);
+
+	if (!found) {
+		_leave(" = NULL");
+		return NULL;
+	}
+
+	_leave(" = %p", found);
+	return found;
+}
+
+/*
+ * drop a reference on a virtual connection
+ * - the time of the put is noted on the record first
+ * - when the last reference goes, the reaper is queued to run with no delay
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+	_enter("%p{u=%d,d=%d}",
+	       conn, atomic_read(&conn->usage), conn->debug_id);
+
+	ASSERTCMP(atomic_read(&conn->usage), >, 0);
+
+	conn->put_time = xtime.tv_sec;
+
+	if (!atomic_dec_and_test(&conn->usage)) {
+		/* still in use by someone else */
+		_leave("");
+		return;
+	}
+
+	_debug("zombie");
+	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+	_leave("");
+}
+
+/*
+ * tear down and free a virtual connection that has no references left
+ * - the usage count must be zero and the call tree must be empty (asserted)
+ * - releases the bundle (if any), the queued packets, the security state and
+ *   the transport before freeing the record
+ */
+static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+{
+	struct rxrpc_transport *trans = conn->trans;
+
+	_enter("%p{%d}", conn, atomic_read(&conn->usage));
+
+	ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+
+	_net("DESTROY CONN %d", conn->debug_id);
+
+	if (conn->bundle != NULL)
+		rxrpc_put_bundle(trans, conn->bundle);
+
+	ASSERT(RB_EMPTY_ROOT(&conn->calls));
+	rxrpc_purge_queue(&conn->rx_queue);
+	rxrpc_clear_conn_security(conn);
+
+	rxrpc_put_transport(trans);
+	kfree(conn);
+	_leave("");
+}
+
+/*
+ * reap dead connections
+ * - work item handler; walks the global connection list looking for records
+ *   with a zero usage count
+ * - a record whose put time plus rxrpc_connection_timeout has passed is
+ *   unhooked from the global list, its transport's tree and its bundle, and
+ *   parked on a private graveyard list to be destroyed once the locks have
+ *   been dropped
+ * - if any unused record has yet to expire, the reaper requeues itself for
+ *   the earliest such expiry
+ */
+void rxrpc_connection_reaper(struct work_struct *work)
+{
+	struct rxrpc_connection *conn, *_p;
+	unsigned long now, earliest, reap_time;
+
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	now = xtime.tv_sec;
+	earliest = ULONG_MAX;	/* ULONG_MAX means nothing is waiting to expire */
+
+	write_lock_bh(&rxrpc_connection_lock);
+	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+		_debug("reap CONN %d { u=%d,t=%ld }",
+		       conn->debug_id, atomic_read(&conn->usage),
+		       (long) now - (long) conn->put_time);
+
+		/* cheap check without the transport locks first */
+		if (likely(atomic_read(&conn->usage) > 0))
+			continue;
+
+		spin_lock(&conn->trans->client_lock);
+		write_lock(&conn->trans->conn_lock);
+		reap_time = conn->put_time + rxrpc_connection_timeout;
+
+		/* recheck the usage count now that the transport locks are
+		 * held, as it may have been raised in the meantime */
+		if (atomic_read(&conn->usage) > 0) {
+			;
+		} else if (reap_time <= now) {
+			list_move_tail(&conn->link, &graveyard);
+			/* out_clientflag selects which tree the record is in */
+			if (conn->out_clientflag)
+				rb_erase(&conn->node,
+					 &conn->trans->client_conns);
+			else
+				rb_erase(&conn->node,
+					 &conn->trans->server_conns);
+			if (conn->bundle) {
+				list_del_init(&conn->bundle_link);
+				conn->bundle->num_conns--;
+			}
+
+		} else if (reap_time < earliest) {
+			earliest = reap_time;
+		}
+
+		write_unlock(&conn->trans->conn_lock);
+		spin_unlock(&conn->trans->client_lock);
+	}
+	write_unlock_bh(&rxrpc_connection_lock);
+
+	if (earliest != ULONG_MAX) {
+		_debug("reschedule reaper %ld", (long) earliest - now);
+		ASSERTCMP(earliest, >, now);
+		/* the times are in seconds; the delay is wanted in jiffies */
+		rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+					 (earliest - now) * HZ);
+	}
+
+	/* then destroy all those pulled out */
+	while (!list_empty(&graveyard)) {
+		conn = list_entry(graveyard.next, struct rxrpc_connection,
+				  link);
+		list_del_init(&conn->link);
+
+		ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+		rxrpc_destroy_connection(conn);
+	}
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the connection records rather than waiting for them
+ * to time out
+ * - zeroes the connection timeout so that the reaper adds no grace period to
+ *   a record's put time, then cancels any pending reaper run and queues one
+ *   with no delay
+ * - NOTE(review): this only queues the reaper; nothing visible here waits for
+ *   it to finish - confirm that the caller flushes the work queue
+ */
+void __exit rxrpc_destroy_all_connections(void)
+{
+	_enter("");
+
+	rxrpc_connection_timeout = 0;
+	cancel_delayed_work(&rxrpc_connection_reap);
+	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
new file mode 100644
index 000000000000..1ada43d51165
--- /dev/null
+++ b/net/rxrpc/ar-connevent.c
@@ -0,0 +1,403 @@
+/* connection-level event handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * propagate a connection-level abort to every call on the connection
+ * - each call whose state is no later than RXRPC_CALL_COMPLETE is moved to
+ *   the given state, has the abort code recorded and is queued for processing
+ * - a locally generated abort raises RXRPC_CALL_CONN_ABORT on the call; any
+ *   other state raises RXRPC_CALL_RCVD_ABORT
+ */
+static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
+			      u32 abort_code)
+{
+	struct rxrpc_call *call;
+	struct rb_node *node;
+	int event;
+
+	_enter("{%d},%x", conn->debug_id, abort_code);
+
+	/* the event to raise is the same for every call */
+	if (state == RXRPC_CALL_LOCALLY_ABORTED)
+		event = RXRPC_CALL_CONN_ABORT;
+	else
+		event = RXRPC_CALL_RCVD_ABORT;
+
+	read_lock_bh(&conn->lock);
+
+	node = rb_first(&conn->calls);
+	while (node) {
+		call = rb_entry(node, struct rxrpc_call, conn_node);
+
+		write_lock(&call->state_lock);
+		if (call->state <= RXRPC_CALL_COMPLETE) {
+			call->state = state;
+			call->abort_code = abort_code;
+			set_bit(event, &call->events);
+			rxrpc_queue_call(call);
+		}
+		write_unlock(&call->state_lock);
+
+		node = rb_next(node);
+	}
+
+	read_unlock_bh(&conn->lock);
+	_leave("");
+}
+
+/*
+ * generate a connection-level abort
+ * - moves the connection into the locally aborted state and records the
+ *   error, unless its state is already at or beyond remotely aborted, in
+ *   which case nothing is done and 0 is returned
+ * - the abort is passed on to the connection's calls and an ABORT packet
+ *   carrying the abort code is sent to the peer
+ * - returns 0 on success or -EAGAIN if the packet couldn't be sent
+ */
+static int rxrpc_abort_connection(struct rxrpc_connection *conn,
+				  u32 error, u32 abort_code)
+{
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	__be32 code;
+	size_t size;
+	int ret;
+
+	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
+
+	/* there's nothing to do if the connection is already dead */
+	spin_lock_bh(&conn->state_lock);
+	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
+		spin_unlock_bh(&conn->state_lock);
+		_leave(" = 0 [already dead]");
+		return 0;
+	}
+	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+	conn->error = error;
+	spin_unlock_bh(&conn->state_lock);
+
+	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+
+	/* the packet goes to the peer on the far end of the transport */
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	/* the payload is just the abort code in network byte order */
+	code = htonl(abort_code);
+
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+	iov[1].iov_base	= &code;
+	iov[1].iov_len	= sizeof(code);
+
+	size = iov[0].iov_len + iov[1].iov_len;
+
+	/* a connection-level abort carries no call number or sequence */
+	hdr.epoch	= conn->epoch;
+	hdr.cid		= conn->cid;
+	hdr.callNumber	= 0;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_ABORT;
+	hdr.flags	= conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= conn->service_id;
+	hdr.serial	= htonl(atomic_inc_return(&conn->serial));
+
+	_proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code);
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, size);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * note that the channel a call is on has now been secured
+ * - must be called with softirqs disabled
+ * - a NULL call is ignored
+ * - the call is queued for processing if it hasn't yet reached the completed
+ *   state and the secured event wasn't already raised on it
+ */
+void rxrpc_call_is_secure(struct rxrpc_call *call)
+{
+	_enter("%p", call);
+
+	if (!call)
+		return;
+
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		if (!test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
+			rxrpc_queue_call(call);
+	}
+	read_unlock(&call->state_lock);
+}
+
+/*
+ * connection-level Rx packet processor
+ * - handles one queued packet according to the type in its header:
+ *   ABORT     - mark the connection remotely aborted and abort its calls
+ *   CHALLENGE - hand off to the security module's respond_to_challenge op
+ *   RESPONSE  - verify the response, initialise and prime the security and,
+ *               if the connection was in the challenging state, promote it
+ *               to RXRPC_CONN_SERVER and notify the call on each channel
+ * - returns 0 on success, -ECONNABORTED if the connection is or has just
+ *   become aborted, -EPROTO for a short ABORT packet, an unhandled packet
+ *   type or a security packet on a connection with no security module, or
+ *   whatever error the security operations return
+ * - *_abort_code is passed through to the security operations
+ */
+static int rxrpc_process_event(struct rxrpc_connection *conn,
+			       struct sk_buff *skb,
+			       u32 *_abort_code)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	__be32 tmp;
+	u32 serial;
+	int loop, ret;
+
+	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED)
+		return -ECONNABORTED;
+
+	serial = ntohl(sp->hdr.serial);
+
+	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		/* the abort code is the first word of the packet data */
+		if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0)
+			return -EPROTO;
+		_proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp));
+
+		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
+				  ntohl(tmp));
+		return -ECONNABORTED;
+
+	case RXRPC_PACKET_TYPE_CHALLENGE:
+		if (conn->security)
+			return conn->security->respond_to_challenge(
+				conn, skb, _abort_code);
+		return -EPROTO;
+
+	case RXRPC_PACKET_TYPE_RESPONSE:
+		if (!conn->security)
+			return -EPROTO;
+
+		ret = conn->security->verify_response(conn, skb, _abort_code);
+		if (ret < 0)
+			return ret;
+
+		ret = conn->security->init_connection_security(conn);
+		if (ret < 0)
+			return ret;
+
+		conn->security->prime_packet_security(conn);
+		/* BHs are disabled by the outer lock, as is required by
+		 * rxrpc_call_is_secure() */
+		read_lock_bh(&conn->lock);
+		spin_lock(&conn->state_lock);
+
+		if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
+			conn->state = RXRPC_CONN_SERVER;
+			/* unoccupied channels hold NULL, which is ignored */
+			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+				rxrpc_call_is_secure(conn->channels[loop]);
+		}
+
+		spin_unlock(&conn->state_lock);
+		read_unlock_bh(&conn->lock);
+		return 0;
+
+	default:
+		return -EPROTO;
+	}
+}
+
+/*
+ * set up the security on a connection and send a challenge to the peer
+ * - the connection must have a non-zero security index (asserted)
+ * - if the connection has no key yet, the server-side security is initialised
+ *   first
+ * - on failure the connection is aborted: with RX_CALL_DEAD if the security
+ *   setup returned -ENOENT or the challenge couldn't be issued, and with
+ *   RXKADNOAUTH for any other setup error
+ */
+static void rxrpc_secure_connection(struct rxrpc_connection *conn)
+{
+	u32 abort_code;
+	int ret;
+
+	_enter("{%d}", conn->debug_id);
+
+	ASSERT(conn->security_ix != 0);
+
+	if (!conn->key) {
+		_debug("set up security");
+		ret = rxrpc_init_server_conn_security(conn);
+		if (ret == -ENOENT) {
+			abort_code = RX_CALL_DEAD;
+			goto abort;
+		}
+		if (ret != 0) {
+			abort_code = RXKADNOAUTH;
+			goto abort;
+		}
+	}
+
+	ASSERT(conn->security != NULL);
+
+	ret = conn->security->issue_challenge(conn);
+	if (ret >= 0) {
+		_leave("");
+		return;
+	}
+
+	/* whatever the challenge failed with is reported as -ENOMEM */
+	abort_code = RX_CALL_DEAD;
+	ret = -ENOMEM;
+
+abort:
+	_debug("abort %d, %d", ret, abort_code);
+	rxrpc_abort_connection(conn, -ret, abort_code);
+	_leave(" [aborted]");
+}
+
+/*
+ * connection-level event processor
+ * - work item handler; the connection is found from the embedded work_struct
+ * - issues a security challenge if RXRPC_CONN_CHALLENGE has been flagged and
+ *   then drains conn->rx_queue through rxrpc_process_event()
+ * - a packet for which -EAGAIN is returned is put back on the head of the
+ *   queue and processing stops; -EPROTO, -EKEYEXPIRED and -EKEYREJECTED cause
+ *   the connection to be aborted; any other result just consumes the packet
+ */
+void rxrpc_process_connection(struct work_struct *work)
+{
+	struct rxrpc_connection *conn =
+		container_of(work, struct rxrpc_connection, processor);
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	u32 abort_code = RX_PROTOCOL_ERROR;
+	int ret;
+
+	_enter("{%d}", conn->debug_id);
+
+	/* hold our own ref for the duration; it is dropped at "out" */
+	atomic_inc(&conn->usage);
+
+	/* NOTE(review): the put here presumably drops a ref taken by whoever
+	 * flagged the challenge event - confirm against the code setting it */
+	if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+		rxrpc_secure_connection(conn);
+		rxrpc_put_connection(conn);
+	}
+
+	/* go through the conn-level event packets, releasing the ref on this
+	 * connection that each one has when we've finished with it */
+	while ((skb = skb_dequeue(&conn->rx_queue))) {
+		sp = rxrpc_skb(skb);
+
+		ret = rxrpc_process_event(conn, skb, &abort_code);
+		switch (ret) {
+		case -EPROTO:
+		case -EKEYEXPIRED:
+		case -EKEYREJECTED:
+			goto protocol_error;
+		case -EAGAIN:
+			goto requeue_and_leave;
+		case -ECONNABORTED:
+		default:
+			rxrpc_put_connection(conn);
+			rxrpc_free_skb(skb);
+			break;
+		}
+	}
+
+out:
+	rxrpc_put_connection(conn);
+	_leave("");
+	return;
+
+	/* the packet keeps its ref on the connection whilst it is requeued */
+requeue_and_leave:
+	skb_queue_head(&conn->rx_queue, skb);
+	goto out;
+
+protocol_error:
+	/* if the abort couldn't be sent, keep the packet to try again later */
+	if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+		goto requeue_and_leave;
+	rxrpc_put_connection(conn);
+	rxrpc_free_skb(skb);
+	_leave(" [EPROTO]");
+	goto out;
+}
+
+/*
+ * queue a packet on a local endpoint for rejection by a transport-level abort
+ * - a reference is taken on the endpoint for the queued packet; finding the
+ *   usage count already at zero is fatal (BUG)
+ * - the packet is appended to the reject queue and the rejecter work item is
+ *   queued
+ */
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+	int got_ref;
+
+	CHECK_SLAB_OKAY(&local->usage);
+
+	got_ref = atomic_inc_not_zero(&local->usage);
+	if (!got_ref) {
+		printk("resurrected on reject\n");
+		BUG();
+	}
+
+	skb_queue_tail(&local->reject_queue, skb);
+	rxrpc_queue_work(&local->rejecter);
+}
+
+/*
+ * reject packets through the local endpoint
+ * - work item handler; drains the endpoint's reject queue, answering each
+ *   packet with an ABORT packet sent back to the UDP source it came from
+ * - the abort code sent is taken from skb->priority
+ * - only AF_INET endpoints send anything; packets on endpoints of any other
+ *   family are simply discarded
+ * - each packet is freed and the endpoint reference queued with it is dropped
+ */
+void rxrpc_reject_packets(struct work_struct *work)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in sin;
+	} sa;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_header hdr;
+	struct rxrpc_local *local;
+	struct sk_buff *skb;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t size;
+	__be32 code;
+
+	local = container_of(work, struct rxrpc_local, rejecter);
+	/* pin the endpoint whilst we work; released at the bottom */
+	rxrpc_get_local(local);
+
+	_enter("%d", local->debug_id);
+
+	/* the message layout is the same for every packet, so set it up once:
+	 * a header followed by a single word holding the abort code */
+	iov[0].iov_base = &hdr;
+	iov[0].iov_len = sizeof(hdr);
+	iov[1].iov_base = &code;
+	iov[1].iov_len = sizeof(code);
+	size = sizeof(hdr) + sizeof(code);
+
+	msg.msg_name = &sa;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa.sa_family = local->srx.transport.family;
+	switch (sa.sa.sa_family) {
+	case AF_INET:
+		msg.msg_namelen = sizeof(sa.sin);
+		break;
+	default:
+		msg.msg_namelen = 0;
+		break;
+	}
+
+	/* fields not set per packet below stay zero */
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.type = RXRPC_PACKET_TYPE_ABORT;
+
+	while ((skb = skb_dequeue(&local->reject_queue))) {
+		sp = rxrpc_skb(skb);
+		switch (sa.sa.sa_family) {
+		case AF_INET:
+			/* reply to wherever the packet came from */
+			sa.sin.sin_port = udp_hdr(skb)->source;
+			sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+			code = htonl(skb->priority);
+
+			hdr.epoch = sp->hdr.epoch;
+			hdr.cid = sp->hdr.cid;
+			hdr.callNumber = sp->hdr.callNumber;
+			hdr.serviceId = sp->hdr.serviceId;
+			/* send back only the client-initiated flag, inverted
+			 * from the packet being rejected */
+			hdr.flags = sp->hdr.flags;
+			hdr.flags ^= RXRPC_CLIENT_INITIATED;
+			hdr.flags &= RXRPC_CLIENT_INITIATED;
+
+			/* best effort - the result isn't checked */
+			kernel_sendmsg(local->socket, &msg, iov, 2, size);
+			break;
+
+		default:
+			break;
+		}
+
+		rxrpc_free_skb(skb);
+		/* drop the ref taken when the packet was queued */
+		rxrpc_put_local(local);
+	}
+
+	rxrpc_put_local(local);
+	_leave("");
+}
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
new file mode 100644
index 000000000000..2c27df1ffa17
--- /dev/null
+++ b/net/rxrpc/ar-error.c
@@ -0,0 +1,253 @@
+/* Error message handling (ICMP)
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * handle an error received on the local endpoint
+ * - takes one packet off the UDP socket's error queue and finds the peer and
+ *   transport it relates to from the address and port recorded in it; the
+ *   packet is discarded if either can't be found
+ * - an ICMP "fragmentation needed" report is used to wind the peer's MTU
+ *   figures down
+ * - the packet is then passed, along with the transport ref, to the
+ *   transport's error handler work item
+ * - finally the socket's error state is regenerated from the next packet on
+ *   the error queue, if there is one
+ */
+void rxrpc_UDP_error_report(struct sock *sk)
+{
+	struct sock_exterr_skb *serr;
+	struct rxrpc_transport *trans;
+	struct rxrpc_local *local = sk->sk_user_data;
+	struct rxrpc_peer *peer;
+	struct sk_buff *skb;
+	__be32 addr;
+	__be16 port;
+
+	_enter("%p{%d}", sk, local->debug_id);
+
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (!skb) {
+		_leave("UDP socket errqueue empty");
+		return;
+	}
+
+	rxrpc_new_skb(skb);
+
+	serr = SKB_EXT_ERR(skb);
+	addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
+	port = serr->port;
+
+	_net("Rx UDP Error from "NIPQUAD_FMT":%hu",
+	     NIPQUAD(addr), ntohs(port));
+	_debug("Msg l:%d d:%d", skb->len, skb->data_len);
+
+	peer = rxrpc_find_peer(local, addr, port);
+	if (IS_ERR(peer)) {
+		rxrpc_free_skb(skb);
+		_leave(" [no peer]");
+		return;
+	}
+
+	trans = rxrpc_find_transport(local, peer);
+	if (!trans) {
+		rxrpc_put_peer(peer);
+		rxrpc_free_skb(skb);
+		_leave(" [no trans]");
+		return;
+	}
+
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+	    serr->ee.ee_type == ICMP_DEST_UNREACH &&
+	    serr->ee.ee_code == ICMP_FRAG_NEEDED) {
+		u32 mtu = serr->ee.ee_info;
+
+		_net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
+
+		/* wind down the local interface MTU */
+		if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+			peer->if_mtu = mtu;
+			_net("I/F MTU %u", mtu);
+		}
+
+		/* ip_rt_frag_needed() may have eaten the info */
+		if (mtu == 0)
+			mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
+
+		if (mtu == 0) {
+			/* they didn't give us a size, estimate one - working
+			 * from the interface MTU, since mtu itself is zero
+			 * here and would otherwise wrap on subtraction */
+			mtu = peer->if_mtu;
+			if (mtu > 1500) {
+				mtu >>= 1;
+				if (mtu < 1500)
+					mtu = 1500;
+			} else {
+				mtu -= 100;
+				if (mtu < peer->hdrsize)
+					mtu = peer->hdrsize + 4;
+			}
+		}
+
+		/* the network MTU is only ever reduced here */
+		if (mtu < peer->mtu) {
+			peer->mtu = mtu;
+			peer->maxdata = peer->mtu - peer->hdrsize;
+			_net("Net MTU %u (maxdata %u)",
+			     peer->mtu, peer->maxdata);
+		}
+	}
+
+	rxrpc_put_peer(peer);
+
+	/* pass the transport ref to error_handler to release */
+	skb_queue_tail(&trans->error_queue, skb);
+	rxrpc_queue_work(&trans->error_handler);
+
+	/* reset and regenerate socket error */
+	spin_lock_bh(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	skb = skb_peek(&sk->sk_error_queue);
+	if (skb) {
+		sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else {
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+	}
+
+	_leave("");
+}
+
+/*
+ * deal with UDP error messages
+ * - work item handler; takes a single packet off the transport's error queue
+ *   per invocation and requeues itself if more are waiting
+ * - the extended error in the packet is mapped to an errno: selected ICMP
+ *   destination-unreachable codes are translated, "fragmentation needed" is
+ *   mapped to no error, and anything else keeps the errno it arrived with
+ * - a non-zero error is recorded on the peer and every call on the peer's
+ *   error target list is removed from it, those not yet completed or failed
+ *   being put into the network error state and queued for processing
+ * - the packet is freed and a transport reference is dropped at the end
+ */
+void rxrpc_UDP_error_handler(struct work_struct *work)
+{
+	struct sock_extended_err *ee;
+	struct sock_exterr_skb *serr;
+	struct rxrpc_transport *trans =
+		container_of(work, struct rxrpc_transport, error_handler);
+	struct sk_buff *skb;
+	int local, err;	/* local is set below but never read in this function */
+
+	_enter("");
+
+	skb = skb_dequeue(&trans->error_queue);
+	if (!skb)
+		return;
+
+	serr = SKB_EXT_ERR(skb);
+	ee = &serr->ee;
+
+	_net("Rx Error o=%d t=%d c=%d e=%d",
+	     ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
+
+	/* start from the errno supplied and refine it where we know better */
+	err = ee->ee_errno;
+
+	switch (ee->ee_origin) {
+	case SO_EE_ORIGIN_ICMP:
+		local = 0;
+		switch (ee->ee_type) {
+		case ICMP_DEST_UNREACH:
+			switch (ee->ee_code) {
+			case ICMP_NET_UNREACH:
+				_net("Rx Received ICMP Network Unreachable");
+				err = ENETUNREACH;
+				break;
+			case ICMP_HOST_UNREACH:
+				_net("Rx Received ICMP Host Unreachable");
+				err = EHOSTUNREACH;
+				break;
+			case ICMP_PORT_UNREACH:
+				_net("Rx Received ICMP Port Unreachable");
+				err = ECONNREFUSED;
+				break;
+			case ICMP_FRAG_NEEDED:
+				_net("Rx Received ICMP Fragmentation Needed (%d)",
+				     ee->ee_info);
+				err = 0; /* dealt with elsewhere */
+				break;
+			case ICMP_NET_UNKNOWN:
+				_net("Rx Received ICMP Unknown Network");
+				err = ENETUNREACH;
+				break;
+			case ICMP_HOST_UNKNOWN:
+				_net("Rx Received ICMP Unknown Host");
+				err = EHOSTUNREACH;
+				break;
+			default:
+				_net("Rx Received ICMP DestUnreach code=%u",
+				     ee->ee_code);
+				break;
+			}
+			break;
+
+		case ICMP_TIME_EXCEEDED:
+			_net("Rx Received ICMP TTL Exceeded");
+			break;
+
+		default:
+			_proto("Rx Received ICMP error { type=%u code=%u }",
+			       ee->ee_type, ee->ee_code);
+			break;
+		}
+		break;
+
+	case SO_EE_ORIGIN_LOCAL:
+		_proto("Rx Received local error { error=%d }",
+		       ee->ee_errno);
+		local = 1;
+		break;
+
+	case SO_EE_ORIGIN_NONE:
+	case SO_EE_ORIGIN_ICMP6:
+	default:
+		_proto("Rx Received error report { orig=%u }",
+		       ee->ee_origin);
+		local = 0;
+		break;
+	}
+
+	/* terminate all the affected calls if there's an unrecoverable
+	 * error */
+	if (err) {
+		struct rxrpc_call *call, *_n;
+
+		_debug("ISSUE ERROR %d", err);
+
+		spin_lock_bh(&trans->peer->lock);
+		trans->peer->net_error = err;
+
+		list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
+					 error_link) {
+			write_lock(&call->state_lock);
+			if (call->state != RXRPC_CALL_COMPLETE &&
+			    call->state < RXRPC_CALL_NETWORK_ERROR) {
+				call->state = RXRPC_CALL_NETWORK_ERROR;
+				set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+				rxrpc_queue_call(call);
+			}
+			write_unlock(&call->state_lock);
+			/* every call comes off the list, changed or not */
+			list_del_init(&call->error_link);
+		}
+
+		spin_unlock_bh(&trans->peer->lock);
+	}
+
+	/* only one packet is handled per run, so go round again if needed */
+	if (!skb_queue_empty(&trans->error_queue))
+		rxrpc_queue_work(&trans->error_handler);
+
+	rxrpc_free_skb(skb);
+	rxrpc_put_transport(trans);
+	_leave("");
+}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
new file mode 100644
index 000000000000..91b5bbb003e2
--- /dev/null
+++ b/net/rxrpc/ar-input.c
@@ -0,0 +1,797 @@
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+unsigned long rxrpc_ack_timeout = 1;	/* units not shown here - TODO confirm */
+
+const char *rxrpc_pkts[] = {	/* printable names, indexed by hdr.type */
+	"?00",	/* type 0 is rejected as bad by rxrpc_data_ready() */
+	"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
+	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
+};
+
+/*
+ * queue a packet for recvmsg to pass to userspace (or for the interceptor)
+ * - the caller must hold a lock on call->lock
+ * - must not be called with interrupts disabled (sk_filter() disables BH's)
+ * - eats the packet whether successful or not; if the packet isn't queued,
+ *   the call ref that accompanies sp->call is dropped as well
+ * - there must be just one reference to the packet, passed in by the caller
+ */
+int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
+			bool force, bool terminal)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_sock *rx = call->socket;
+	struct sock *sk;
+	int skb_len, ret;
+
+	_enter(",,%d,%d", force, terminal);
+
+	ASSERT(!irqs_disabled());
+
+	sp = rxrpc_skb(skb);
+	ASSERTCMP(sp->call, ==, call);	/* caller must have attached the call */
+
+	/* if we've already posted the terminal message for a call, then we
+	 * don't post any more */
+	if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+		_debug("already terminated");
+		ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
+		skb->destructor = NULL;
+		sp->call = NULL;
+		rxrpc_put_call(call);
+		rxrpc_free_skb(skb);
+		return 0;	/* dropping here is not reported as an error */
+	}
+
+	sk = &rx->sk;
+
+	if (!force) {	/* force bypasses the socket filter */
+		/* cast skb->rcvbuf to unsigned...  It's pointless, but
+		 * reduces number of warnings when compiling with -W
+		 * --ANK */
+//		ret = -ENOBUFS;
+//		if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+//		    (unsigned) sk->sk_rcvbuf)
+//			goto out;
+
+		ret = sk_filter(sk, skb);
+		if (ret < 0)
+			goto out;	/* skb still set, so it's freed at out */
+	}
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	/* recheck under the queue lock that the call and socket are still live */
+	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
+	    !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    call->socket->sk.sk_state != RXRPC_CLOSE) {
+		skb->destructor = rxrpc_packet_destructor;
+		skb->dev = NULL;
+		skb->sk = sk;
+		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+		if (terminal) {	/* nothing more is posted after this one */
+			_debug("<<<< TERMINAL MESSAGE >>>>");
+			set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
+		}
+
+		/* allow interception by a kernel service */
+		if (rx->interceptor) {
+			rx->interceptor(sk, call->user_call_ID, skb);
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
+		} else {
+
+			/* Cache the SKB length before we tack it onto the
+			 * receive queue.  Once it is added it no longer
+			 * belongs to us and may be freed by other threads of
+			 * control pulling packets from the queue */
+			skb_len = skb->len;
+
+			_net("post skb %p", skb);
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_data_ready(sk, skb_len);
+		}
+		skb = NULL;	/* handed over: don't free it below */
+	} else {	/* call finished/released or socket closing: drop */
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+	ret = 0;
+
+out:
+	/* release the socket buffer */
+	if (skb) {
+		skb->destructor = NULL;
+		sp->call = NULL;
+		rxrpc_put_call(call);
+		rxrpc_free_skb(skb);
+	}
+
+	_leave(" = %d", ret);
+	return ret;	/* 0, or the negative error from sk_filter() */
+}
+
+/*
+ * process a DATA packet, posting the packet to the appropriate queue
+ * - returns 0 if the packet was posted, deferred or discarded (packet eaten)
+ * - returns -EBADMSG for data after the last packet (packet left to caller)
+ * - passes on any other rxrpc_queue_rcv_skb() error (packet already eaten)
+ */
+static int rxrpc_fast_process_data(struct rxrpc_call *call,
+				   struct sk_buff *skb, u32 seq)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	__be32 serial = sp->hdr.serial;	/* sp lives in skb->cb, so copy */
+	bool terminal, last;
+	int ret, ackbit, ack;
+
+	_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+
+	ASSERTCMP(sp->call, ==, NULL);
+
+	spin_lock(&call->lock);
+
+	if (call->state > RXRPC_CALL_COMPLETE)
+		goto discard;
+
+	ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
+	ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
+	ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+
+	if (seq < call->rx_data_post) {
+		_debug("dup #%u [-%u]", seq, call->rx_data_post);
+		ack = RXRPC_ACK_DUPLICATE;
+		goto discard_and_ack;
+	}
+
+	/* check the window first so ackbit can't index beyond the bitmap */
+	if (seq >= call->ackr_win_top) {
+		_debug("exceed #%u [%u]", seq, call->ackr_win_top);
+		ack = RXRPC_ACK_EXCEEDS_WINDOW;
+		goto discard_and_ack;
+	}
+
+	/* we may already have the packet in the out of sequence queue */
+	ackbit = seq - (call->rx_data_eaten + 1);
+	ASSERTCMP(ackbit, >=, 0);
+	if (__test_and_set_bit(ackbit, call->ackr_window)) {
+		_debug("dup oos #%u [%u,%u]",
+		       seq, call->rx_data_eaten, call->rx_data_post);
+		ack = RXRPC_ACK_DUPLICATE;
+		goto discard_and_ack;
+	}
+
+	if (seq == call->rx_data_expect) {
+		clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
+		call->rx_data_expect++;
+	} else if (seq > call->rx_data_expect) {
+		_debug("oos #%u [%u]", seq, call->rx_data_expect);
+		call->rx_data_expect = seq + 1;
+		if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
+			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+			goto enqueue_and_ack;
+		}
+		goto enqueue_packet;
+	}
+
+	if (seq != call->rx_data_post) {
+		_debug("ahead #%u [%u]", seq, call->rx_data_post);
+		goto enqueue_packet;
+	}
+
+	if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
+		goto protocol_error;
+
+	/* if the packet need security things doing to it, then it goes down
+	 * the slow path */
+	if (call->conn->security)
+		goto enqueue_packet;
+
+	sp->call = call;
+	rxrpc_get_call(call);
+	last = sp->hdr.flags & RXRPC_LAST_PACKET;
+	terminal = last && !(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
+	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
+	skb = NULL;	/* eaten, even on failure: don't touch skb or sp */
+	if (ret < 0) {
+		if (ret == -ENOMEM || ret == -ENOBUFS) {
+			__clear_bit(ackbit, call->ackr_window);
+			ack = RXRPC_ACK_NOSPACE;
+			goto discard_and_ack;
+		}
+		goto out;	/* NOTE(review): the caller BUG()s on this */
+	}
+
+	_debug("post #%u", seq);
+	ASSERTCMP(call->rx_data_post, ==, seq);
+	call->rx_data_post++;
+
+	if (last)
+		set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
+
+	/* if we've reached an out of sequence packet then we need to drain
+	 * that queue into the socket Rx queue now */
+	if (call->rx_data_post == call->rx_first_oos) {
+		_debug("drain rx oos now");
+		read_lock(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE &&
+		    !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
+			rxrpc_queue_call(call);
+		read_unlock(&call->state_lock);
+	}
+
+	spin_unlock(&call->lock);
+	atomic_inc(&call->ackr_not_idle);
+	rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
+	_leave(" = 0 [posted]");
+	return 0;
+
+protocol_error:
+	ret = -EBADMSG;
+out:
+	spin_unlock(&call->lock);
+	_leave(" = %d", ret);
+	return ret;
+
+discard_and_ack:
+	_debug("discard and ACK packet %p", skb);
+	__rxrpc_propose_ACK(call, ack, serial, true);
+discard:
+	spin_unlock(&call->lock);
+	rxrpc_free_skb(skb);	/* skb is NULL if it was already eaten */
+	_leave(" = 0 [discarded]");
+	return 0;
+
+enqueue_and_ack:
+	__rxrpc_propose_ACK(call, ack, serial, true);
+enqueue_packet:
+	_net("defer skb %p", skb);
+	spin_unlock(&call->lock);
+	skb_queue_tail(&call->rx_queue, skb);
+	atomic_inc(&call->ackr_not_idle);
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_DEAD)
+		rxrpc_queue_call(call);
+	read_unlock(&call->state_lock);
+	_leave(" = 0 [queued]");
+	return 0;
+}
+
+/*
+ * assume an implicit ACKALL of the transmission phase of a client socket upon
+ * reception of the first reply packet (no-op unless awaiting the reply)
+ */
+static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+{
+	write_lock_bh(&call->state_lock);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+		call->acks_latest = serial;	/* serial of the reply packet */
+
+		_debug("implicit ACKALL %%%u", call->acks_latest);
+		set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events);
+		write_unlock_bh(&call->state_lock);
+
+		/* the request is fully ACK'd, so stop resending if we can */
+		if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+			clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+			clear_bit(RXRPC_CALL_RESEND, &call->events);
+			clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		}
+		break;
+
+	default:	/* any other state: nothing to do */
+		write_unlock_bh(&call->state_lock);
+		break;
+	}
+}
+
+/*
+ * process an incoming packet for the nominated call, by packet type
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	__be32 _abort_code;
+	u32 serial, hi_serial, seq, abort_code;
+
+	_enter("%p,%p", call, skb);
+
+	ASSERT(!irqs_disabled());
+
+#if 0 // INJECT RX ERROR
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+		static int skip = 0;
+		if (++skip == 3) {
+			printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
+			skip = 0;
+			goto free_packet;
+		}
+	}
+#endif
+
+	/* track the latest serial number on this connection for ACK packet
+	 * information */
+	serial = ntohl(sp->hdr.serial);
+	hi_serial = atomic_read(&call->conn->hi_serial);
+	while (serial > hi_serial)	/* retry if someone else raised it */
+		hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
+					   serial);
+
+	/* request ACK generation for any ACK or DATA packet that requests
+	 * it */
+	if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+		_proto("ACK Requested on %%%u", serial);
+		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
+				  !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+	}
+
+	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		_debug("abort");
+
+		/* the abort code is the first 32 bits of the payload */
+		if (skb_copy_bits(skb, 0, &_abort_code,
+				  sizeof(_abort_code)) < 0)
+			goto protocol_error;
+
+		abort_code = ntohl(_abort_code);
+		_proto("Rx ABORT %%%u { %x }", serial, abort_code);
+
+		write_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE) {
+			call->state = RXRPC_CALL_REMOTELY_ABORTED;
+			call->abort_code = abort_code;
+			set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+			rxrpc_queue_call(call);
+		}
+		goto free_packet_unlock;
+
+	case RXRPC_PACKET_TYPE_BUSY:
+		_proto("Rx BUSY %%%u", serial);
+		/* we must be the client; in_clientflag is set on a server */
+		if (call->conn->in_clientflag)
+			goto protocol_error;
+
+		write_lock_bh(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_SEND_REQUEST:
+			call->state = RXRPC_CALL_SERVER_BUSY;
+			set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+			rxrpc_queue_call(call);	/* fall through */
+		case RXRPC_CALL_SERVER_BUSY:
+			goto free_packet_unlock;
+		default:
+			goto protocol_error_locked;
+		}
+
+	default:	/* packet types that aren't handled at call level */
+		_proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial);
+		goto protocol_error;
+
+	case RXRPC_PACKET_TYPE_DATA:
+		seq = ntohl(sp->hdr.seq);
+
+		_proto("Rx DATA %%%u { #%u }", serial, seq);
+
+		if (seq == 0)
+			goto protocol_error;
+
+		call->ackr_prev_seq = sp->hdr.seq;
+
+		/* received data implicitly ACKs all of the request packets we
+		 * sent when we're acting as a client */
+		if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+			rxrpc_assume_implicit_ackall(call, serial);
+
+		switch (rxrpc_fast_process_data(call, skb, seq)) {
+		case 0:	/* the packet has been eaten */
+			skb = NULL;
+			goto done;
+
+		default:
+			BUG();
+
+			/* data packet received beyond the last packet */
+		case -EBADMSG:
+			goto protocol_error;
+		}
+
+	case RXRPC_PACKET_TYPE_ACK:
+		/* ACK processing is done in process context */
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD) {
+			skb_queue_tail(&call->rx_queue, skb);
+			rxrpc_queue_call(call);
+			skb = NULL;
+		}
+		read_unlock_bh(&call->state_lock);
+		goto free_packet;	/* skb is NULL here if it was queued */
+	}
+
+protocol_error:
+	_debug("protocol error");
+	write_lock_bh(&call->state_lock);
+protocol_error_locked:
+	if (call->state <= RXRPC_CALL_COMPLETE) {
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_PROTOCOL_ERROR;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		rxrpc_queue_call(call);
+	}
+free_packet_unlock:
+	write_unlock_bh(&call->state_lock);
+free_packet:
+	rxrpc_free_skb(skb);
+done:
+	_leave("");
+}
+
+/*
+ * split up a jumbo data packet, passing each subpacket to the call in turn
+ */
+static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
+				       struct sk_buff *jumbo)
+{
+	struct rxrpc_jumbo_header jhdr;
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *part;
+
+	_enter(",{%u,%u}", jumbo->data_len, jumbo->len);
+
+	sp = rxrpc_skb(jumbo);
+
+	do {
+		sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
+
+		/* make a clone to represent the first subpacket in what's left
+		 * of the jumbo packet */
+		part = skb_clone(jumbo, GFP_ATOMIC);
+		if (!part) {
+			/* simply ditch the tail in the event of ENOMEM */
+			pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
+			break;
+		}
+		rxrpc_new_skb(part);
+
+		pskb_trim(part, RXRPC_JUMBO_DATALEN);
+
+		/* step the original past this subpacket's data... */
+		if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
+			goto protocol_error;
+
+		/* ...and past the jumbo header that follows it */
+		if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
+			goto protocol_error;
+		if (!pskb_pull(jumbo, sizeof(jhdr)))
+			BUG();
+		/* what's left is the next subpacket: renumber and reflag it */
+		sp->hdr.seq	= htonl(ntohl(sp->hdr.seq) + 1);
+		sp->hdr.serial	= htonl(ntohl(sp->hdr.serial) + 1);
+		sp->hdr.flags	= jhdr.flags;
+		sp->hdr._rsvd	= jhdr._rsvd;
+
+		_proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1);
+
+		rxrpc_fast_process_packet(call, part);
+		part = NULL;	/* the callee disposed of it */
+
+	} while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+
+	/* whatever remains of the original is the final subpacket */
+	rxrpc_fast_process_packet(call, jumbo);
+	_leave("");
+	return;
+
+protocol_error:
+	_debug("protocol error");
+	rxrpc_free_skb(part);
+	rxrpc_free_skb(jumbo);
+	write_lock_bh(&call->state_lock);
+	if (call->state <= RXRPC_CALL_COMPLETE) {
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_PROTOCOL_ERROR;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		rxrpc_queue_call(call);
+	}
+	write_unlock_bh(&call->state_lock);
+	_leave("");
+}
+
+/*
+ * post an incoming packet to the appropriate call/socket to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+				      struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_call *call;
+	struct rb_node *p;
+	__be32 call_id;
+
+	_enter("%p,%p", conn, skb);
+
+	read_lock_bh(&conn->lock);
+
+	sp = rxrpc_skb(skb);
+
+	/* look at extant calls by channel number first */
+	call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
+	if (!call || call->call_id != sp->hdr.callNumber)
+		goto call_not_extant;
+
+	_debug("extant call [%d]", call->state);
+	ASSERTCMP(call->conn, ==, conn);
+
+	read_lock(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+			rxrpc_queue_call(call);	/* fall through */
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_NETWORK_ERROR:
+	case RXRPC_CALL_DEAD:
+		goto free_unlock;	/* the call is finished: drop the packet */
+	default:
+		break;
+	}
+
+	read_unlock(&call->state_lock);
+	rxrpc_get_call(call);	/* hold the call once conn->lock is dropped */
+	read_unlock_bh(&conn->lock);
+
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+	    sp->hdr.flags & RXRPC_JUMBO_PACKET)
+		rxrpc_process_jumbo_packet(call, skb);
+	else
+		rxrpc_fast_process_packet(call, skb);
+
+	rxrpc_put_call(call);
+	goto done;
+
+call_not_extant:
+	/* search the completed calls in case what we're dealing with is
+	 * there */
+	_debug("call not extant");
+
+	call_id = sp->hdr.callNumber;
+	p = conn->calls.rb_node;
+	while (p) {
+		call = rb_entry(p, struct rxrpc_call, conn_node);
+
+		if (call_id < call->call_id)
+			p = p->rb_left;
+		else if (call_id > call->call_id)
+			p = p->rb_right;
+		else
+			goto found_completed_call;
+	}
+
+dead_call:
+	/* it's either a really old call that we no longer remember or it's a
+	 * new incoming call */
+	read_unlock_bh(&conn->lock);
+
+	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+	    sp->hdr.seq == __constant_cpu_to_be32(1)) {
+		_debug("incoming call");
+		skb_queue_tail(&conn->trans->local->accept_queue, skb);
+		rxrpc_queue_work(&conn->trans->local->acceptor);
+		goto done;
+	}
+
+	_debug("dead call");
+	skb->priority = RX_CALL_DEAD;	/* priority carries the abort code */
+	rxrpc_reject_packet(conn->trans->local, skb);
+	goto done;
+
+	/* resend last packet of a completed call
+	 * - client calls may have been aborted or ACK'd
+	 * - server calls may have been aborted
+	 */
+found_completed_call:
+	_debug("completed call");
+
+	if (atomic_read(&call->usage) == 0)
+		goto dead_call;
+
+	/* synchronise any state changes */
+	read_lock(&call->state_lock);
+	ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
+		    call->state, >=, RXRPC_CALL_COMPLETE);
+
+	if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
+	    call->state == RXRPC_CALL_REMOTELY_ABORTED ||
+	    call->state == RXRPC_CALL_DEAD) {
+		read_unlock(&call->state_lock);
+		goto dead_call;
+	}
+
+	if (call->conn->in_clientflag) {
+		read_unlock(&call->state_lock);
+		goto dead_call; /* complete server call */
+	}
+
+	_debug("final ack again");
+	rxrpc_get_call(call);
+	set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+	rxrpc_queue_call(call);
+
+free_unlock:
+	read_unlock(&call->state_lock);
+	read_unlock_bh(&conn->lock);
+	rxrpc_free_skb(skb);
+done:
+	_leave("");
+}
+
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses and some aborts
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+				      struct sk_buff *skb)
+{
+	_enter("%p,%p", conn, skb);
+
+	atomic_inc(&conn->usage);	/* taken before the packet is queued */
+	skb_queue_tail(&conn->rx_queue, skb);
+	rxrpc_queue_conn(conn);	/* kick conn->processor to handle it */
+}
+
+/*
+ * handle data received on the local endpoint (takes one packet per call)
+ * - may be called in interrupt context
+ */
+void rxrpc_data_ready(struct sock *sk, int count)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_local *local;
+	struct rxrpc_peer *peer;
+	struct sk_buff *skb;
+	int ret;
+
+	_enter("%p, %d", sk, count);
+
+	ASSERT(!irqs_disabled());
+
+	/* pin the local endpoint, unless it's already on its way out */
+	read_lock_bh(&rxrpc_local_lock);
+	local = sk->sk_user_data;
+	if (local && atomic_read(&local->usage) > 0)
+		rxrpc_get_local(local);
+	else
+		local = NULL;
+	read_unlock_bh(&rxrpc_local_lock);
+	if (!local) {
+		_leave(" [local dead]");
+		return;
+	}
+
+	skb = skb_recv_datagram(sk, 0, 1, &ret);
+	if (!skb) {
+		rxrpc_put_local(local);
+		if (ret == -EAGAIN)	/* nothing queued: not worth logging */
+			return;
+		_debug("UDP socket error %d", ret);
+		return;
+	}
+
+	rxrpc_new_skb(skb);
+
+	_net("recv skb %p", skb);
+
+	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
+	if (skb_checksum_complete(skb)) {
+		rxrpc_free_skb(skb);
+		rxrpc_put_local(local);
+		_leave(" [CSUM failed]");
+		return;
+	}
+
+	/* the socket buffer we have is owned by UDP, with UDP's data all over
+	 * it, but we really want our own */
+	skb_orphan(skb);
+	sp = rxrpc_skb(skb);
+	memset(sp, 0, sizeof(*sp));	/* start with a clean private area */
+
+	_net("Rx UDP packet from %08x:%04hu",
+	     ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
+
+	/* dig out the RxRPC connection details */
+	if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr,
+			  sizeof(sp->hdr)) < 0)
+		goto bad_message;
+	if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
+		BUG();
+
+	_net("Rx RxRPC %s ep=%x call=%x:%x",
+	     sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
+	     ntohl(sp->hdr.epoch),
+	     ntohl(sp->hdr.cid),
+	     ntohl(sp->hdr.callNumber));
+
+	if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) {
+		_proto("Rx Bad Packet Type %u", sp->hdr.type);
+		goto bad_message;
+	}
+
+	/* DATA packets must name a call and carry a non-zero sequence number */
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
+		goto bad_message;
+
+	/* walk peer -> transport -> connection, dropping each ref when done */
+	peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
+	if (IS_ERR(peer))
+		goto cant_route_call;
+
+	trans = rxrpc_find_transport(local, peer);
+	rxrpc_put_peer(peer);
+	if (!trans)
+		goto cant_route_call;
+
+	conn = rxrpc_find_connection(trans, &sp->hdr);
+	rxrpc_put_transport(trans);
+	if (!conn)
+		goto cant_route_call;
+
+	_debug("CONN %p {%d}", conn, conn->debug_id);
+
+	if (sp->hdr.callNumber == 0)	/* call 0: connection-level packet */
+		rxrpc_post_packet_to_conn(conn, skb);
+	else
+		rxrpc_post_packet_to_call(conn, skb);
+	rxrpc_put_connection(conn);
+	rxrpc_put_local(local);
+	return;
+
+cant_route_call:
+	_debug("can't route call");
+	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+	    sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+		if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
+			_debug("first packet");
+			skb_queue_tail(&local->accept_queue, skb);
+			rxrpc_queue_work(&local->acceptor);
+			rxrpc_put_local(local);
+			_leave(" [incoming]");
+			return;
+		}
+		skb->priority = RX_INVALID_OPERATION;
+	} else {
+		skb->priority = RX_CALL_DEAD;
+	}
+
+	_debug("reject");
+	rxrpc_reject_packet(local, skb);	/* takes over the packet */
+	rxrpc_put_local(local);
+	_leave(" [no call]");
+	return;
+
+bad_message:
+	skb->priority = RX_PROTOCOL_ERROR;	/* abort code for the rejecter */
+	rxrpc_reject_packet(local, skb);
+	rxrpc_put_local(local);
+	_leave(" [badmsg]");
+}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 000000000000..58aaf892238e
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,808 @@
+/* AF_RXRPC internal definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <rxrpc/packet.h>
+
+#if 0 /* debugging check on an atomic counter, compiled out */
+#define CHECK_SLAB_OKAY(X)				     \
+	BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
+	       (POISON_FREE << 8 | POISON_FREE))
+#else /* normal build: the check expands to nothing */
+#define CHECK_SLAB_OKAY(X) do {} while(0)
+#endif
+
+#define FCRYPT_BSIZE 8	/* number of bytes in rxrpc_crypt::x */
+struct rxrpc_crypt {	/* eight bytes, viewable as octets or as two words */
+	union {
+		u8	x[FCRYPT_BSIZE];
+		u32	n[2];
+	};
+} __attribute__((aligned(8)));
+
+#define rxrpc_queue_work(WS)	queue_work(rxrpc_workqueue, (WS)) /* run WS */
+#define rxrpc_queue_delayed_work(WS,D)	\
+	queue_delayed_work(rxrpc_workqueue, (WS), (D))
+/* queue the processor work item embedded in a call or a connection */
+#define rxrpc_queue_call(CALL)	rxrpc_queue_work(&(CALL)->processor)
+#define rxrpc_queue_conn(CONN)	rxrpc_queue_work(&(CONN)->processor)
+
+/*
+ * sk_state for RxRPC sockets (rxrpc_queue_rcv_skb() won't queue in CLOSE)
+ */
+enum {
+	RXRPC_UNCONNECTED = 0,
+	RXRPC_CLIENT_BOUND,		/* client local address bound */
+	RXRPC_CLIENT_CONNECTED,		/* client is connected */
+	RXRPC_SERVER_BOUND,		/* server local address bound */
+	RXRPC_SERVER_LISTENING,		/* server listening for connections */
+	RXRPC_CLOSE,			/* socket is being closed */
+};
+
+/*
+ * RxRPC socket definition (get from a struct sock with rxrpc_sk())
+ */
+struct rxrpc_sock {
+	/* WARNING: sk has to be the first member */
+	struct sock		sk;
+	rxrpc_interceptor_t	interceptor;	/* kernel service Rx interceptor function */
+	struct rxrpc_local	*local;		/* local endpoint */
+	struct rxrpc_transport	*trans;		/* transport handler */
+	struct rxrpc_conn_bundle *bundle;	/* virtual connection bundle */
+	struct rxrpc_connection	*conn;		/* exclusive virtual connection */
+	struct list_head	listen_link;	/* link in the local endpoint's listen list */
+	struct list_head	secureq;	/* calls awaiting connection security clearance */
+	struct list_head	acceptq;	/* calls awaiting acceptance */
+	struct key		*key;		/* security for this socket */
+	struct key		*securities;	/* list of server security descriptors */
+	struct rb_root		calls;		/* outstanding calls on this socket */
+	unsigned long		flags;		/* socket flags (defined below) */
+#define RXRPC_SOCK_EXCLUSIVE_CONN	1	/* exclusive connection for a client socket */
+	rwlock_t		call_lock;	/* lock for calls */
+	u32			min_sec_level;	/* minimum security level */
+#define RXRPC_SECURITY_MAX	RXRPC_SECURITY_ENCRYPT
+	struct sockaddr_rxrpc	srx;		/* local address */
+	sa_family_t		proto;		/* protocol created with */
+	__be16			service_id;	/* service ID of local/remote service */
+};
+/* convert a pointer to the embedded struct sock back to the rxrpc_sock */
+#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
+
+/*
+ * RxRPC socket buffer private variables (overlaid on skb->cb by rxrpc_skb())
+ * - max 48 bytes (struct sk_buff::cb)
+ */
+struct rxrpc_skb_priv {
+	struct rxrpc_call	*call;		/* call with which associated */
+	unsigned long		resend_at;	/* time in jiffies at which to resend */
+	union {
+		unsigned	offset;		/* offset into buffer of next read */
+		int		remain;		/* amount of space remaining for next write */
+		u32		error;		/* network error code */
+		bool		need_resend;	/* T if needs resending */
+	};
+
+	struct rxrpc_header	hdr;		/* RxRPC packet header from this packet */
+};
+/* access the private variables held in a socket buffer's control block */
+#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
+
+enum rxrpc_command {	/* NOTE(review): where these are consumed isn't shown */
+	RXRPC_CMD_SEND_DATA,		/* send data message */
+	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
+	RXRPC_CMD_ACCEPT,		/* [server] accept incoming call */
+	RXRPC_CMD_REJECT_BUSY,		/* [server] reject a call as busy */
+};
+
+/*
+ * RxRPC security module interface (a connection's ->security points to one)
+ */
+struct rxrpc_security {
+	struct module		*owner;		/* providing module */
+	struct list_head	link;		/* link in master list */
+	const char		*name;		/* name of this service */
+	u8			security_index;	/* security type provided */
+
+	/* initialise a connection's security */
+	int (*init_connection_security)(struct rxrpc_connection *);
+
+	/* prime a connection's packet security */
+	void (*prime_packet_security)(struct rxrpc_connection *);
+
+	/* impose security on a packet */
+	int (*secure_packet)(const struct rxrpc_call *,
+			     struct sk_buff *,
+			     size_t,
+			     void *);
+
+	/* verify the security on a received packet */
+	int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
+			     u32 *);
+
+	/* issue a challenge */
+	int (*issue_challenge)(struct rxrpc_connection *);
+
+	/* respond to a challenge */
+	int (*respond_to_challenge)(struct rxrpc_connection *,
+				    struct sk_buff *,
+				    u32 *);
+
+	/* verify a response */
+	int (*verify_response)(struct rxrpc_connection *,
+			       struct sk_buff *,
+			       u32 *);
+
+	/* clear connection security */
+	void (*clear)(struct rxrpc_connection *);
+};
+
+/*
+ * RxRPC local transport endpoint definition
+ * - matched by local port, address and protocol type
+ */
+struct rxrpc_local {
+	struct socket		*socket;	/* my UDP socket */
+	struct work_struct	destroyer;	/* endpoint destroyer */
+	struct work_struct	acceptor;	/* incoming call processor */
+	struct work_struct	rejecter;	/* packet reject writer */
+	struct list_head	services;	/* services listening on this endpoint */
+	struct list_head	link;		/* link in endpoint list */
+	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
+	struct sk_buff_head	accept_queue;	/* incoming calls awaiting acceptance */
+	struct sk_buff_head	reject_queue;	/* packets awaiting rejection */
+	spinlock_t		lock;		/* access lock */
+	rwlock_t		services_lock;	/* lock for services list */
+	atomic_t		usage;		/* ref count; Rx treats 0 as dead */
+	int			debug_id;	/* debug ID for printks */
+	volatile char		error_rcvd;	/* T if received ICMP error outstanding */
+	struct sockaddr_rxrpc	srx;		/* local address */
+};
+
+/*
+ * RxRPC remote transport endpoint definition
+ * - matched by remote port, address and protocol type
+ * - holds the connection ID counter for connections between the two endpoints
+ */
+struct rxrpc_peer {
+	struct work_struct	destroyer;	/* peer destroyer */
+	struct list_head	link;		/* link in master peer list */
+	struct list_head	error_targets;	/* targets for net error distribution */
+	spinlock_t		lock;		/* access lock */
+	atomic_t		usage;		/* presumably the ref count - confirm */
+	unsigned		if_mtu;		/* interface MTU for this peer */
+	unsigned		mtu;		/* network MTU for this peer */
+	unsigned		maxdata;	/* data size (MTU - hdrsize) */
+	unsigned short		hdrsize;	/* header size (IP + UDP + RxRPC) */
+	int			debug_id;	/* debug ID for printks */
+	int			net_error;	/* network error distributed */
+	struct sockaddr_rxrpc	srx;		/* remote address */
+
+	/* calculated RTT cache */
+#define RXRPC_RTT_CACHE_SIZE 32
+	suseconds_t		rtt;		/* current RTT estimate (in uS) */
+	unsigned		rtt_point;	/* next entry at which to insert */
+	unsigned		rtt_usage;	/* amount of cache actually used */
+	suseconds_t		rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+};
+
+/*
+ * RxRPC point-to-point transport / connection manager definition
+ * - handles a bundle of connections between two endpoints
+ * - matched by { local, peer }
+ */
+struct rxrpc_transport {
+	struct rxrpc_local	*local;		/* local transport endpoint */
+	struct rxrpc_peer	*peer;		/* remote transport endpoint */
+	struct work_struct	error_handler;	/* network error distributor */
+	struct rb_root		bundles;	/* client connection bundles on this transport */
+	struct rb_root		client_conns;	/* client connections on this transport */
+	struct rb_root		server_conns;	/* server connections on this transport */
+	struct list_head	link;		/* link in master session list */
+	struct sk_buff_head	error_queue;	/* error packets awaiting processing */
+	time_t			put_time;	/* time at which to reap */
+	spinlock_t		client_lock;	/* client connection allocation lock */
+	rwlock_t		conn_lock;	/* lock for active/dead connections */
+	atomic_t		usage;		/* presumably the ref count - confirm */
+	int			debug_id;	/* debug ID for printks */
+	unsigned int		conn_idcounter;	/* connection ID counter (client) */
+};
+
+/*
+ * RxRPC client connection bundle
+ * - matched by { transport, service_id, key }
+ */
+struct rxrpc_conn_bundle {
+	struct rb_node		node;		/* node in transport's lookup tree */
+	struct list_head	unused_conns;	/* unused connections in this bundle */
+	struct list_head	avail_conns;	/* available connections in this bundle */
+	struct list_head	busy_conns;	/* busy connections in this bundle */
+	struct key		*key;		/* security for this bundle */
+	wait_queue_head_t	chanwait;	/* wait for channel to become available */
+	atomic_t		usage;		/* presumably the ref count - confirm */
+	int			debug_id;	/* debug ID for printks */
+	unsigned short		num_conns;	/* number of connections in this bundle */
+	__be16			service_id;	/* service ID */
+	uint8_t			security_ix;	/* security type */
+};
+
+/*
+ * RxRPC connection definition
+ * - matched by { transport, service_id, conn_id, direction, key }
+ * - each connection can only handle four simultaneous calls
+ */
+struct rxrpc_connection {
+	struct rxrpc_transport	*trans;		/* transport session */
+	struct rxrpc_conn_bundle *bundle;	/* connection bundle (client) */
+	struct work_struct	processor;	/* connection event processor */
+	struct rb_node		node;		/* node in transport's lookup tree */
+	struct list_head	link;		/* link in master connection list */
+	struct list_head	bundle_link;	/* link in bundle */
+	struct rb_root		calls;		/* calls on this connection */
+	struct sk_buff_head	rx_queue;	/* received conn-level packets */
+	struct rxrpc_call	*channels[RXRPC_MAXCALLS]; /* channels (active calls) */
+	struct rxrpc_security	*security;	/* applied security module */
+	struct key		*key;		/* security for this connection (client) */
+	struct key		*server_key;	/* security for this service */
+	struct crypto_blkcipher	*cipher;	/* encryption handle */
+	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
+	unsigned long		events;		/* event bits (defined below) */
+#define RXRPC_CONN_CHALLENGE	0		/* send challenge packet */
+	time_t			put_time;	/* time at which to reap */
+	rwlock_t		lock;		/* access lock */
+	spinlock_t		state_lock;	/* state-change lock */
+	atomic_t		usage;		/* ref count (Rx bumps it on queuing) */
+	u32			real_conn_id;	/* connection ID (host-endian) */
+	enum {					/* current state of connection */
+		RXRPC_CONN_UNUSED,		/* - connection not yet attempted */
+		RXRPC_CONN_CLIENT,		/* - client connection */
+		RXRPC_CONN_SERVER_UNSECURED,	/* - server unsecured connection */
+		RXRPC_CONN_SERVER_CHALLENGING,	/* - server challenging for security */
+		RXRPC_CONN_SERVER,		/* - server secured connection */
+		RXRPC_CONN_REMOTELY_ABORTED,	/* - conn aborted by peer */
+		RXRPC_CONN_LOCALLY_ABORTED,	/* - conn aborted locally */
+		RXRPC_CONN_NETWORK_ERROR,	/* - conn terminated by network error */
+	} state;
+	int			error;		/* error code for local abort */
+	int			debug_id;	/* debug ID for printks */
+	unsigned		call_counter;	/* call ID counter */
+	atomic_t		serial;		/* packet serial number counter */
+	atomic_t		hi_serial;	/* highest serial number received */
+	u8			avail_calls;	/* number of calls available */
+	u8			size_align;	/* data size alignment (for security) */
+	u8			header_size;	/* rxrpc + security header size */
+	u8			security_size;	/* security header size */
+	u32			security_level;	/* security level negotiated */
+	u32			security_nonce;	/* response re-use preventer */
+
+	/* the following are all in net order */
+	__be32			epoch;		/* epoch of this connection */
+	__be32			cid;		/* connection ID */
+	__be16			service_id;	/* service ID */
+	u8			security_ix;	/* security type */
+	u8			in_clientflag;	/* RXRPC_CLIENT_INITIATED if we are server */
+	u8			out_clientflag;	/* RXRPC_CLIENT_INITIATED if we are client */
+};
+
+/*
+ * RxRPC call definition
+ * - matched by { connection, call_id }
+ */
+struct rxrpc_call {
+	struct rxrpc_connection	*conn;		/* connection carrying call */
+	struct rxrpc_sock	*socket;	/* socket responsible */
+	struct timer_list	lifetimer;	/* lifetime remaining on call */
+	struct timer_list	deadspan;	/* reap timer for re-ACK'ing, etc  */
+	struct timer_list	ack_timer;	/* ACK generation timer */
+	struct timer_list	resend_timer;	/* Tx resend timer */
+	struct work_struct	destroyer;	/* call destroyer */
+	struct work_struct	processor;	/* packet processor and ACK generator */
+	struct list_head	link;		/* link in master call list */
+	struct list_head	error_link;	/* link in error distribution list */
+	struct list_head	accept_link;	/* calls awaiting acceptance */
+	struct rb_node		sock_node;	/* node in socket call tree */
+	struct rb_node		conn_node;	/* node in connection call tree */
+	struct sk_buff_head	rx_queue;	/* received packets */
+	struct sk_buff_head	rx_oos_queue;	/* packets received out of sequence */
+	struct sk_buff		*tx_pending;	/* Tx socket buffer being filled */
+	wait_queue_head_t	tx_waitq;	/* wait for Tx window space to become available */
+	unsigned long		user_call_ID;	/* user-defined call ID */
+	unsigned long		creation_jif;	/* time of call creation */
+	unsigned long		flags;		/* flag bits; bit numbers defined below */
+#define RXRPC_CALL_RELEASED	0	/* call has been released - no more message to userspace */
+#define RXRPC_CALL_TERMINAL_MSG	1	/* call has given the socket its final message */
+#define RXRPC_CALL_RCVD_LAST	2	/* all packets received */
+#define RXRPC_CALL_RUN_RTIMER	3	/* Tx resend timer started */
+#define RXRPC_CALL_TX_SOFT_ACK	4	/* sent some soft ACKs */
+#define RXRPC_CALL_PROC_BUSY	5	/* the processor is busy */
+#define RXRPC_CALL_INIT_ACCEPT	6	/* acceptance was initiated */
+#define RXRPC_CALL_HAS_USERID	7	/* has a user ID attached */
+#define RXRPC_CALL_EXPECT_OOS	8	/* expect out of sequence packets */
+	unsigned long		events;		/* event bits, set with set_bit() (see
+						 * rxrpc_abort_call()); bit numbers
+						 * defined below
+						 */
+#define RXRPC_CALL_RCVD_ACKALL	0	/* ACKALL or reply received */
+#define RXRPC_CALL_RCVD_BUSY	1	/* busy packet received */
+#define RXRPC_CALL_RCVD_ABORT	2	/* abort packet received */
+#define RXRPC_CALL_RCVD_ERROR	3	/* network error received */
+#define RXRPC_CALL_ACK_FINAL	4	/* need to generate final ACK (and release call) */
+#define RXRPC_CALL_ACK		5	/* need to generate ACK */
+#define RXRPC_CALL_REJECT_BUSY	6	/* need to generate busy message */
+#define RXRPC_CALL_ABORT	7	/* need to generate abort */
+#define RXRPC_CALL_CONN_ABORT	8	/* local connection abort generated */
+#define RXRPC_CALL_RESEND_TIMER	9	/* Tx resend timer expired */
+#define RXRPC_CALL_RESEND	10	/* Tx resend required */
+#define RXRPC_CALL_DRAIN_RX_OOS	11	/* drain the Rx out of sequence queue */
+#define RXRPC_CALL_LIFE_TIMER	12	/* call's lifetimer ran out */
+#define RXRPC_CALL_ACCEPTED	13	/* incoming call accepted by userspace app */
+#define RXRPC_CALL_SECURED	14	/* incoming call's connection is now secure */
+#define RXRPC_CALL_POST_ACCEPT	15	/* need to post an "accept?" message to the app */
+#define RXRPC_CALL_RELEASE	16	/* need to release the call's resources */
+
+	spinlock_t		lock;
+	rwlock_t		state_lock;	/* lock for state transition */
+	atomic_t		usage;		/* reference count (see rxrpc_get_call()) */
+	atomic_t		sequence;	/* Tx data packet sequence counter */
+	u32			abort_code;	/* local/remote abort code */
+	/* NB: the order of the states matters: rxrpc_abort_call() only aborts a
+	 * call whose state is numerically below RXRPC_CALL_COMPLETE */
+	enum {					/* current state of call */
+		RXRPC_CALL_CLIENT_SEND_REQUEST,	/* - client sending request phase */
+		RXRPC_CALL_CLIENT_AWAIT_REPLY,	/* - client awaiting reply */
+		RXRPC_CALL_CLIENT_RECV_REPLY,	/* - client receiving reply phase */
+		RXRPC_CALL_CLIENT_FINAL_ACK,	/* - client sending final ACK phase */
+		RXRPC_CALL_SERVER_SECURING,	/* - server securing request connection */
+		RXRPC_CALL_SERVER_ACCEPTING,	/* - server accepting request */
+		RXRPC_CALL_SERVER_RECV_REQUEST,	/* - server receiving request */
+		RXRPC_CALL_SERVER_ACK_REQUEST,	/* - server pending ACK of request */
+		RXRPC_CALL_SERVER_SEND_REPLY,	/* - server sending reply */
+		RXRPC_CALL_SERVER_AWAIT_ACK,	/* - server awaiting final ACK */
+		RXRPC_CALL_COMPLETE,		/* - call completed */
+		RXRPC_CALL_SERVER_BUSY,		/* - call rejected by busy server */
+		RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
+		RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+		RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
+		RXRPC_CALL_DEAD,		/* - call is dead */
+	} state;
+	int			debug_id;	/* debug ID for printks */
+	u8			channel;	/* connection channel occupied by this call */
+
+	/* transmission-phase ACK management */
+	uint8_t			acks_head;	/* offset into window of first entry */
+	uint8_t			acks_tail;	/* offset into window of last entry */
+	uint8_t			acks_winsz;	/* size of un-ACK'd window */
+	uint8_t			acks_unacked;	/* lowest unacked packet in last ACK received */
+	int			acks_latest;	/* serial number of latest ACK received */
+	rxrpc_seq_t		acks_hard;	/* highest definitively ACK'd msg seq */
+	unsigned long		*acks_window;	/* sent packet window
+						 * - elements are pointers with LSB set if ACK'd
+						 */
+
+	/* receive-phase ACK management */
+	rxrpc_seq_t		rx_data_expect;	/* next data seq ID expected to be received */
+	rxrpc_seq_t		rx_data_post;	/* next data seq ID expected to be posted */
+	rxrpc_seq_t		rx_data_recv;	/* last data seq ID encountered by recvmsg */
+	rxrpc_seq_t		rx_data_eaten;	/* last data seq ID consumed by recvmsg */
+	rxrpc_seq_t		rx_first_oos;	/* first packet in rx_oos_queue (or 0) */
+	rxrpc_seq_t		ackr_win_top;	/* top of ACK window (rx_data_eaten is bottom) */
+	rxrpc_seq_net_t		ackr_prev_seq;	/* previous sequence number received */
+	uint8_t			ackr_reason;	/* reason to ACK */
+	__be32			ackr_serial;	/* serial of packet being ACK'd */
+	atomic_t		ackr_not_idle;	/* number of packets in Rx queue */
+
+	/* received packet records, 1 bit per record; the array is sized to
+	 * hold RXRPC_MAXACKS bits plus one extra word */
+#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
+	unsigned long		ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+
+	/* the following should all be in net order */
+	__be32			cid;		/* connection ID + channel index  */
+	__be32			call_id;	/* call ID on connection  */
+};
+
+/*
+ * RxRPC key for Kerberos (type-2 security)
+ */
+/* NB: rxrpc_instantiate() overlays this struct directly on the key data that
+ * follows the 4-byte interface version number, so the field layout is part
+ * of the key data format */
+struct rxkad_key {
+	u16	security_index;		/* RxRPC header security index
+					 * (rxrpc_instantiate() only accepts 2) */
+	u16	ticket_len;		/* length of ticket[] */
+	u32	expiry;			/* time at which expires (copied to key->expiry) */
+	u32	kvno;			/* key version number */
+	u8	session_key[8];		/* DES session key */
+	u8	ticket[0];		/* the encrypted ticket (ticket_len bytes
+					 * stored directly after the struct) */
+};
+
+/*
+ * payload that rxrpc_instantiate() attaches to key->payload.data
+ * - allocated with k.ticket_len extra bytes to hold the ticket
+ */
+struct rxrpc_key_payload {
+	struct rxkad_key k;
+};
+
+/*
+ * abort a call from the local end
+ * - a call that has already reached RXRPC_CALL_COMPLETE (or any later state)
+ *   is left untouched
+ * - otherwise the abort code is recorded, the call is marked locally aborted
+ *   and the RXRPC_CALL_ABORT event is raised
+ */
+static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+	write_lock_bh(&call->state_lock);
+
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		goto unlock;
+
+	call->abort_code = abort_code;
+	call->state = RXRPC_CALL_LOCALLY_ABORTED;
+	set_bit(RXRPC_CALL_ABORT, &call->events);
+
+unlock:
+	write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * af_rxrpc.c
+ */
+extern atomic_t rxrpc_n_skbs;
+extern __be32 rxrpc_epoch;
+extern atomic_t rxrpc_debug_id;
+extern struct workqueue_struct *rxrpc_workqueue;
+
+/*
+ * ar-accept.c
+ */
+extern void rxrpc_accept_incoming_calls(struct work_struct *);
+extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
+					    unsigned long);
+extern int rxrpc_reject_call(struct rxrpc_sock *);
+
+/*
+ * ar-ack.c
+ */
+extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_process_call(struct work_struct *);
+
+/*
+ * ar-call.c
+ */
+extern struct kmem_cache *rxrpc_call_jar;
+extern struct list_head rxrpc_calls;
+extern rwlock_t rxrpc_call_lock;
+
+extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+						struct rxrpc_transport *,
+						struct rxrpc_conn_bundle *,
+						unsigned long, int, gfp_t);
+extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
+					      struct rxrpc_connection *,
+					      struct rxrpc_header *, gfp_t);
+extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
+						 unsigned long);
+extern void rxrpc_release_call(struct rxrpc_call *);
+extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+extern void __rxrpc_put_call(struct rxrpc_call *);
+extern void __exit rxrpc_destroy_all_calls(void);
+
+/*
+ * ar-connection.c
+ */
+extern struct list_head rxrpc_connections;
+extern rwlock_t rxrpc_connection_lock;
+
+extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
+						  struct rxrpc_transport *,
+						  struct key *,
+						  __be16, gfp_t);
+extern void rxrpc_put_bundle(struct rxrpc_transport *,
+			     struct rxrpc_conn_bundle *);
+extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
+			      struct rxrpc_conn_bundle *, struct rxrpc_call *,
+			      gfp_t);
+extern void rxrpc_put_connection(struct rxrpc_connection *);
+extern void __exit rxrpc_destroy_all_connections(void);
+extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
+						      struct rxrpc_header *);
+extern struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
+			  gfp_t);
+
+/*
+ * ar-connevent.c
+ */
+extern void rxrpc_process_connection(struct work_struct *);
+extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
+extern void rxrpc_reject_packets(struct work_struct *);
+
+/*
+ * ar-error.c
+ */
+extern void rxrpc_UDP_error_report(struct sock *);
+extern void rxrpc_UDP_error_handler(struct work_struct *);
+
+/*
+ * ar-input.c
+ */
+extern unsigned long rxrpc_ack_timeout;
+extern const char *rxrpc_pkts[];
+
+extern void rxrpc_data_ready(struct sock *, int);
+extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
+			       bool);
+extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * ar-local.c
+ */
+extern rwlock_t rxrpc_local_lock;
+extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
+extern void rxrpc_put_local(struct rxrpc_local *);
+extern void __exit rxrpc_destroy_all_locals(void);
+
+/*
+ * ar-key.c
+ */
+extern struct key_type key_type_rxrpc;
+extern struct key_type key_type_rxrpc_s;
+
+extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
+				     time_t, u32);
+
+/*
+ * ar-output.c
+ */
+extern int rxrpc_resend_timeout;
+
+extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
+extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
+				struct rxrpc_transport *, struct msghdr *,
+				size_t);
+extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
+				struct msghdr *, size_t);
+
+/*
+ * ar-peer.c
+ */
+extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
+extern void rxrpc_put_peer(struct rxrpc_peer *);
+extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
+					  __be32, __be16);
+extern void __exit rxrpc_destroy_all_peers(void);
+
+/*
+ * ar-proc.c
+ */
+extern const char *rxrpc_call_states[];
+extern struct file_operations rxrpc_call_seq_fops;
+extern struct file_operations rxrpc_connection_seq_fops;
+
+/*
+ * ar-recvmsg.c
+ */
+extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
+			 size_t, int);
+
+/*
+ * ar-security.c
+ */
+extern int rxrpc_register_security(struct rxrpc_security *);
+extern void rxrpc_unregister_security(struct rxrpc_security *);
+extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
+			       size_t, void *);
+extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
+			       u32 *);
+extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
+
+/*
+ * ar-skbuff.c
+ */
+extern void rxrpc_packet_destructor(struct sk_buff *);
+
+/*
+ * ar-transport.c
+ */
+extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
+						   struct rxrpc_peer *,
+						   gfp_t);
+extern void rxrpc_put_transport(struct rxrpc_transport *);
+extern void __exit rxrpc_destroy_all_transports(void);
+extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
+						    struct rxrpc_peer *);
+
+/*
+ * debug tracing
+ */
+extern unsigned rxrpc_debug;
+
+/* print a debugging line prefixed with the CPU number (hex) and the current
+ * task's name; a newline is appended to FMT */
+#define dbgprintk(FMT,...) \
+	printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
+
+/*
+ * make sure we maintain the format strings, even when debugging is disabled
+ * - this does nothing at run time; the printf format attribute lets the
+ *   compiler check the arguments against the format
+ */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+/* unconditional tracing macros; each one tags its output with a distinctive
+ * prefix and kenter()/kleave() also print the calling function's name */
+#define kenter(FMT,...)	dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...)	dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...)	dbgprintk("    "FMT ,##__VA_ARGS__)
+#define kproto(FMT,...)	dbgprintk("### "FMT ,##__VA_ARGS__)
+#define knet(FMT,...)	dbgprintk("@@@ "FMT ,##__VA_ARGS__)
+
+
+/*
+ * the _enter()/_leave()/_debug()/_proto()/_net() macros come in three
+ * flavours, selected at compile time
+ */
+#if defined(__KDEBUG)
+/* __KDEBUG defined: always print */
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...)	kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...)	knet(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AF_RXRPC_DEBUG)
+/* CONFIG_AF_RXRPC_DEBUG: print only if the matching bit is set in the
+ * rxrpc_debug mask at run time */
+#define RXRPC_DEBUG_KENTER	0x01
+#define RXRPC_DEBUG_KLEAVE	0x02
+#define RXRPC_DEBUG_KDEBUG	0x04
+#define RXRPC_DEBUG_KPROTO	0x08
+#define RXRPC_DEBUG_KNET	0x10
+
+#define _enter(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER))	\
+		kenter(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _leave(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE))	\
+		kleave(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _debug(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG))	\
+		kdebug(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _proto(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO))	\
+		kproto(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _net(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET))	\
+		knet(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#else
+/* debugging disabled: nothing is printed, but the arguments still pass
+ * through _dbprintk() so that the format strings keep being checked */
+#define _enter(FMT,...)	_dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define _leave(FMT,...)	_dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _debug(FMT,...)	_dbprintk("    "FMT ,##__VA_ARGS__)
+#define _proto(FMT,...)	_dbprintk("### "FMT ,##__VA_ARGS__)
+#define _net(FMT,...)	_dbprintk("@@@ "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking
+ */
+/* NB: the __KDEBUGALL test is commented out, so the checking versions below
+ * are always the ones compiled in */
+#if 1 // defined(__KDEBUGALL)
+
+/* BUG() unless X is true */
+#define ASSERT(X)						\
+do {								\
+	if (unlikely(!(X))) {					\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+/*
+ * BUG() unless "X OP Y" is true
+ * - on failure both operands are printed, cast to unsigned long, in decimal
+ *   and then in hex
+ * - X and Y are evaluated again for the printks, so they should not have
+ *   side effects
+ */
+#define ASSERTCMP(X, OP, Y)						\
+do {									\
+	if (unlikely(!((X) OP (Y)))) {					\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+/* as ASSERT(), but X is only checked if condition C holds */
+#define ASSERTIF(C, X)						\
+do {								\
+	if (unlikely((C) && !(X))) {				\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+/* as ASSERTCMP(), but the comparison is only checked if condition C holds */
+#define ASSERTIFCMP(C, X, OP, Y)					\
+do {									\
+	if (unlikely((C) && !((X) OP (Y)))) {				\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#else
+
+/* no-op versions: the arguments are not evaluated at all */
+#define ASSERT(X)				\
+do {						\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)			\
+do {						\
+} while(0)
+
+#define ASSERTIF(C, X)				\
+do {						\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)		\
+do {						\
+} while(0)
+
+#endif /* __KDEBUGALL */
+
+/*
+ * socket buffer accounting / leak finding
+ */
+/*
+ * note the creation of a socket buffer
+ * - the accounting is commented out at present, so this does nothing
+ */
+static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
+{
+	//_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+	//atomic_inc(&rxrpc_n_skbs);
+}
+
+/* wrapper that passes the name of the calling function as fn */
+#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
+
+/*
+ * note the disposal of a socket buffer
+ * - the accounting is commented out at present, so this does nothing; in
+ *   particular the buffer is not freed here
+ */
+static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
+{
+	//_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+	//atomic_dec(&rxrpc_n_skbs);
+}
+
+/* wrapper that passes the name of the calling function as fn */
+#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
+
+/*
+ * release a socket buffer with kfree_skb()
+ * - a NULL pointer is quietly ignored
+ * - the rxrpc_n_skbs leak accounting is disabled at present, so fn is unused
+ */
+static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
+{
+	if (!skb)
+		return;
+
+	CHECK_SLAB_OKAY(&skb->users);
+	kfree_skb(skb);
+}
+
+#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
+
+/*
+ * empty a socket buffer queue, releasing every buffer that was on it
+ */
+static inline void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	for (;;) {
+		skb = skb_dequeue(list);
+		if (!skb)
+			break;
+		rxrpc_free_skb(skb);
+	}
+}
+
+/*
+ * take a reference on a local endpoint
+ * - logs "resurrected" (with the caller's name) if the usage count was zero
+ *   before the increment
+ */
+static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
+{
+	int usage;
+
+	CHECK_SLAB_OKAY(&local->usage);
+
+	usage = atomic_inc_return(&local->usage);
+	if (usage == 1)
+		printk("resurrected (%s)\n", f);
+}
+
+#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__)
+
+/*
+ * take a reference on a call
+ * - BUG()s if the usage count is 1 after the increment, i.e. if it was zero
+ *   beforehand
+ * - CALL is evaluated more than once
+ */
+#define rxrpc_get_call(CALL)				\
+do {							\
+	CHECK_SLAB_OKAY(&(CALL)->usage);		\
+	if (atomic_inc_return(&(CALL)->usage) == 1)	\
+		BUG();					\
+} while(0)
+
+/* drop a reference on a call; simply hands off to __rxrpc_put_call() */
+#define rxrpc_put_call(CALL)				\
+do {							\
+	__rxrpc_put_call(CALL);				\
+} while(0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
new file mode 100644
index 000000000000..7e049ff6ae60
--- /dev/null
+++ b/net/rxrpc/ar-key.c
@@ -0,0 +1,334 @@
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * RxRPC keys should have a description describing their purpose:
+ *	"afs@CAMBRIDGE.REDHAT.COM"
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_instantiate(struct key *, const void *, size_t);
+static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static void rxrpc_destroy(struct key *);
+static void rxrpc_destroy_s(struct key *);
+static void rxrpc_describe(const struct key *, struct seq_file *);
+
+/*
+ * rxrpc defined keys take an arbitrary string as the description
+ * - the payload is either absent (giving a no-security key) or a versioned
+ *   blob in the format that rxrpc_instantiate() parses
+ * - descriptions are matched with user_match
+ */
+struct key_type key_type_rxrpc = {
+	.name		= "rxrpc",
+	.instantiate	= rxrpc_instantiate,
+	.match		= user_match,
+	.destroy	= rxrpc_destroy,
+	.describe	= rxrpc_describe,
+};
+
+EXPORT_SYMBOL(key_type_rxrpc);
+
+/*
+ * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the
+ * description and an 8-byte decryption key as the payload
+ * - rxrpc_instantiate_s() rejects any payload that is not exactly 8 bytes
+ * - the describe op is shared with key_type_rxrpc
+ */
+struct key_type key_type_rxrpc_s = {
+	.name		= "rxrpc_s",
+	.instantiate	= rxrpc_instantiate_s,
+	.match		= user_match,
+	.destroy	= rxrpc_destroy_s,
+	.describe	= rxrpc_describe,
+};
+
+/*
+ * instantiate an rxrpc defined key from the data supplied
+ *
+ * if no data is provided (NULL pointer and zero length), a no-security key
+ * is made; otherwise the data should be of the form:
+ *
+ *	OFFSET	LEN	CONTENT
+ *	0	4	key interface version number (must be 1)
+ *	4	2	security index (type) (must be 2)
+ *	6	2	ticket length
+ *	8	4	key expiry time (time_t)
+ *	12	4	kvno
+ *	16	8	session key
+ *	24	[len]	ticket
+ *
+ * returns 0 on success, -EINVAL if the data is malformed, -EKEYREJECTED if
+ * the interface version is not recognised, -EPROTONOSUPPORT if the security
+ * index is not supported, -ENOMEM if the payload cannot be allocated, or the
+ * error from key_payload_reserve()
+ */
+static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+{
+	const struct rxkad_key *tsec;
+	struct rxrpc_key_payload *upayload;
+	size_t plen;
+	u32 kver;
+	int ret;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	/* no data at all means a no-security key */
+	if (!data && datalen == 0)
+		return 0;
+
+	/* there must be something beyond the interface version number */
+	if (!data || datalen <= 4)
+		return -EINVAL;
+
+	/* peel the interface version number off the front */
+	memcpy(&kver, data, sizeof(kver));
+	tsec = data + sizeof(kver);
+	datalen -= sizeof(kver);
+
+	_debug("KEY I/F VERSION: %u", kver);
+
+	if (kver != 1)
+		return -EKEYREJECTED;
+
+	/* a version 1 key is a struct rxkad_key with the ticket appended, and
+	 * the lengths have to tally exactly */
+	if (datalen < sizeof(*tsec))
+		return -EINVAL;
+	if (datalen != sizeof(*tsec) + tsec->ticket_len)
+		return -EINVAL;
+
+	_debug("SCIX: %u", tsec->security_index);
+	_debug("TLEN: %u", tsec->ticket_len);
+	_debug("EXPY: %x", tsec->expiry);
+	_debug("KVNO: %u", tsec->kvno);
+	_debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+	       tsec->session_key[0], tsec->session_key[1],
+	       tsec->session_key[2], tsec->session_key[3],
+	       tsec->session_key[4], tsec->session_key[5],
+	       tsec->session_key[6], tsec->session_key[7]);
+	if (tsec->ticket_len >= 8)
+		_debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+		       tsec->ticket[0], tsec->ticket[1],
+		       tsec->ticket[2], tsec->ticket[3],
+		       tsec->ticket[4], tsec->ticket[5],
+		       tsec->ticket[6], tsec->ticket[7]);
+
+	if (tsec->security_index != 2)
+		return -EPROTONOSUPPORT;
+
+	key->type_data.x[0] = tsec->security_index;
+
+	plen = sizeof(*upayload) + tsec->ticket_len;
+	ret = key_payload_reserve(key, plen);
+	if (ret < 0)
+		return ret;
+
+	upayload = kmalloc(plen, GFP_KERNEL);
+	if (!upayload)
+		return -ENOMEM;
+
+	/* attach the data: the fixed part first, then the ticket which sits
+	 * immediately after it in the source */
+	memcpy(&upayload->k, tsec, sizeof(*tsec));
+	memcpy(upayload->k.ticket, tsec + 1, tsec->ticket_len);
+	key->payload.data = upayload;
+	key->expiry = tsec->expiry;
+	return 0;
+}
+
+/*
+ * instantiate a server secret key
+ * - data should point to the secret key, which must be exactly 8 bytes long
+ * - the secret is copied into key->type_data and also loaded into a
+ *   "pcbc(des)" cipher, which then becomes the key's payload
+ * - returns 0, -EINVAL for a wrong-sized key, or the cipher allocation error
+ */
+static int rxrpc_instantiate_s(struct key *key, const void *data,
+			       size_t datalen)
+{
+	struct crypto_blkcipher *cipher;
+	int ret;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	if (datalen != 8)
+		return -EINVAL;
+
+	memcpy(&key->type_data, data, 8);
+
+	cipher = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher)) {
+		_leave(" = %ld", PTR_ERR(cipher));
+		return PTR_ERR(cipher);
+	}
+
+	/* failure to load the key into the cipher is treated as fatal */
+	ret = crypto_blkcipher_setkey(cipher, data, 8);
+	if (ret < 0)
+		BUG();
+
+	key->payload.data = cipher;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * free the payload hanging off a dead rxrpc key
+ * - rxrpc_instantiate() attaches nothing for a no-security key; presumably
+ *   the payload pointer is still NULL in that case, which kfree() accepts
+ */
+static void rxrpc_destroy(struct key *key)
+{
+	void *payload = key->payload.data;
+
+	kfree(payload);
+}
+
+/*
+ * release the cipher hanging off a dead rxrpc server key
+ * - the payload pointer is cleared afterwards; a key with no cipher attached
+ *   is left alone
+ */
+static void rxrpc_destroy_s(struct key *key)
+{
+	struct crypto_blkcipher *cipher = key->payload.data;
+
+	if (!cipher)
+		return;
+
+	crypto_free_blkcipher(cipher);
+	key->payload.data = NULL;
+}
+
+/*
+ * describe an rxrpc key by writing its description string to the seq file
+ * - shared by both the rxrpc and the rxrpc_s key types
+ */
+static void rxrpc_describe(const struct key *key, struct seq_file *m)
+{
+	const char *desc = key->description;
+
+	seq_puts(m, desc);
+}
+
+/*
+ * grab the security key for a socket
+ * - optval/optlen give the key description in userspace; it must be between
+ *   1 and PAGE_SIZE - 1 bytes long and need not be NUL-terminated
+ * - a key of type rxrpc matching the description is requested and stored in
+ *   rx->key
+ * - returns 0, -EINVAL, -ENOMEM, -EFAULT or the error from request_key()
+ */
+int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+{
+	struct key *key;
+	char *description;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	/* make a NUL-terminated kernel copy of the description */
+	description = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!description)
+		return -ENOMEM;
+
+	if (copy_from_user(description, optval, optlen) != 0) {
+		kfree(description);
+		return -EFAULT;
+	}
+	description[optlen] = 0;
+
+	/* the copy is only needed for the duration of the lookup */
+	key = request_key(&key_type_rxrpc, description, NULL);
+	kfree(description);
+
+	if (IS_ERR(key)) {
+		_leave(" = %ld", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	rx->key = key;
+	_leave(" = 0 [key %x]", key->serial);
+	return 0;
+}
+
+/*
+ * grab the security keyring for a server socket
+ * - optval/optlen give the keyring description in userspace; it must be
+ *   between 1 and PAGE_SIZE - 1 bytes long and need not be NUL-terminated
+ * - a keyring matching the description is requested and stored in
+ *   rx->securities
+ * - returns 0, -EINVAL, -ENOMEM, -EFAULT or the error from request_key()
+ */
+int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
+			 int optlen)
+{
+	struct key *key;
+	char *description;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	/* make a NUL-terminated kernel copy of the description */
+	description = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!description)
+		return -ENOMEM;
+
+	if (copy_from_user(description, optval, optlen) != 0) {
+		kfree(description);
+		return -EFAULT;
+	}
+	description[optlen] = 0;
+
+	/* the copy is only needed for the duration of the lookup */
+	key = request_key(&key_type_keyring, description, NULL);
+	kfree(description);
+
+	if (IS_ERR(key)) {
+		_leave(" = %ld", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	rx->securities = key;
+	_leave(" = 0 [key %x]", key->serial);
+	return 0;
+}
+
+/*
+ * generate a server data key
+ * - an rxrpc-type key is allocated, instantiated with the session key and
+ *   expiry time given and attached to the connection as conn->key
+ * - NOTE(review): the kvno argument is not used; the key is always built
+ *   with a kvno of 0 - confirm that this is intended
+ * - returns 0 on success and -ENOMEM on any failure
+ */
+int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
+			      const void *session_key,
+			      time_t expiry,
+			      u32 kvno)
+{
+	struct key *key;
+	int ret;
+
+	/* laid out like the data that rxrpc_instantiate() parses */
+	struct {
+		u32 kver;
+		struct rxkad_key tsec;
+	} data;
+
+	_enter("");
+
+	key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
+			KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(key)) {
+		_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
+		return -ENOMEM;
+	}
+
+	_debug("key %d", key_serial(key));
+
+	/* interface version 1, security index 2, no ticket */
+	data.kver = 1;
+	data.tsec.security_index = 2;
+	data.tsec.ticket_len = 0;
+	data.tsec.expiry = expiry;
+	data.tsec.kvno = 0;
+	memcpy(&data.tsec.session_key, session_key,
+	       sizeof(data.tsec.session_key));
+
+	ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
+	if (ret < 0) {
+		/* get rid of the half-made key */
+		key_revoke(key);
+		key_put(key);
+		_leave(" = -ENOMEM [ins %d]", ret);
+		return -ENOMEM;
+	}
+
+	conn->key = key;
+	_leave(" = 0 [%d]", key_serial(key));
+	return 0;
+}
+
+EXPORT_SYMBOL(rxrpc_get_server_data_key);
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
new file mode 100644
index 000000000000..fe03f71f17da
--- /dev/null
+++ b/net/rxrpc/ar-local.c
@@ -0,0 +1,309 @@
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/* list of all the local endpoints, linked through rxrpc_local::link */
+static LIST_HEAD(rxrpc_locals);
+/* guards rxrpc_locals; also held around the final usage count decrement in
+ * rxrpc_put_local() */
+DEFINE_RWLOCK(rxrpc_local_lock);
+/* held for writing across endpoint lookup/creation and across the first part
+ * of endpoint destruction */
+static DECLARE_RWSEM(rxrpc_local_sem);
+/* woken by rxrpc_destroy_local() when rxrpc_locals becomes empty */
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
+
+static void rxrpc_destroy_local(struct work_struct *work);
+
+/*
+ * allocate and initialise a new local endpoint record
+ * - the record starts off with a usage count of 1, a fresh debug ID and a
+ *   copy of the address given
+ * - returns NULL if the allocation fails
+ */
+static
+struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_local *local;
+
+	local = kzalloc(sizeof(*local), GFP_KERNEL);
+	if (!local)
+		goto out;
+
+	/* work items */
+	INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
+	INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
+	INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+
+	/* lists and packet queues */
+	INIT_LIST_HEAD(&local->services);
+	INIT_LIST_HEAD(&local->link);
+	skb_queue_head_init(&local->accept_queue);
+	skb_queue_head_init(&local->reject_queue);
+
+	/* locks */
+	init_rwsem(&local->defrag_sem);
+	spin_lock_init(&local->lock);
+	rwlock_init(&local->services_lock);
+
+	/* identity */
+	atomic_set(&local->usage, 1);
+	local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	memcpy(&local->srx, srx, sizeof(*srx));
+
+out:
+	_leave(" = %p", local);
+	return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_sem writelocked
+ * - creates a kernel socket for the endpoint, binds it if an address was
+ *   supplied, sets the IP_RECVERR and IP_MTU_DISCOVER options, adds the
+ *   endpoint to rxrpc_locals and installs the rxrpc socket callbacks
+ * - returns 0 on success or a negative error code; on failure after the
+ *   socket was created it is released and local->socket is reset to NULL
+ */
+static int rxrpc_create_local(struct rxrpc_local *local)
+{
+	struct sock *sock;
+	int ret, opt;
+
+	_enter("%p{%d}", local, local->srx.transport_type);
+
+	/* create a socket to represent the local endpoint */
+	ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
+			       &local->socket);
+	if (ret < 0) {
+		_leave(" = %d [socket]", ret);
+		return ret;
+	}
+
+	/* if a local address was supplied then bind it (a transport_len that
+	 * only covers the address family is taken to mean no address) */
+	if (local->srx.transport_len > sizeof(sa_family_t)) {
+		_debug("bind");
+		ret = kernel_bind(local->socket,
+				  (struct sockaddr *) &local->srx.transport,
+				  local->srx.transport_len);
+		if (ret < 0) {
+			_debug("bind failed");
+			goto error;
+		}
+	}
+
+	/* we want to receive ICMP errors */
+	opt = 1;
+	ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+				(char *) &opt, sizeof(opt));
+	if (ret < 0) {
+		_debug("setsockopt failed");
+		goto error;
+	}
+
+	/* we want to set the don't fragment bit */
+	opt = IP_PMTUDISC_DO;
+	ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+				(char *) &opt, sizeof(opt));
+	if (ret < 0) {
+		_debug("setsockopt failed");
+		goto error;
+	}
+
+	/* publish the endpoint on the master list */
+	write_lock_bh(&rxrpc_local_lock);
+	list_add(&local->link, &rxrpc_locals);
+	write_unlock_bh(&rxrpc_local_lock);
+
+	/* set the socket up */
+	sock = local->socket->sk;
+	sock->sk_user_data	= local;
+	sock->sk_data_ready	= rxrpc_data_ready;
+	sock->sk_error_report	= rxrpc_UDP_error_report;
+	_leave(" = 0");
+	return 0;
+
+error:
+	/* NOTE(review): 2 is presumably SHUT_RDWR - confirm */
+	local->socket->ops->shutdown(local->socket, 2);
+	local->socket->sk->sk_user_data = NULL;
+	sock_release(local->socket);
+	local->socket = NULL;
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * look up the local endpoint for the specified UDP address, creating a new
+ * one if none exists yet
+ * - an existing endpoint matches if its transport type, address family, port
+ *   and IPv4 address are all the same; it is returned with an extra
+ *   reference taken
+ * - only AF_INET addresses are handled; any other family hits BUG()
+ * - returns the endpoint, or an ERR_PTR() on failure (-ENOMEM or the error
+ *   from rxrpc_create_local())
+ */
+struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_local *local;
+	int ret;
+
+	_enter("{%d,%u,%u.%u.%u.%u+%hu}",
+	       srx->transport_type,
+	       srx->transport.family,
+	       NIPQUAD(srx->transport.sin.sin_addr),
+	       ntohs(srx->transport.sin.sin_port));
+
+	/* held for writing throughout, as rxrpc_create_local() requires */
+	down_write(&rxrpc_local_sem);
+
+	/* see if we have a suitable local endpoint already */
+	read_lock_bh(&rxrpc_local_lock);
+
+	list_for_each_entry(local, &rxrpc_locals, link) {
+		_debug("CMP {%d,%u,%u.%u.%u.%u+%hu}",
+		       local->srx.transport_type,
+		       local->srx.transport.family,
+		       NIPQUAD(local->srx.transport.sin.sin_addr),
+		       ntohs(local->srx.transport.sin.sin_port));
+
+		if (local->srx.transport_type != srx->transport_type ||
+		    local->srx.transport.family != srx->transport.family)
+			continue;
+
+		switch (srx->transport.family) {
+		case AF_INET:
+			if (local->srx.transport.sin.sin_port !=
+			    srx->transport.sin.sin_port)
+				continue;
+			if (memcmp(&local->srx.transport.sin.sin_addr,
+				   &srx->transport.sin.sin_addr,
+				   sizeof(struct in_addr)) != 0)
+				continue;
+			/* NB: leaves the loop with rxrpc_local_lock still
+			 * read-locked */
+			goto found_local;
+
+		default:
+			BUG();
+		}
+	}
+
+	read_unlock_bh(&rxrpc_local_lock);
+
+	/* we didn't find one, so we need to create one */
+	local = rxrpc_alloc_local(srx);
+	if (!local) {
+		up_write(&rxrpc_local_sem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = rxrpc_create_local(local);
+	if (ret < 0) {
+		up_write(&rxrpc_local_sem);
+		kfree(local);
+		_leave(" = %d", ret);
+		return ERR_PTR(ret);
+	}
+
+	up_write(&rxrpc_local_sem);
+
+	_net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     local->debug_id,
+	     local->srx.transport_type,
+	     local->srx.transport.family,
+	     NIPQUAD(local->srx.transport.sin.sin_addr),
+	     ntohs(local->srx.transport.sin.sin_port));
+
+	_leave(" = %p [new]", local);
+	return local;
+
+found_local:
+	/* take the reference before letting go of the list lock */
+	rxrpc_get_local(local);
+	read_unlock_bh(&rxrpc_local_lock);
+	up_write(&rxrpc_local_sem);
+
+	_net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     local->debug_id,
+	     local->srx.transport_type,
+	     local->srx.transport.family,
+	     NIPQUAD(local->srx.transport.sin.sin_addr),
+	     ntohs(local->srx.transport.sin.sin_port));
+
+	_leave(" = %p [reuse]", local);
+	return local;
+}
+
+/*
+ * drop a reference on a local endpoint
+ * - when the usage count reaches zero, the endpoint's destroyer work item is
+ *   queued to dispose of it
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+	bool dead;
+
+	_enter("%p{u=%d}", local, atomic_read(&local->usage));
+
+	ASSERTCMP(atomic_read(&local->usage), >, 0);
+
+	/* the decrement and the queueing of the destroyer are both done under
+	 * the list lock so that together they are effectively atomic, which
+	 * prevents a race */
+	write_lock_bh(&rxrpc_local_lock);
+	dead = atomic_dec_and_test(&local->usage);
+	if (unlikely(dead)) {
+		_debug("destroy local");
+		rxrpc_queue_work(&local->destroyer);
+	}
+	write_unlock_bh(&rxrpc_local_lock);
+
+	_leave("");
+}
+
+/*
+ * destroy a local endpoint
+ * - this is the handler for the destroyer work item that rxrpc_put_local()
+ *   queues when the usage count reaches zero
+ * - if the endpoint has picked up a reference again in the meantime, it is
+ *   left alone
+ * - otherwise it is unlinked from rxrpc_locals, its packet queues are purged,
+ *   its socket is shut down and released, and the record is freed
+ * - anyone waiting on rxrpc_local_wq is woken if the list is then empty
+ */
+static void rxrpc_destroy_local(struct work_struct *work)
+{
+	struct rxrpc_local *local =
+		container_of(work, struct rxrpc_local, destroyer);
+
+	_enter("%p{%d}", local, atomic_read(&local->usage));
+
+	down_write(&rxrpc_local_sem);
+
+	write_lock_bh(&rxrpc_local_lock);
+	if (atomic_read(&local->usage) > 0) {
+		write_unlock_bh(&rxrpc_local_lock);
+		/* the semaphore is still write-locked at this point (it is
+		 * only downgraded further down), so it must be released with
+		 * up_write() rather than up_read() */
+		up_write(&rxrpc_local_sem);
+		_leave(" [resurrected]");
+		return;
+	}
+
+	/* unpublish the endpoint and detach it from its socket */
+	list_del(&local->link);
+	local->socket->sk->sk_user_data = NULL;
+	write_unlock_bh(&rxrpc_local_lock);
+
+	/* from here on the semaphore is only held for reading */
+	downgrade_write(&rxrpc_local_sem);
+
+	ASSERT(list_empty(&local->services));
+	ASSERT(!work_pending(&local->acceptor));
+	ASSERT(!work_pending(&local->rejecter));
+
+	/* finish cleaning up the local descriptor */
+	rxrpc_purge_queue(&local->accept_queue);
+	rxrpc_purge_queue(&local->reject_queue);
+	local->socket->ops->shutdown(local->socket, 2);
+	sock_release(local->socket);
+
+	up_read(&rxrpc_local_sem);
+
+	_net("DESTROY LOCAL %d", local->debug_id);
+	kfree(local);
+
+	if (list_empty(&rxrpc_locals))
+		wake_up_all(&rxrpc_local_wq);
+
+	_leave("");
+}
+
+/*
+ * wait for all the local endpoints to go away
+ * - NB: despite the name, nothing is destroyed here; the function just
+ *   sleeps until rxrpc_locals is empty, being woken by rxrpc_destroy_local()
+ *   through rxrpc_local_wq
+ */
+void __exit rxrpc_destroy_all_locals(void)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("");
+
+	/* we simply have to wait for them to go away */
+	if (!list_empty(&rxrpc_locals)) {
+		/* the task state is set before each check of the list so that
+		 * a wakeup arriving between the check and schedule() is not
+		 * lost */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&rxrpc_local_wq, &myself);
+
+		while (!list_empty(&rxrpc_locals)) {
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&rxrpc_local_wq, &myself);
+		set_current_state(TASK_RUNNING);
+	}
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
new file mode 100644
index 000000000000..5cdde4a48ed1
--- /dev/null
+++ b/net/rxrpc/ar-output.c
@@ -0,0 +1,734 @@
+/* RxRPC packet transmission
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+int rxrpc_resend_timeout = 4; /* in seconds (multiplied by HZ where used) */
+
+static int rxrpc_send_data(struct kiocb *iocb,
+			   struct rxrpc_sock *rx,
+			   struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len);
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ * - yields the user call ID, command and abort code; returns 0 or -errno */
+static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+			      unsigned long *user_call_ID,
+			      enum rxrpc_command *command,
+			      u32 *abort_code,
+			      bool server)
+{
+	struct cmsghdr *cmsg;
+	int len;
+
+	*command = RXRPC_CMD_SEND_DATA; /* unless a cmsg says otherwise */
+
+	if (msg->msg_controllen == 0) /* control data is mandatory */
+		return -EINVAL;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+		_debug("CMSG %d, %d, %d",
+		       cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+		if (cmsg->cmsg_level != SOL_RXRPC) /* not ours - skip it */
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case RXRPC_USER_CALL_ID:
+			if (msg->msg_flags & MSG_CMSG_COMPAT) { /* 32-bit ID */
+				if (len != sizeof(u32))
+					return -EINVAL;
+				*user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+			} else {
+				if (len != sizeof(unsigned long))
+					return -EINVAL;
+				*user_call_ID = *(unsigned long *)
+					CMSG_DATA(cmsg);
+			}
+			_debug("User Call ID %lx", *user_call_ID);
+			break;
+
+		case RXRPC_ABORT:
+			if (*command != RXRPC_CMD_SEND_DATA) /* one command only */
+				return -EINVAL;
+			*command = RXRPC_CMD_SEND_ABORT;
+			if (len != sizeof(*abort_code))
+				return -EINVAL;
+			*abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+			_debug("Abort %x", *abort_code);
+			if (*abort_code == 0) /* zero is not a valid abort code */
+				return -EINVAL;
+			break;
+
+		case RXRPC_ACCEPT:
+			if (*command != RXRPC_CMD_SEND_DATA) /* one command only */
+				return -EINVAL;
+			*command = RXRPC_CMD_ACCEPT;
+			if (len != 0) /* ACCEPT carries no payload */
+				return -EINVAL;
+			if (!server) /* refused unless the caller says server */
+				return -EISCONN;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * abort a call locally: record the abort code and queue the call so that the
+ * ABORT event gets processed; the timers and their events are cancelled */
+static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+{
+	write_lock_bh(&call->state_lock);
+
+	if (call->state <= RXRPC_CALL_COMPLETE) { /* callers here pre-check "<" */
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = abort_code;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		del_timer_sync(&call->resend_timer);
+		del_timer_sync(&call->ack_timer);
+		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+		clear_bit(RXRPC_CALL_ACK, &call->events);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		rxrpc_queue_call(call);
+	}
+
+	write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ * - returns the result of the send/abort, or -errno */
+int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+			 struct rxrpc_transport *trans, struct msghdr *msg,
+			 size_t len)
+{
+	struct rxrpc_conn_bundle *bundle;
+	enum rxrpc_command cmd;
+	struct rxrpc_call *call;
+	unsigned long user_call_ID = 0;
+	struct key *key;
+	__be16 service_id;
+	u32 abort_code = 0;
+	int ret;
+
+	_enter("");
+
+	ASSERT(trans != NULL);
+
+	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+				 false);
+	if (ret < 0)
+		return ret;
+
+	bundle = NULL;
+	if (trans) { /* always true given the ASSERT above */
+		service_id = rx->service_id;
+		if (msg->msg_name) { /* an explicit address overrides it */
+			struct sockaddr_rxrpc *srx =
+				(struct sockaddr_rxrpc *) msg->msg_name;
+			service_id = htons(srx->srx_service);
+		}
+		key = rx->key;
+		if (key && !rx->key->payload.data) /* no payload: as no key */
+			key = NULL;
+		bundle = rxrpc_get_bundle(rx, trans, key, service_id,
+					  GFP_KERNEL);
+		if (IS_ERR(bundle))
+			return PTR_ERR(bundle);
+	}
+
+	/* NOTE(review): the "abort_code == 0" argument presumably means
+	 * "create the call if absent" - confirm against the callee */
+	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
+				     abort_code == 0, GFP_KERNEL);
+	if (trans)
+		rxrpc_put_bundle(trans, bundle);
+	if (IS_ERR(call)) {
+		_leave(" = %ld", PTR_ERR(call));
+		return PTR_ERR(call);
+	}
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	/* ret is still the 0 from rxrpc_sendmsg_cmsg() if not set below */
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		/* it's too late for this call */
+		ret = -ESHUTDOWN;
+	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
+		rxrpc_send_abort(call, abort_code);
+	} else if (cmd != RXRPC_CMD_SEND_DATA) {
+		ret = -EINVAL;
+	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+		/* request phase complete for this client call */
+		ret = -EPROTO;
+	} else {
+		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+	}
+
+	rxrpc_put_call(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call.  The call must be in a state
+ * appropriate to sending data.  No control data should be supplied in @msg,
+ * nor should an address be supplied.  MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+			   size_t len)
+{
+	int ret;
+
+	_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+	ASSERTCMP(msg->msg_name, ==, NULL);
+	ASSERTCMP(msg->msg_control, ==, NULL);
+
+	lock_sock(&call->socket->sk); /* rxrpc_send_data() wants it locked */
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		ret = -ESHUTDOWN; /* it's too late for this call */
+	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		ret = -EPROTO; /* the call is not in a state that permits sending */
+	} else {
+		mm_segment_t oldfs = get_fs();
+		set_fs(KERNEL_DS); /* the data is copied via a __user pointer */
+		ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+		set_fs(oldfs);
+	}
+
+	release_sock(&call->socket->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+	_enter("{%d},%d", call->debug_id, abort_code);
+
+	lock_sock(&call->socket->sk);
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state < RXRPC_CALL_COMPLETE) /* else silently do nothing */
+		rxrpc_send_abort(call, abort_code);
+
+	release_sock(&call->socket->sk);
+	_leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
+/*
+ * send a message through a server socket
+ * - caller holds the socket locked
+ * - handles the ACCEPT, SEND_DATA and SEND_ABORT commands from the cmsgs */
+int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+			 struct msghdr *msg, size_t len)
+{
+	enum rxrpc_command cmd;
+	struct rxrpc_call *call;
+	unsigned long user_call_ID = 0;
+	u32 abort_code = 0;
+	int ret;
+
+	_enter("");
+
+	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+				 true);
+	if (ret < 0)
+		return ret;
+
+	if (cmd == RXRPC_CMD_ACCEPT) {
+		call = rxrpc_accept_call(rx, user_call_ID);
+		if (IS_ERR(call))
+			return PTR_ERR(call);
+		rxrpc_put_call(call);
+		return 0;
+	}
+
+	call = rxrpc_find_server_call(rx, user_call_ID);
+	if (!call)
+		return -EBADSLT;
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		ret = -ESHUTDOWN;
+		goto out;
+	}
+
+	switch (cmd) { /* ret is still 0 from the cmsg parse at this point */
+	case RXRPC_CMD_SEND_DATA:
+		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		    call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		    call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+			/* Tx phase not yet begun for this call */
+			ret = -EPROTO;
+			break;
+		}
+
+		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+		break;
+
+	case RXRPC_CMD_SEND_ABORT:
+		rxrpc_send_abort(call, abort_code);
+		break;
+	default: /* ACCEPT was dealt with above; the parser sets no others */
+		BUG();
+	}
+
+	out:
+	rxrpc_put_call(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * send a packet through the transport endpoint, first with fragmentation
+ * disallowed if it looks small enough, then with it allowed on -EMSGSIZE */
+int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
+{
+	struct kvec iov[1];
+	struct msghdr msg;
+	int ret, opt;
+
+	_enter(",{%d}", skb->len);
+
+	iov[0].iov_base = skb->head; /* transmission starts at skb->head */
+	iov[0].iov_len = skb->len;
+
+	msg.msg_name = &trans->peer->srx.transport.sin;
+	msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	/* send the packet with the don't fragment bit set if we currently
+	 * think it's small enough */
+	if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
+		down_read(&trans->local->defrag_sem); /* option isn't altered */
+		/* send the packet by UDP
+		 * - returns -EMSGSIZE if UDP would have to fragment the packet
+		 *   to go out of the interface
+		 *   - in which case, we'll have processed the ICMP error
+		 *     message and update the peer record
+		 */
+		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+				     iov[0].iov_len);
+
+		up_read(&trans->local->defrag_sem);
+		if (ret == -EMSGSIZE)
+			goto send_fragmentable;
+
+		_leave(" = %d [%u]", ret, trans->peer->maxdata);
+		return ret;
+	}
+
+send_fragmentable:
+	/* attempt to send this message with fragmentation enabled */
+	_debug("send fragment");
+
+	down_write(&trans->local->defrag_sem); /* exclusive: option toggled */
+	opt = IP_PMTUDISC_DONT;
+	ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
+				(char *) &opt, sizeof(opt));
+	if (ret == 0) {
+		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+				     iov[0].iov_len);
+
+		opt = IP_PMTUDISC_DO; /* put it back; the result is not checked */
+		kernel_setsockopt(trans->local->socket, SOL_IP,
+				  IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
+	}
+
+	up_write(&trans->local->defrag_sem);
+	_leave(" = %d [frag %u]", ret, trans->peer->maxdata);
+	return ret;
+}
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked (it is dropped while we sleep)
+ * - returns 0 once there is space, or an error if a signal is pending */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+				    struct rxrpc_call *call,
+				    long *timeo)
+{
+	DECLARE_WAITQUEUE(myself, current);
+	int ret;
+
+	_enter(",{%d},%ld",
+	       CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
+	       *timeo);
+
+	add_wait_queue(&call->tx_waitq, &myself);
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		ret = 0;
+		if (CIRC_SPACE(call->acks_head, call->acks_tail,
+			       call->acks_winsz) > 0)
+			break;
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(*timeo);
+			break;
+		}
+
+		release_sock(&rx->sk);
+		*timeo = schedule_timeout(*timeo); /* NOTE(review): 0 isn't an exit */
+		lock_sock(&rx->sk);
+	}
+
+	remove_wait_queue(&call->tx_waitq, &myself);
+	set_current_state(TASK_RUNNING);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * attempt to schedule an instant Tx resend
+ * - nothing is done if the resend timer could not be deleted */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call)
+{
+	read_lock_bh(&call->state_lock);
+	if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		if (call->state < RXRPC_CALL_COMPLETE && /* queue only once */
+		    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+			rxrpc_queue_call(call);
+	}
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * queue a packet for transmission, set the resend timer and attempt
+ * to send the packet immediately
+ * - @last marks the final packet of the phase and advances the call state */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			       bool last)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	int ret;
+
+	_net("queue skb %p [%d]", skb, call->acks_head);
+
+	ASSERT(call->acks_window != NULL);
+	call->acks_window[call->acks_head] = (unsigned long) skb;
+	smp_wmb(); /* slot contents are written before the head moves on */
+	call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
+
+	if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+		_debug("________awaiting reply/ACK__________");
+		write_lock_bh(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_SEND_REQUEST:
+			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+			break;
+		case RXRPC_CALL_SERVER_ACK_REQUEST:
+			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+			if (!last)
+				break; /* else fall through: reply complete too */
+		case RXRPC_CALL_SERVER_SEND_REPLY:
+			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+			break;
+		default:
+			break;
+		}
+		write_unlock_bh(&call->state_lock);
+	}
+
+	_proto("Tx DATA %%%u { #%u }",
+	       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+	sp->need_resend = 0;
+	sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+	if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
+		_debug("run timer"); /* the flag was clear, so arm the timer */
+		call->resend_timer.expires = sp->resend_at;
+		add_timer(&call->resend_timer);
+	}
+
+	/* attempt to cancel the rx-ACK timer, deferring reply transmission if
+	 * we're ACK'ing the request phase of an incoming call */
+	ret = -EAGAIN;
+	if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
+		/* the packet may be freed by rxrpc_process_call() before this
+		 * returns */
+		ret = rxrpc_send_packet(call->conn->trans, skb);
+		_net("sent skb %p", skb);
+	} else {
+		_debug("failed to delete ACK timer");
+	}
+
+	if (ret < 0) { /* not sent or send failed: flag it for resending */
+		_debug("need instant resend %d", ret);
+		sp->need_resend = 1;
+		rxrpc_instant_resend(call);
+	}
+
+	_leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ * - a partly filled packet is parked in call->tx_pending for the next call */
+static int rxrpc_send_data(struct kiocb *iocb,
+			   struct rxrpc_sock *rx,
+			   struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len)
+{
+	struct rxrpc_skb_priv *sp;
+	unsigned char __user *from;
+	struct sk_buff *skb;
+	struct iovec *iov;
+	struct sock *sk = &rx->sk;
+	long timeo;
+	bool more;
+	int ret, ioc, segment, copied;
+
+	_enter(",,,{%zu},%zu", msg->msg_iovlen, len);
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	/* this should be in poll */
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		return -EPIPE;
+
+	iov = msg->msg_iov;
+	ioc = msg->msg_iovlen - 1; /* iovec entries left after the first */
+	from = iov->iov_base;
+	segment = iov->iov_len;
+	iov++;
+	more = msg->msg_flags & MSG_MORE;
+
+	skb = call->tx_pending; /* resume any packet left part-filled */
+	call->tx_pending = NULL;
+
+	copied = 0; /* NOTE(review): never incremented anywhere below */
+	do {
+		int copy;
+
+		if (segment > len)
+			segment = len;
+
+		_debug("SEGMENT %d @%p", segment, from);
+
+		if (!skb) {
+			size_t size, chunk, max, space;
+
+			_debug("alloc");
+
+			if (CIRC_SPACE(call->acks_head, call->acks_tail,
+				       call->acks_winsz) <= 0) {
+				ret = -EAGAIN;
+				if (msg->msg_flags & MSG_DONTWAIT)
+					goto maybe_error;
+				ret = rxrpc_wait_for_tx_window(rx, call,
+							       &timeo);
+				if (ret < 0)
+					goto maybe_error;
+			}
+
+			max = call->conn->trans->peer->maxdata;
+			max -= call->conn->security_size;
+			max &= ~(call->conn->size_align - 1UL);
+
+			chunk = max;
+			if (chunk > len)
+				chunk = len;
+
+			space = chunk + call->conn->size_align;
+			space &= ~(call->conn->size_align - 1UL);
+
+			size = space + call->conn->header_size;
+
+			_debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+			/* create a buffer that we can retain until it's ACK'd */
+			skb = sock_alloc_send_skb(
+				sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+			if (!skb)
+				goto maybe_error;
+
+			rxrpc_new_skb(skb);
+
+			_debug("ALLOC SEND %p", skb);
+
+			ASSERTCMP(skb->mark, ==, 0);
+
+			_debug("HS: %u", call->conn->header_size);
+			skb_reserve(skb, call->conn->header_size);
+			skb->len += call->conn->header_size; /* count the header */
+
+			sp = rxrpc_skb(skb);
+			sp->remain = chunk;
+			if (sp->remain > skb_tailroom(skb))
+				sp->remain = skb_tailroom(skb);
+
+			_net("skb: hr %d, tr %d, hl %d, rm %d",
+			       skb_headroom(skb),
+			       skb_tailroom(skb),
+			       skb_headlen(skb),
+			       sp->remain);
+
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+
+		_debug("append");
+		sp = rxrpc_skb(skb);
+
+		/* append next segment of data to the current buffer */
+		copy = skb_tailroom(skb);
+		ASSERTCMP(copy, >, 0);
+		if (copy > segment)
+			copy = segment;
+		if (copy > sp->remain)
+			copy = sp->remain;
+
+		_debug("add");
+		ret = skb_add_data(skb, from, copy);
+		_debug("added");
+		if (ret < 0)
+			goto efault;
+		sp->remain -= copy;
+		skb->mark += copy; /* running count of payload bytes added */
+
+		len -= copy;
+		segment -= copy;
+		from += copy;
+		while (segment == 0 && ioc > 0) { /* move to next non-empty iovec */
+			from = iov->iov_base;
+			segment = iov->iov_len;
+			iov++;
+			ioc--;
+		}
+		if (len == 0) {
+			segment = 0;
+			ioc = 0;
+		}
+
+		/* check for the far side aborting the call or a network error
+		 * occurring */
+		if (call->state > RXRPC_CALL_COMPLETE)
+			goto call_aborted;
+
+		/* add the packet to the send queue if it's now full */
+		if (sp->remain <= 0 || (segment == 0 && !more)) {
+			struct rxrpc_connection *conn = call->conn;
+			size_t pad;
+
+			/* pad out if we're using security */
+			if (conn->security) {
+				pad = conn->security_size + skb->mark;
+				pad = conn->size_align - pad;
+				pad &= conn->size_align - 1;
+				_debug("pad %zu", pad);
+				if (pad)
+					memset(skb_put(skb, pad), 0, pad);
+			}
+
+			sp->hdr.epoch = conn->epoch;
+			sp->hdr.cid = call->cid;
+			sp->hdr.callNumber = call->call_id;
+			sp->hdr.seq =
+				htonl(atomic_inc_return(&call->sequence));
+			sp->hdr.serial =
+				htonl(atomic_inc_return(&conn->serial));
+			sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
+			sp->hdr.userStatus = 0;
+			sp->hdr.securityIndex = conn->security_ix;
+			sp->hdr._rsvd = 0;
+			sp->hdr.serviceId = conn->service_id;
+
+			sp->hdr.flags = conn->out_clientflag;
+			if (len == 0 && !more)
+				sp->hdr.flags |= RXRPC_LAST_PACKET;
+			else if (CIRC_SPACE(call->acks_head, call->acks_tail,
+					    call->acks_winsz) > 1)
+				sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+			ret = rxrpc_secure_packet(
+				call, skb, skb->mark,
+				skb->head + sizeof(struct rxrpc_header));
+			if (ret < 0)
+				goto out;
+
+			memcpy(skb->head, &sp->hdr,
+			       sizeof(struct rxrpc_header));
+			rxrpc_queue_packet(call, skb, segment == 0 && !more);
+			skb = NULL; /* the queue holds it now */
+		}
+
+	} while (segment > 0);
+
+out:
+	call->tx_pending = skb; /* NULL unless a packet is still part-filled */
+	_leave(" = %d", ret);
+	return ret;
+
+call_aborted:
+	rxrpc_free_skb(skb);
+	if (call->state == RXRPC_CALL_NETWORK_ERROR)
+		ret = call->conn->trans->peer->net_error;
+	else
+		ret = -ECONNABORTED;
+	_leave(" = %d", ret);
+	return ret;
+
+maybe_error:
+	if (copied) /* NOTE(review): can't be true as copied stays 0 */
+		ret = copied;
+	goto out;
+
+efault:
+	ret = -EFAULT;
+	goto out;
+}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
new file mode 100644
index 000000000000..d399de4a7fe2
--- /dev/null
+++ b/net/rxrpc/ar-peer.c
@@ -0,0 +1,273 @@
+/* RxRPC remote transport endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_peers); /* all peer records, linked via ->link */
+static DEFINE_RWLOCK(rxrpc_peer_lock); /* guards rxrpc_peers */
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq); /* woken when the list empties */
+
+static void rxrpc_destroy_peer(struct work_struct *work);
+
+/*
+ * allocate a new peer record with a usage count of 1 (not yet on the list)
+ * - returns NULL if the allocation fails; BUGs on anything but UDP/IPv4 */
+static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
+					   gfp_t gfp)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("");
+
+	peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+	if (peer) {
+		INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
+		INIT_LIST_HEAD(&peer->link);
+		INIT_LIST_HEAD(&peer->error_targets);
+		spin_lock_init(&peer->lock);
+		atomic_set(&peer->usage, 1);
+		peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		memcpy(&peer->srx, srx, sizeof(*srx));
+
+		peer->mtu = peer->if_mtu = 65535;
+
+		if (srx->transport.family == AF_INET) {
+			peer->hdrsize = sizeof(struct iphdr);
+			switch (srx->transport_type) {
+			case SOCK_DGRAM:
+				peer->hdrsize += sizeof(struct udphdr);
+				break;
+			default:
+				BUG();
+				break;
+			}
+		} else {
+			BUG();
+		}
+
+		peer->hdrsize += sizeof(struct rxrpc_header);
+		peer->maxdata = peer->mtu - peer->hdrsize; /* room after headers */
+	}
+
+	_leave(" = %p", peer);
+	return peer;
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address, creating a
+ * record if there isn't one; returns it with a ref or ERR_PTR(-ENOMEM) */
+struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+	struct rxrpc_peer *peer, *candidate;
+	const char *new = "old"; /* only used in the trace message */
+	int usage; /* NOTE(review): assigned below but never read */
+
+	_enter("{%d,%d,%u.%u.%u.%u+%hu}",
+	       srx->transport_type,
+	       srx->transport_len,
+	       NIPQUAD(srx->transport.sin.sin_addr),
+	       ntohs(srx->transport.sin.sin_port));
+
+	/* search the peer list first */
+	read_lock_bh(&rxrpc_peer_lock);
+	list_for_each_entry(peer, &rxrpc_peers, link) {
+		_debug("check PEER %d { u=%d t=%d l=%d }",
+		       peer->debug_id,
+		       atomic_read(&peer->usage),
+		       peer->srx.transport_type,
+		       peer->srx.transport_len);
+
+		if (atomic_read(&peer->usage) > 0 && /* skip dying records */
+		    peer->srx.transport_type == srx->transport_type &&
+		    peer->srx.transport_len == srx->transport_len &&
+		    memcmp(&peer->srx.transport,
+			   &srx->transport,
+			   srx->transport_len) == 0)
+			goto found_extant_peer;
+	}
+	read_unlock_bh(&rxrpc_peer_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_peer(srx, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	write_lock_bh(&rxrpc_peer_lock);
+
+	/* someone may have added the peer while no lock was held */
+	list_for_each_entry(peer, &rxrpc_peers, link) {
+		if (atomic_read(&peer->usage) > 0 &&
+		    peer->srx.transport_type == srx->transport_type &&
+		    peer->srx.transport_len == srx->transport_len &&
+		    memcmp(&peer->srx.transport,
+			   &srx->transport,
+			   srx->transport_len) == 0)
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	peer = candidate;
+	candidate = NULL;
+
+	list_add_tail(&peer->link, &rxrpc_peers);
+	write_unlock_bh(&rxrpc_peer_lock);
+	new = "new";
+
+success:
+	_net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     new,
+	     peer->debug_id,
+	     peer->srx.transport_type,
+	     peer->srx.transport.family,
+	     NIPQUAD(peer->srx.transport.sin.sin_addr),
+	     ntohs(peer->srx.transport.sin.sin_port));
+
+	_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+	return peer;
+
+	/* we found the peer in the list immediately */
+found_extant_peer:
+	usage = atomic_inc_return(&peer->usage);
+	read_unlock_bh(&rxrpc_peer_lock);
+	goto success;
+
+	/* we found the peer on the second time through the list */
+found_extant_second:
+	usage = atomic_inc_return(&peer->usage);
+	write_unlock_bh(&rxrpc_peer_lock);
+	kfree(candidate); /* lost the race, so discard our unused record */
+	goto success;
+}
+
+/*
+ * find the peer record matching a packet's address and port (ref taken), or
+ * return ERR_PTR(-EBUSY) if none / -EAFNOSUPPORT if not UDP over IPv4 */
+struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
+				   __be32 addr, __be16 port)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("");
+
+	/* search the peer list */
+	read_lock_bh(&rxrpc_peer_lock);
+
+	if (local->srx.transport.family == AF_INET &&
+	    local->srx.transport_type == SOCK_DGRAM
+	    ) {
+		list_for_each_entry(peer, &rxrpc_peers, link) {
+			if (atomic_read(&peer->usage) > 0 &&
+			    peer->srx.transport_type == SOCK_DGRAM &&
+			    peer->srx.transport.family == AF_INET &&
+			    peer->srx.transport.sin.sin_port == port &&
+			    peer->srx.transport.sin.sin_addr.s_addr == addr)
+				goto found_UDP_peer;
+		}
+
+		goto new_UDP_peer;
+	}
+
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = -EAFNOSUPPORT");
+	return ERR_PTR(-EAFNOSUPPORT);
+
+found_UDP_peer:
+	_net("Rx UDP DGRAM from peer %d", peer->debug_id);
+	atomic_inc(&peer->usage);
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = %p", peer);
+	return peer;
+
+new_UDP_peer: /* the walk ran off the end, so "peer" isn't a real record */
+	_net("Rx UDP DGRAM from NEW peer");
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = -EBUSY [new]");
+	return ERR_PTR(-EBUSY);
+}
+
+/*
+ * drop a ref on a remote transport endpoint, queueing it for destruction
+ * when the last ref goes */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+	_enter("%p{u=%d}", peer, atomic_read(&peer->usage));
+
+	ASSERTCMP(atomic_read(&peer->usage), >, 0);
+
+	/* only the final put hands the record over to the destroyer */
+	if (unlikely(atomic_dec_and_test(&peer->usage))) {
+		rxrpc_queue_work(&peer->destroyer);
+		_leave("");
+	} else {
+		_leave(" [in use]");
+	}
+}
+
+/*
+ * destroy a remote transport endpoint (the peer's destroyer work item)
+ * - unlinks and frees the record, waking rxrpc_peer_wq if none remain */
+static void rxrpc_destroy_peer(struct work_struct *work)
+{
+	struct rxrpc_peer *peer =
+		container_of(work, struct rxrpc_peer, destroyer);
+
+	_enter("%p{%d}", peer, atomic_read(&peer->usage));
+
+	write_lock_bh(&rxrpc_peer_lock);
+	list_del(&peer->link);
+	write_unlock_bh(&rxrpc_peer_lock);
+
+	_net("DESTROY PEER %d", peer->debug_id);
+	kfree(peer);
+
+	if (list_empty(&rxrpc_peers)) /* checked without rxrpc_peer_lock */
+		wake_up_all(&rxrpc_peer_wq);
+	_leave("");
+}
+
+/*
+ * wait for all the peer records to go away
+ * - nothing is destroyed here; we just sleep until rxrpc_peers is empty
+ */
+void __exit rxrpc_destroy_all_peers(void)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("");
+
+	/* we simply have to wait for them to go away */
+	if (!list_empty(&rxrpc_peers)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&rxrpc_peer_wq, &myself);
+
+		while (!list_empty(&rxrpc_peers)) {
+			schedule(); /* rxrpc_destroy_peer() wakes the queue */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&rxrpc_peer_wq, &myself);
+		set_current_state(TASK_RUNNING);
+	}
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
new file mode 100644
index 000000000000..58f4b4e5cece
--- /dev/null
+++ b/net/rxrpc/ar-proc.c
@@ -0,0 +1,247 @@
+/* /proc/net/ support for AF_RXRPC
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static const char *rxrpc_conn_states[] = { /* 8-char names for the proc file */
+	[RXRPC_CONN_UNUSED]		= "Unused  ",
+	[RXRPC_CONN_CLIENT]		= "Client  ",
+	[RXRPC_CONN_SERVER_UNSECURED]	= "SvUnsec ",
+	[RXRPC_CONN_SERVER_CHALLENGING]	= "SvChall ",
+	[RXRPC_CONN_SERVER]		= "SvSecure",
+	[RXRPC_CONN_REMOTELY_ABORTED]	= "RmtAbort",
+	[RXRPC_CONN_LOCALLY_ABORTED]	= "LocAbort",
+	[RXRPC_CONN_NETWORK_ERROR]	= "NetError",
+};
+
+const char *rxrpc_call_states[] = { /* not static: also used by ar-output.c */
+	[RXRPC_CALL_CLIENT_SEND_REQUEST]	= "ClSndReq",
+	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
+	[RXRPC_CALL_CLIENT_RECV_REPLY]		= "ClRcvRpl",
+	[RXRPC_CALL_CLIENT_FINAL_ACK]		= "ClFnlACK",
+	[RXRPC_CALL_SERVER_SECURING]		= "SvSecure",
+	[RXRPC_CALL_SERVER_ACCEPTING]		= "SvAccept",
+	[RXRPC_CALL_SERVER_RECV_REQUEST]	= "SvRcvReq",
+	[RXRPC_CALL_SERVER_ACK_REQUEST]		= "SvAckReq",
+	[RXRPC_CALL_SERVER_SEND_REPLY]		= "SvSndRpl",
+	[RXRPC_CALL_SERVER_AWAIT_ACK]		= "SvAwtACK",
+	[RXRPC_CALL_COMPLETE]			= "Complete",
+	[RXRPC_CALL_SERVER_BUSY]		= "SvBusy  ",
+	[RXRPC_CALL_REMOTELY_ABORTED]		= "RmtAbort",
+	[RXRPC_CALL_LOCALLY_ABORTED]		= "LocAbort",
+	[RXRPC_CALL_NETWORK_ERROR]		= "NetError",
+	[RXRPC_CALL_DEAD]			= "Dead    ",
+};
+
+/*
+ * begin a walk over the extant and dead calls for /proc/net/rxrpc_calls
+ */
+static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+	struct list_head *entry;
+	loff_t skip = *_pos;
+
+	/* stays held for the whole walk; rxrpc_call_seq_stop() drops it */
+	read_lock(&rxrpc_call_lock);
+	if (skip == 0)
+		return SEQ_START_TOKEN;
+	/* position 0 is the header token, so list entry N is position N + 1 */
+	skip--;
+	list_for_each(entry, &rxrpc_calls)
+		if (skip-- == 0)
+			return entry;
+	return NULL;
+}
+
+static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *next;
+
+	(*pos)++;
+	if (v == SEQ_START_TOKEN) /* the token precedes the first entry */
+		next = rxrpc_calls.next;
+	else
+		next = ((struct list_head *) v)->next;
+	return next == &rxrpc_calls ? NULL : next;
+}
+
+static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&rxrpc_call_lock); /* taken in rxrpc_call_seq_start() */
+}
+
+static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+{
+	struct rxrpc_transport *trans;
+	struct rxrpc_call *call;
+	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+	if (v == SEQ_START_TOKEN) { /* first record: emit the column headings */
+		seq_puts(seq,
+			 "Proto Local                  Remote                "
+			 " SvID ConnID   CallID   End Use State    Abort   "
+			 " UserID\n");
+		return 0;
+	}
+
+	call = list_entry(v, struct rxrpc_call, link);
+	trans = call->conn->trans; /* NOTE(review): assumes conn is set */
+
+	/* "a.b.c.d:port" needs at most 15 + 1 + 5 chars and a NUL: 22 bytes */
+	sprintf(lbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+		ntohs(trans->local->srx.transport.sin.sin_port));
+
+	sprintf(rbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+		ntohs(trans->peer->srx.transport.sin.sin_port));
+
+	seq_printf(seq,
+		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   " %-8.8s %08x %lx\n",
+		   lbuff,
+		   rbuff,
+		   ntohs(call->conn->service_id),
+		   ntohl(call->conn->cid),
+		   ntohl(call->call_id),
+		   call->conn->in_clientflag ? "Svc" : "Clt",
+		   atomic_read(&call->usage),
+		   rxrpc_call_states[call->state],
+		   call->abort_code,
+		   call->user_call_ID);
+
+	return 0;
+}
+
+static struct seq_operations rxrpc_call_seq_ops = {
+	.start  = rxrpc_call_seq_start,
+	.next   = rxrpc_call_seq_next,
+	.stop   = rxrpc_call_seq_stop,
+	.show   = rxrpc_call_seq_show,
+};
+
+static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rxrpc_call_seq_ops); /* no private data set */
+}
+
+struct file_operations rxrpc_call_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rxrpc_call_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release, /* pairs with the plain seq_open() */
+};
+
+/*
+ * begin a walk over the virtual connections for /proc/net/rxrpc_conns
+ */
+static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+	struct list_head *entry;
+	loff_t skip = *_pos;
+
+	/* stays held for the walk; rxrpc_connection_seq_stop() drops it */
+	read_lock(&rxrpc_connection_lock);
+	if (skip == 0)
+		return SEQ_START_TOKEN;
+	/* position 0 is the header token, so list entry N is position N + 1 */
+	skip--;
+	list_for_each(entry, &rxrpc_connections)
+		if (skip-- == 0)
+			return entry;
+	return NULL;
+}
+
+static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
+				       loff_t *pos)
+{
+	struct list_head *next;
+
+	(*pos)++;
+	if (v == SEQ_START_TOKEN) /* the token precedes the first entry */
+		next = rxrpc_connections.next;
+	else
+		next = ((struct list_head *) v)->next;
+	return next == &rxrpc_connections ? NULL : next;
+}
+
+static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&rxrpc_connection_lock); /* taken in ..._seq_start() */
+}
+
+static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+	if (v == SEQ_START_TOKEN) { /* first record: emit the column headings */
+		seq_puts(seq,
+			 "Proto Local                  Remote                "
+			 " SvID ConnID   Calls    End Use State    Key     "
+			 " Serial   ISerial\n"
+			 );
+		return 0;
+	}
+
+	conn = list_entry(v, struct rxrpc_connection, link);
+	trans = conn->trans;
+
+	/* "a.b.c.d:port" needs at most 15 + 1 + 5 chars and a NUL: 22 bytes */
+	sprintf(lbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+		ntohs(trans->local->srx.transport.sin.sin_port));
+
+	sprintf(rbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+		ntohs(trans->peer->srx.transport.sin.sin_port));
+
+	seq_printf(seq,
+		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   " %s %08x %08x %08x\n",
+		   lbuff,
+		   rbuff,
+		   ntohs(conn->service_id),
+		   ntohl(conn->cid),
+		   conn->call_counter,
+		   conn->in_clientflag ? "Svc" : "Clt",
+		   atomic_read(&conn->usage),
+		   rxrpc_conn_states[conn->state],
+		   key_serial(conn->key),
+		   atomic_read(&conn->serial),
+		   atomic_read(&conn->hi_serial));
+
+	return 0;
+}
+
+static struct seq_operations rxrpc_connection_seq_ops = {
+	.start  = rxrpc_connection_seq_start,
+	.next   = rxrpc_connection_seq_next,
+	.stop   = rxrpc_connection_seq_stop,
+	.show   = rxrpc_connection_seq_show,
+};
+
+
+static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rxrpc_connection_seq_ops); /* no private data */
+}
+
+struct file_operations rxrpc_connection_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rxrpc_connection_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release, /* pairs with the plain seq_open() */
+};
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
new file mode 100644
index 000000000000..f19121d4795b
--- /dev/null
+++ b/net/rxrpc/ar-recvmsg.c
@@ -0,0 +1,437 @@
+/* RxRPC recvmsg() implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * remove a call's user ID from the socket's tree, freeing the ID for reuse,
+ * and queue the call for release; NOTE(review): assumes rx == call->socket
+ */
+void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+	_debug("RELEASE CALL %d", call->debug_id);
+
+	if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		write_lock_bh(&rx->call_lock);
+		rb_erase(&call->sock_node, &call->socket->calls);
+		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+		write_unlock_bh(&rx->call_lock);
+	}
+
+	read_lock_bh(&call->state_lock);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		rxrpc_queue_call(call);	/* only if RELEASE wasn't already set */
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * receive a message from an RxRPC socket, copying data into msg->msg_iov and
+ * reporting call events as SOL_RXRPC control messages
+ * - returns the byte count copied or a negative error; recvmsg callers may race
+ */
+int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
+		  struct msghdr *msg, size_t len, int flags)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_call *call = NULL, *continue_call = NULL;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct sk_buff *skb;
+	long timeo;
+	int copy, ret, ullen, offset, copied = 0;
+	u32 abort_code;
+
+	DEFINE_WAIT(wait);
+
+	_enter(",,,%zu,%d", len, flags);
+
+	if (flags & (MSG_OOB | MSG_TRUNC))
+		return -EOPNOTSUPP;
+
+	ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+
+	timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+	msg->msg_flags |= MSG_MORE;
+
+	lock_sock(&rx->sk);
+
+	for (;;) {
+		/* return immediately if a client socket has no outstanding
+		 * calls */
+		if (RB_EMPTY_ROOT(&rx->calls)) {
+			if (copied)
+				goto out;
+			if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+				release_sock(&rx->sk);
+				if (continue_call)
+					rxrpc_put_call(continue_call);
+				return -ENODATA;
+			}
+		}
+
+		/* get the next message on the Rx queue */
+		skb = skb_peek(&rx->sk.sk_receive_queue);
+		if (!skb) {
+			/* nothing remains on the queue */
+			if (copied &&
+			    (msg->msg_flags & MSG_PEEK || timeo == 0))
+				goto out;
+
+			/* wait for a message to turn up */
+			release_sock(&rx->sk);
+			prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
+						  TASK_INTERRUPTIBLE);
+			ret = sock_error(&rx->sk);
+			if (ret)
+				goto wait_error;
+
+			if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
+				if (signal_pending(current))
+					goto wait_interrupted;
+				timeo = schedule_timeout(timeo);
+			}
+			finish_wait(rx->sk.sk_sleep, &wait);
+			lock_sock(&rx->sk);
+			continue;
+		}
+
+	peek_next_packet:
+		sp = rxrpc_skb(skb);
+		call = sp->call;
+		ASSERT(call != NULL);
+
+		_debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+
+		/* make sure we wait for the state to be updated in this call */
+		spin_lock_bh(&call->lock);
+		spin_unlock_bh(&call->lock);
+
+		if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+			_debug("packet from released call");
+			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+				BUG();
+			rxrpc_free_skb(skb);
+			continue;
+		}
+
+		/* determine whether to continue last data receive */
+		if (continue_call) {
+			_debug("maybe cont");
+			if (call != continue_call ||
+			    skb->mark != RXRPC_SKB_MARK_DATA) {
+				release_sock(&rx->sk);
+				rxrpc_put_call(continue_call);
+				_leave(" = %d [noncont]", copied);
+				return copied;
+			}
+		}
+
+		rxrpc_get_call(call);
+
+		/* copy the peer address (to the msg_name buffer) and timestamp */
+		if (!continue_call) {
+			if (msg->msg_name && msg->msg_namelen > 0)
+				memcpy(msg->msg_name, &call->conn->trans->peer->srx,
+				       sizeof(call->conn->trans->peer->srx));
+			sock_recv_timestamp(msg, &rx->sk, skb);
+		}
+
+		/* receive the message */
+		if (skb->mark != RXRPC_SKB_MARK_DATA)
+			goto receive_non_data_message;
+
+		_debug("recvmsg DATA #%u { %d, %d }",
+		       ntohl(sp->hdr.seq), skb->len, sp->offset);
+
+		if (!continue_call) {
+			/* only set the control data once per recvmsg() */
+			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+				       ullen, &call->user_call_ID);
+			if (ret < 0)
+				goto copy_error;
+			ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+		}
+
+		ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+		ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+		call->rx_data_recv = ntohl(sp->hdr.seq);
+
+		ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+
+		offset = sp->offset;
+		copy = skb->len - offset;
+		if (copy > len - copied)
+			copy = len - copied;
+
+		if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+			ret = skb_copy_datagram_iovec(skb, offset,
+						      msg->msg_iov, copy);
+		} else {
+			ret = skb_copy_and_csum_datagram_iovec(skb, offset,
+							       msg->msg_iov);
+			if (ret == -EINVAL)
+				goto csum_copy_error;
+		}
+
+		if (ret < 0)
+			goto copy_error;
+
+		/* handle piecemeal consumption of data packets */
+		_debug("copied %d+%d", copy, copied);
+
+		offset += copy;
+		copied += copy;
+
+		if (!(flags & MSG_PEEK))
+			sp->offset = offset;
+
+		if (sp->offset < skb->len) {
+			_debug("buffer full");
+			ASSERTCMP(copied, ==, len);
+			break;
+		}
+
+		/* we transferred the whole data packet */
+		if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+			_debug("last");
+			if (call->conn->out_clientflag) {
+				 /* last byte of reply received */
+				ret = copied;
+				goto terminal_message;
+			}
+
+			/* last bit of request received */
+			if (!(flags & MSG_PEEK)) {
+				_debug("eat packet");
+				if (skb_dequeue(&rx->sk.sk_receive_queue) !=
+				    skb)
+					BUG();
+				rxrpc_free_skb(skb);
+			}
+			msg->msg_flags &= ~MSG_MORE;
+			break;
+		}
+
+		/* move on to the next data message */
+		_debug("next");
+		if (!continue_call)
+			continue_call = sp->call;
+		else
+			rxrpc_put_call(call);
+		call = NULL;
+
+		if (flags & MSG_PEEK) {
+			_debug("peek next");
+			skb = skb->next;
+			if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
+				break;
+			goto peek_next_packet;
+		}
+
+		_debug("eat packet");
+		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+			BUG();
+		rxrpc_free_skb(skb);
+	}
+
+	/* end of non-terminal data packet reception for the moment */
+	_debug("end rcv data");
+out:
+	release_sock(&rx->sk);
+	if (call)
+		rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d [data]", copied);
+	return copied;
+
+	/* handle non-DATA messages such as aborts, incoming connections and
+	 * final ACKs */
+receive_non_data_message:
+	_debug("non-data");
+
+	if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
+		_debug("RECV NEW CALL");
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
+		if (ret < 0)
+			goto copy_error;
+		if (!(flags & MSG_PEEK)) {
+			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+				BUG();
+			rxrpc_free_skb(skb);
+		}
+		goto out;
+	}
+
+	ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+		       ullen, &call->user_call_ID);
+	if (ret < 0)
+		goto copy_error;
+	ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+
+	switch (skb->mark) {
+	case RXRPC_SKB_MARK_DATA:
+		BUG();
+	case RXRPC_SKB_MARK_FINAL_ACK:
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_BUSY:
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_REMOTE_ABORT:
+		abort_code = call->abort_code;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_NET_ERROR:
+		_debug("RECV NET ERROR %d", sp->error);
+		abort_code = sp->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_LOCAL_ERROR:
+		_debug("RECV LOCAL ERROR %d", sp->error);
+		abort_code = sp->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
+			       &abort_code);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	if (ret < 0)
+		goto copy_error;
+
+terminal_message:
+	_debug("terminal");
+	msg->msg_flags &= ~MSG_MORE;
+	msg->msg_flags |= MSG_EOR;
+
+	if (!(flags & MSG_PEEK)) {
+		_net("free terminal skb %p", skb);
+		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+			BUG();
+		rxrpc_free_skb(skb);
+		rxrpc_remove_user_ID(rx, call);
+	}
+
+	release_sock(&rx->sk);
+	rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d", ret);
+	return ret;
+
+copy_error:
+	_debug("copy error");
+	release_sock(&rx->sk);
+	rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d", ret);
+	return ret;
+
+csum_copy_error:
+	_debug("csum error");
+	release_sock(&rx->sk);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	rxrpc_kill_skb(skb);
+	skb_kill_datagram(&rx->sk, skb, flags);
+	rxrpc_put_call(call);
+	return -EAGAIN;
+
+wait_interrupted:
+	ret = sock_intr_errno(timeo);
+wait_error:
+	finish_wait(rx->sk.sk_sleep, &wait);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	if (!copied)
+		copied = ret;	/* nothing delivered, so report the error */
+	_leave(" = %d [waitfail %d]", copied, ret);
+	return copied;
+
+}
+
+/**
+ * rxrpc_kernel_data_delivered - Record delivery of data message
+ * @skb: Message holding data
+ *
+ * Note that a data message has been delivered so that RxRPC's receive
+ * tracking stays correct.  The socket buffer is freed by this call.
+ */
+void rxrpc_kernel_data_delivered(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_call *call = sp->call;
+	u32 seq = ntohl(sp->hdr.seq);
+
+	ASSERTCMP(seq, >=, call->rx_data_recv);
+	ASSERTCMP(seq, <=, call->rx_data_recv + 1);
+	call->rx_data_recv = seq;
+	ASSERTCMP(seq, >, call->rx_data_eaten);
+	rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
+
+/**
+ * rxrpc_kernel_is_data_last - Determine if data message is last one
+ * @skb: Message holding data
+ *
+ * Report whether the packet header carries the last-packet flag.
+ */
+bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+{
+	bool last;
+
+	ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+	last = rxrpc_skb(skb)->hdr.flags & RXRPC_LAST_PACKET;
+	return last;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+
+/**
+ * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
+ * @skb: Message indicating an abort
+ *
+ * Return the abort code recorded on the call that the message belongs to.
+ */
+u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
+{
+	struct rxrpc_call *call;
+
+	ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT);
+	call = rxrpc_skb(skb)->call;
+	return call->abort_code;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
+
+/**
+ * rxrpc_kernel_get_error_number - Get the error number from an RxRPC error message
+ * @skb: Message indicating an error
+ *
+ * Return the error value stored in the message's RxRPC private data.
+ */
+int rxrpc_kernel_get_error_number(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	return sp->error;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
new file mode 100644
index 000000000000..60d1d364430a
--- /dev/null
+++ b/net/rxrpc/ar-security.c
@@ -0,0 +1,258 @@
+/* RxRPC security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_security_methods);
+static DECLARE_RWSEM(rxrpc_security_sem);
+
+/*
+ * pin the module behind a security handler; NULL if no ref could be taken
+ */
+static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec)
+{
+	return try_module_get(sec->owner) ? sec : NULL;
+}
+
+/*
+ * drop the module ref held on behalf of an RxRPC security handler
+ */
+static void rxrpc_security_put(struct rxrpc_security *sec)
+{
+	module_put(sec->owner);
+}
+
+/*
+ * find the security handler for an index and pin its module; NULL on failure
+ */
+struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
+{
+	struct rxrpc_security *p, *found = NULL;
+
+	_enter("");
+
+	down_read(&rxrpc_security_sem);
+
+	list_for_each_entry(p, &rxrpc_security_methods, link) {
+		if (p->security_index != security_index)
+			continue;
+		/* only the first match is tried: if its module can't be
+		 * pinned, the lookup fails */
+		found = rxrpc_security_get(p);
+		break;
+	}
+
+	up_read(&rxrpc_security_sem);
+
+	_leave(" = %p [%s]", found, found ? found->name : "");
+	return found;
+}
+
+/**
+ * rxrpc_register_security - register an RxRPC security handler
+ * @sec: security module
+ *
+ * Add a handler to the list RxRPC consults; -EEXIST if the index is taken.
+ */
+int rxrpc_register_security(struct rxrpc_security *sec)
+{
+	struct rxrpc_security *cur;
+	int ret = 0;
+
+	_enter("");
+	down_write(&rxrpc_security_sem);
+	/* a security index may only be claimed by one handler */
+	list_for_each_entry(cur, &rxrpc_security_methods, link) {
+		if (cur->security_index == sec->security_index) {
+			ret = -EEXIST;
+			break;
+		}
+	}
+
+	if (ret == 0) {
+		list_add(&sec->link, &rxrpc_security_methods);
+		printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n",
+		       sec->security_index, sec->name);
+	}
+
+	up_write(&rxrpc_security_sem);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_register_security);
+
+/**
+ * rxrpc_unregister_security - unregister an RxRPC security handler
+ * @sec: security module
+ *
+ * Take the handler off the list of security methods and log the removal.
+ */
+void rxrpc_unregister_security(struct rxrpc_security *sec)
+{
+
+	_enter("");
+	down_write(&rxrpc_security_sem);
+	list_del_init(&sec->link);
+	up_write(&rxrpc_security_sem);
+
+	printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n",
+	       sec->security_index, sec->name);
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_unregister_security);
+
+/*
+ * initialise the security on a client connection, as selected by its key
+ */
+int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_security *sec;
+	struct key *key = conn->key;
+	int ret;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(key));
+
+	if (!key)
+		return 0;	/* keyless, so leave the connection unsecured */
+
+	ret = key_validate(key);
+	if (ret < 0)
+		return ret;
+
+	sec = rxrpc_security_lookup(key->type_data.x[0]);
+	if (!sec)
+		return -EKEYREJECTED;	/* no handler for the key's index */
+	conn->security = sec;
+
+	ret = conn->security->init_connection_security(conn);
+	if (ret < 0) {
+		rxrpc_security_put(conn->security);	/* undo the lookup */
+		conn->security = NULL;
+		return ret;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * initialise the security on a server connection from the service's keyring
+ */
+int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_security *sec;
+	struct rxrpc_local *local = conn->trans->local;
+	struct rxrpc_sock *rx;
+	struct key *key;
+	key_ref_t kref;
+	char kdesc[5+1+3+1];	/* "<service>:<security ix>" and a NUL */
+
+	_enter("");
+
+	sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix);
+
+	sec = rxrpc_security_lookup(conn->security_ix);
+	if (!sec) {
+		_leave(" = -ENOKEY [lookup]");
+		return -ENOKEY;
+	}
+
+	/* find the listening socket that provides this service ID */
+	read_lock_bh(&local->services_lock);
+	list_for_each_entry(rx, &local->services, listen_link) {
+		if (rx->service_id == conn->service_id)
+			goto found_service;
+	}
+
+	/* the service appears to have died, so give the handler back */
+	read_unlock_bh(&local->services_lock);
+	rxrpc_security_put(sec);
+	_leave(" = -ENOENT");
+	return -ENOENT;
+
+found_service:
+	if (!rx->securities) {
+		read_unlock_bh(&local->services_lock);
+		rxrpc_security_put(sec);
+		_leave(" = -ENOKEY");
+		return -ENOKEY;
+	}
+
+	/* look through the service's keyring for the key named by kdesc */
+	kref = keyring_search(make_key_ref(rx->securities, 1UL),
+			      &key_type_rxrpc_s, kdesc);
+	if (IS_ERR(kref)) {
+		read_unlock_bh(&local->services_lock);
+		rxrpc_security_put(sec);
+		_leave(" = %ld [search]", PTR_ERR(kref));
+		return PTR_ERR(kref);
+	}
+
+	key = key_ref_to_ptr(kref);
+	read_unlock_bh(&local->services_lock);
+
+	conn->server_key = key;	/* the connection keeps key and handler */
+	conn->security = sec;
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * secure a packet prior to transmission, if the connection has security
+ */
+int rxrpc_secure_packet(const struct rxrpc_call *call,
+			struct sk_buff *skb,
+			size_t data_size,
+			void *sechdr)
+{
+	struct rxrpc_security *sec = call->conn->security;
+
+	/* an unsecured connection sends the packet as it is */
+	return sec ? sec->secure_packet(call, skb, data_size, sechdr) : 0;
+}
+
+/*
+ * pass a packet to the connection's security handler for verification
+ */
+int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb,
+			u32 *_abort_code)
+{
+	struct rxrpc_security *sec = call->conn->security;
+
+	/* with no security on the connection, every packet passes */
+	return sec ? sec->verify_packet(call, skb, _abort_code) : 0;
+}
+
+/*
+ * clear connection security, dropping the handler ref and both key refs
+ */
+void rxrpc_clear_conn_security(struct rxrpc_connection *conn)
+{
+	_enter("{%d}", conn->debug_id);
+
+	if (conn->security) {
+		conn->security->clear(conn);	/* handler's own cleanup */
+		rxrpc_security_put(conn->security);
+		conn->security = NULL;
+	}
+
+	key_put(conn->key);
+	key_put(conn->server_key);
+}
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
new file mode 100644
index 000000000000..de755e04d29c
--- /dev/null
+++ b/net/rxrpc/ar-skbuff.c
@@ -0,0 +1,132 @@
+/* ar-skbuff.c: socket buffer destruction handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * set up for the ACK at the end of the receive phase when we discard the final
+ * receive phase data packet; only calls still receiving change state
+ * - called with softirqs disabled
+ */
+static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+{
+	/* the call may be aborted before we have a chance to ACK it */
+	write_lock(&call->state_lock);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_RECV_REPLY:
+		call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
+		_debug("request final ACK");
+
+		/* get an extra ref on the call for the final-ACK generator to
+		 * release */
+		rxrpc_get_call(call);
+		set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+		if (try_to_del_timer_sync(&call->ack_timer) >= 0)
+			rxrpc_queue_call(call);
+		break;
+
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;	/* fall through */
+	default:
+		break;
+	}
+
+	write_unlock(&call->state_lock);
+}
+
+/*
+ * drop the bottom ACK off of the call ACK window and advance the window
+ */
+static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
+				struct rxrpc_skb_priv *sp)
+{
+	int loop;
+	u32 seq;
+
+	spin_lock_bh(&call->lock);
+
+	_debug("hard ACK #%u", ntohl(sp->hdr.seq));
+
+	for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+		call->ackr_window[loop] >>= 1;	/* shift out the bottom bit */
+		call->ackr_window[loop] |=	/* last pass reads word [ASZ] */
+			call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
+	}
+
+	seq = ntohl(sp->hdr.seq);
+	ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
+	call->rx_data_eaten = seq;
+
+	if (call->ackr_win_top < UINT_MAX)
+		call->ackr_win_top++;
+
+	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+		    call->rx_data_post, >=, call->rx_data_recv);
+	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+		    call->rx_data_recv, >=, call->rx_data_eaten);
+
+	if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+		rxrpc_request_final_ACK(call);
+	} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
+		   test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+		_debug("send Rx idle ACK");
+		__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
+				    true);
+	}
+
+	spin_unlock_bh(&call->lock);
+}
+
+/*
+ * destroy a packet that has an RxRPC control buffer
+ * - advance the hard-ACK state of the parent call (done here in case something
+ *   in the kernel bypasses recvmsg() and steals the packet directly off of the
+ *   socket receive queue)
+ */
+void rxrpc_packet_destructor(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_call *call = sp->call;
+
+	_enter("%p{%p}", skb, call);
+
+	if (call) {
+		/* freeing a DATA packet advances the call's hard-ACK state */
+		if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+			rxrpc_hard_ACK_data(call, sp);
+		rxrpc_put_call(call);	/* drop the packet's ref on the call */
+		sp->call = NULL;
+	}
+
+	if (skb->sk)
+		sock_rfree(skb);
+	_leave("");
+}
+
+/**
+ * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
+ * @skb: The socket buffer to be freed
+ *
+ * Exported wrapper around rxrpc_free_skb() so that RxRPC frees its own
+ * socket buffers and can maintain its debug accounting.
+ */
+void rxrpc_kernel_free_skb(struct sk_buff *skb)
+{
+	rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
new file mode 100644
index 000000000000..d43d78f19302
--- /dev/null
+++ b/net/rxrpc/ar-transport.c
@@ -0,0 +1,276 @@
+/* RxRPC point-to-point transport session management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_transport_reaper(struct work_struct *work);
+
+static LIST_HEAD(rxrpc_transports);
+static DEFINE_RWLOCK(rxrpc_transport_lock);
+static unsigned long rxrpc_transport_timeout = 3600 * 24;
+static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
+
+/*
+ * allocate and initialise a transport session record; NULL if out of memory
+ */
+static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
+						     struct rxrpc_peer *peer,
+						     gfp_t gfp)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("");
+
+	trans = kzalloc(sizeof(*trans), gfp);
+	if (!trans)
+		goto out;
+
+	trans->local = local;
+	trans->peer = peer;
+	INIT_LIST_HEAD(&trans->link);
+	trans->bundles = RB_ROOT;
+	trans->client_conns = RB_ROOT;
+	trans->server_conns = RB_ROOT;
+	skb_queue_head_init(&trans->error_queue);
+	spin_lock_init(&trans->client_lock);
+	rwlock_init(&trans->conn_lock);
+	atomic_set(&trans->usage, 1);
+	trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+	/* only AF_INET datagram transports have an error handler */
+	if (peer->srx.transport.family != AF_INET)
+		BUG();
+	switch (peer->srx.transport_type) {
+	case SOCK_DGRAM:
+		INIT_WORK(&trans->error_handler, rxrpc_UDP_error_handler);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+out:
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/*
+ * get the transport session for a local/peer pair, creating it if absent
+ */
+struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
+					    struct rxrpc_peer *peer,
+					    gfp_t gfp)
+{
+	struct rxrpc_transport *trans, *candidate;
+	const char *new = "old";
+	int usage;
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* search the transport list first */
+	read_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_transport;
+	}
+	read_unlock_bh(&rxrpc_transport_lock);
+
+	/* not yet present - allocate a candidate with the lock dropped and
+	 * then redo the search under the write lock */
+	candidate = rxrpc_alloc_transport(local, peer, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	write_lock_bh(&rxrpc_transport_lock);
+
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_second;
+	}
+
+	/* add the candidate to the list; it now pins the local and peer */
+	trans = candidate;
+	candidate = NULL;
+
+	rxrpc_get_local(trans->local);
+	atomic_inc(&trans->peer->usage);
+	list_add_tail(&trans->link, &rxrpc_transports);
+	write_unlock_bh(&rxrpc_transport_lock);
+	new = "new";
+
+success:
+	_net("TRANSPORT %s %d local %d -> peer %d",
+	     new,
+	     trans->debug_id,
+	     trans->local->debug_id,
+	     trans->peer->debug_id);
+
+	_leave(" = %p {u=%d}", trans, atomic_read(&trans->usage));
+	return trans;
+
+	/* we found the transport in the list immediately */
+found_extant_transport:
+	usage = atomic_inc_return(&trans->usage);
+	read_unlock_bh(&rxrpc_transport_lock);
+	goto success;
+
+	/* we found the transport on the second time through the list */
+found_extant_second:
+	usage = atomic_inc_return(&trans->usage);
+	write_unlock_bh(&rxrpc_transport_lock);
+	kfree(candidate);	/* lost the race, so discard our record */
+	goto success;
+}
+
+/*
+ * find the transport connecting two endpoints and take a ref on it
+ */
+struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
+					     struct rxrpc_peer *peer)
+{
+	struct rxrpc_transport *iter, *found = NULL;
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* walk the transport list looking for this local/peer pairing */
+	read_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry(iter, &rxrpc_transports, link) {
+		if (iter->local != local || iter->peer != peer)
+			continue;
+		/* take the caller's ref while still under the lock */
+		atomic_inc(&iter->usage);
+		found = iter;
+		break;
+	}
+	read_unlock_bh(&rxrpc_transport_lock);
+	if (!found) {
+		_leave(" = NULL");
+		return NULL;
+	}
+	_leave(" = %p", found);
+	return found;
+}
+
+/*
+ * release a ref on a transport session; the last put hands it to the reaper
+ */
+void rxrpc_put_transport(struct rxrpc_transport *trans)
+{
+	_enter("%p{u=%d}", trans, atomic_read(&trans->usage));
+
+	ASSERTCMP(atomic_read(&trans->usage), >, 0);
+
+	trans->put_time = xtime.tv_sec;
+	if (unlikely(atomic_dec_and_test(&trans->usage))) {
+		_debug("zombie");
+		/* let the reaper work out the timeout so that we don't race
+		 * with it and overextend the timeout if it's already running */
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
+	}
+	_leave("");
+}
+
+/*
+ * clean up a transport session: purge its errors, drop its refs and free it
+ */
+static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
+{
+	_net("DESTROY TRANS %d", trans->debug_id);
+
+	rxrpc_purge_queue(&trans->error_queue);
+
+	rxrpc_put_local(trans->local);
+	rxrpc_put_peer(trans->peer);
+	kfree(trans);
+}
+
+/*
+ * reap unused transports whose put_time plus the timeout has passed
+ */
+static void rxrpc_transport_reaper(struct work_struct *work)
+{
+	struct rxrpc_transport *trans, *_p;
+	unsigned long now, earliest, reap_time;
+
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	now = xtime.tv_sec;
+	earliest = ULONG_MAX;	/* meaning nothing is waiting to expire */
+
+	/* extract all the transports that have been dead too long */
+	write_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
+		_debug("reap TRANS %d { u=%d t=%ld }",
+		       trans->debug_id, atomic_read(&trans->usage),
+		       (long) now - (long) trans->put_time);
+
+		if (likely(atomic_read(&trans->usage) > 0))
+			continue;
+
+		reap_time = trans->put_time + rxrpc_transport_timeout;
+		if (reap_time <= now)
+			list_move_tail(&trans->link, &graveyard);
+		else if (reap_time < earliest)
+			earliest = reap_time;
+	}
+	write_unlock_bh(&rxrpc_transport_lock);
+
+	if (earliest != ULONG_MAX) {
+		_debug("reschedule reaper %ld", (long) earliest - now);
+		ASSERTCMP(earliest, >, now);
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap,
+					 (earliest - now) * HZ);
+	}
+
+	/* then destroy all those pulled out, now that the lock is dropped */
+	while (!list_empty(&graveyard)) {
+		trans = list_entry(graveyard.next, struct rxrpc_transport,
+				   link);
+		list_del_init(&trans->link);
+
+		ASSERTCMP(atomic_read(&trans->usage), ==, 0);
+		rxrpc_cleanup_transport(trans);
+	}
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy the unused transport session records rather than
+ * waiting for them to time out: zero the timeout and kick the reaper at once
+ */
+void __exit rxrpc_destroy_all_transports(void)
+{
+	_enter("");
+
+	rxrpc_transport_timeout = 0;
+	cancel_delayed_work(&rxrpc_transport_reap);
+	rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
+
+	_leave("");
+}
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
deleted file mode 100644
index d07122b57e0d..000000000000
--- a/net/rxrpc/call.c
+++ /dev/null
@@ -1,2277 +0,0 @@
-/* call.c: Rx call routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_call_count);
-__RXACCT_DECL(atomic_t rxrpc_message_count);
-
-LIST_HEAD(rxrpc_calls);
-DECLARE_RWSEM(rxrpc_calls_sem);
-
-unsigned rxrpc_call_rcv_timeout			= HZ/3;
-static unsigned rxrpc_call_acks_timeout		= HZ/3;
-static unsigned rxrpc_call_dfr_ack_timeout	= HZ/20;
-static unsigned short rxrpc_call_max_resend	= HZ/10;
-
-const char *rxrpc_call_states[] = {
-	"COMPLETE",
-	"ERROR",
-	"SRVR_RCV_OPID",
-	"SRVR_RCV_ARGS",
-	"SRVR_GOT_ARGS",
-	"SRVR_SND_REPLY",
-	"SRVR_RCV_FINAL_ACK",
-	"CLNT_SND_ARGS",
-	"CLNT_RCV_REPLY",
-	"CLNT_GOT_REPLY"
-};
-
-const char *rxrpc_call_error_states[] = {
-	"NO_ERROR",
-	"LOCAL_ABORT",
-	"PEER_ABORT",
-	"LOCAL_ERROR",
-	"REMOTE_ERROR"
-};
-
-const char *rxrpc_pkts[] = {
-	"?00",
-	"data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
-	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
-};
-
-static const char *rxrpc_acks[] = {
-	"---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
-	"-?-"
-};
-
-static const char _acktype[] = "NA-";
-
-static void rxrpc_call_receive_packet(struct rxrpc_call *call);
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
-					   struct rxrpc_message *msg);
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
-					  struct rxrpc_message *msg);
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
-					rxrpc_seq_t higest);
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
-static int __rxrpc_call_read_data(struct rxrpc_call *call);
-
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
-				 struct rxrpc_message *msg,
-				 rxrpc_seq_t seq,
-				 size_t count);
-
-static int rxrpc_call_flush(struct rxrpc_call *call);
-
-#define _state(call) \
-	_debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
-
-static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
-{
-	wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_error_func(struct rxrpc_call *call)
-{
-	wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
-{
-	switch (call->app_err_state) {
-	case RXRPC_ESTATE_LOCAL_ABORT:
-		call->app_abort_code = -call->app_errno;
-	case RXRPC_ESTATE_PEER_ABORT:
-		call->app_errno = -ECONNABORTED;
-	default:
-		break;
-	}
-}
-
-static void __rxrpc_call_acks_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_ACKS_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_rcv_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_RCV_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_ackr_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_ACKR_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-/*****************************************************************************/
-/*
- * calculate a timeout based on an RTT value
- */
-static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
-						      unsigned long val)
-{
-	unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
-
-	expiry += 10;
-	if (expiry < HZ / 25)
-		expiry = HZ / 25;
-	if (expiry > HZ)
-		expiry = HZ;
-
-	_leave(" = %lu jiffies", expiry);
-	return jiffies + expiry;
-} /* end __rxrpc_rtt_based_timeout() */
-
-/*****************************************************************************/
-/*
- * create a new call record
- */
-static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
-				      struct rxrpc_call **_call)
-{
-	struct rxrpc_call *call;
-
-	_enter("%p", conn);
-
-	/* allocate and initialise a call record */
-	call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
-	if (!call) {
-		_leave(" ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&call->usage, 1);
-
-	init_waitqueue_head(&call->waitq);
-	spin_lock_init(&call->lock);
-	INIT_LIST_HEAD(&call->link);
-	INIT_LIST_HEAD(&call->acks_pendq);
-	INIT_LIST_HEAD(&call->rcv_receiveq);
-	INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
-	INIT_LIST_HEAD(&call->app_readyq);
-	INIT_LIST_HEAD(&call->app_unreadyq);
-	INIT_LIST_HEAD(&call->app_link);
-	INIT_LIST_HEAD(&call->app_attn_link);
-
-	init_timer(&call->acks_timeout);
-	call->acks_timeout.data = (unsigned long) call;
-	call->acks_timeout.function = __rxrpc_call_acks_timeout;
-
-	init_timer(&call->rcv_timeout);
-	call->rcv_timeout.data = (unsigned long) call;
-	call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
-
-	init_timer(&call->ackr_dfr_timo);
-	call->ackr_dfr_timo.data = (unsigned long) call;
-	call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
-
-	call->conn = conn;
-	call->ackr_win_bot = 1;
-	call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
-	call->ackr_prev_seq = 0;
-	call->app_mark = RXRPC_APP_MARK_EOF;
-	call->app_attn_func = rxrpc_call_default_attn_func;
-	call->app_error_func = rxrpc_call_default_error_func;
-	call->app_aemap_func = rxrpc_call_default_aemap_func;
-	call->app_scr_alloc = call->app_scratch;
-
-	call->cjif = jiffies;
-
-	_leave(" = 0 (%p)", call);
-
-	*_call = call;
-
-	return 0;
-} /* end __rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for outgoing calls
- */
-int rxrpc_create_call(struct rxrpc_connection *conn,
-		      rxrpc_call_attn_func_t attn,
-		      rxrpc_call_error_func_t error,
-		      rxrpc_call_aemap_func_t aemap,
-		      struct rxrpc_call **_call)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_call *call;
-	int ret, cix, loop;
-
-	_enter("%p", conn);
-
-	/* allocate and initialise a call record */
-	ret = __rxrpc_create_call(conn, &call);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
-	if (attn)
-		call->app_attn_func = attn;
-	if (error)
-		call->app_error_func = error;
-	if (aemap)
-		call->app_aemap_func = aemap;
-
-	_state(call);
-
-	spin_lock(&conn->lock);
-	set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&conn->chanwait, &myself);
-
- try_again:
-	/* try to find an unused channel */
-	for (cix = 0; cix < 4; cix++)
-		if (!conn->channels[cix])
-			goto obtained_chan;
-
-	/* no free channels - wait for one to become available */
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto error_unwait;
-
-	spin_unlock(&conn->lock);
-
-	schedule();
-	set_current_state(TASK_INTERRUPTIBLE);
-
-	spin_lock(&conn->lock);
-	goto try_again;
-
-	/* got a channel - now attach to the connection */
- obtained_chan:
-	remove_wait_queue(&conn->chanwait, &myself);
-	set_current_state(TASK_RUNNING);
-
-	/* concoct a unique call number */
- next_callid:
-	call->call_id = htonl(++conn->call_counter);
-	for (loop = 0; loop < 4; loop++)
-		if (conn->channels[loop] &&
-		    conn->channels[loop]->call_id == call->call_id)
-			goto next_callid;
-
-	rxrpc_get_connection(conn);
-	conn->channels[cix] = call; /* assign _after_ done callid check loop */
-	do_gettimeofday(&conn->atime);
-	call->chan_ix = htonl(cix);
-
-	spin_unlock(&conn->lock);
-
-	down_write(&rxrpc_calls_sem);
-	list_add_tail(&call->call_link, &rxrpc_calls);
-	up_write(&rxrpc_calls_sem);
-
-	__RXACCT(atomic_inc(&rxrpc_call_count));
-	*_call = call;
-
-	_leave(" = 0 (call=%p cix=%u)", call, cix);
-	return 0;
-
- error_unwait:
-	remove_wait_queue(&conn->chanwait, &myself);
-	set_current_state(TASK_RUNNING);
-	spin_unlock(&conn->lock);
-
-	free_page((unsigned long) call);
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for incoming calls
- */
-int rxrpc_incoming_call(struct rxrpc_connection *conn,
-			struct rxrpc_message *msg,
-			struct rxrpc_call **_call)
-{
-	struct rxrpc_call *call;
-	unsigned cix;
-	int ret;
-
-	cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
-	_enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
-
-	/* allocate and initialise a call record */
-	ret = __rxrpc_create_call(conn, &call);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	call->pkt_rcv_count = 1;
-	call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
-	call->app_mark = sizeof(uint32_t);
-
-	_state(call);
-
-	/* attach to the connection */
-	ret = -EBUSY;
-	call->chan_ix = htonl(cix);
-	call->call_id = msg->hdr.callNumber;
-
-	spin_lock(&conn->lock);
-
-	if (!conn->channels[cix] ||
-	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
-	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
-	    ) {
-		conn->channels[cix] = call;
-		rxrpc_get_connection(conn);
-		ret = 0;
-	}
-
-	spin_unlock(&conn->lock);
-
-	if (ret < 0) {
-		free_page((unsigned long) call);
-		call = NULL;
-	}
-
-	if (ret == 0) {
-		down_write(&rxrpc_calls_sem);
-		list_add_tail(&call->call_link, &rxrpc_calls);
-		up_write(&rxrpc_calls_sem);
-		__RXACCT(atomic_inc(&rxrpc_call_count));
-		*_call = call;
-	}
-
-	_leave(" = %d [%p]", ret, call);
-	return ret;
-} /* end rxrpc_incoming_call() */
-
-/*****************************************************************************/
-/*
- * free a call record
- */
-void rxrpc_put_call(struct rxrpc_call *call)
-{
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_message *msg;
-
-	_enter("%p{u=%d}",call,atomic_read(&call->usage));
-
-	/* sanity check */
-	if (atomic_read(&call->usage) <= 0)
-		BUG();
-
-	/* to prevent a race, the decrement and the de-list must be effectively
-	 * atomic */
-	spin_lock(&conn->lock);
-	if (likely(!atomic_dec_and_test(&call->usage))) {
-		spin_unlock(&conn->lock);
-		_leave("");
-		return;
-	}
-
-	if (conn->channels[ntohl(call->chan_ix)] == call)
-		conn->channels[ntohl(call->chan_ix)] = NULL;
-
-	spin_unlock(&conn->lock);
-
-	wake_up(&conn->chanwait);
-
-	rxrpc_put_connection(conn);
-
-	/* clear the timers and dequeue from krxiod */
-	del_timer_sync(&call->acks_timeout);
-	del_timer_sync(&call->rcv_timeout);
-	del_timer_sync(&call->ackr_dfr_timo);
-
-	rxrpc_krxiod_dequeue_call(call);
-
-	/* clean up the contents of the struct */
-	if (call->snd_nextmsg)
-		rxrpc_put_message(call->snd_nextmsg);
-
-	if (call->snd_ping)
-		rxrpc_put_message(call->snd_ping);
-
-	while (!list_empty(&call->acks_pendq)) {
-		msg = list_entry(call->acks_pendq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->rcv_receiveq)) {
-		msg = list_entry(call->rcv_receiveq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->app_readyq)) {
-		msg = list_entry(call->app_readyq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->app_unreadyq)) {
-		msg = list_entry(call->app_unreadyq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	module_put(call->owner);
-
-	down_write(&rxrpc_calls_sem);
-	list_del(&call->call_link);
-	up_write(&rxrpc_calls_sem);
-
-	__RXACCT(atomic_dec(&rxrpc_call_count));
-	free_page((unsigned long) call);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_put_call() */
-
-/*****************************************************************************/
-/*
- * actually generate a normal ACK
- */
-static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
-					      rxrpc_seq_t seq)
-{
-	struct rxrpc_message *msg;
-	struct kvec diov[3];
-	__be32 aux[4];
-	int delta, ret;
-
-	/* ACKs default to DELAY */
-	if (!call->ackr.reason)
-		call->ackr.reason = RXRPC_ACK_DELAY;
-
-	_proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-	       jiffies - call->cjif,
-	       ntohs(call->ackr.maxSkew),
-	       ntohl(call->ackr.firstPacket),
-	       ntohl(call->ackr.previousPacket),
-	       ntohl(call->ackr.serial),
-	       rxrpc_acks[call->ackr.reason],
-	       call->ackr.nAcks);
-
-	aux[0] = htonl(call->conn->peer->if_mtu);	/* interface MTU */
-	aux[1] = htonl(1444);				/* max MTU */
-	aux[2] = htonl(16);				/* rwind */
-	aux[3] = htonl(4);				/* max packets */
-
-	diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
-	diov[0].iov_base = &call->ackr;
-	diov[1].iov_len  = call->ackr_pend_cnt + 3;
-	diov[1].iov_base = call->ackr_array;
-	diov[2].iov_len  = sizeof(aux);
-	diov[2].iov_base = &aux;
-
-	/* build and send the message */
-	ret = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
-				3, diov, GFP_KERNEL, &msg);
-	if (ret < 0)
-		goto out;
-
-	msg->seq = seq;
-	msg->hdr.seq = htonl(seq);
-	msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
-	ret = rxrpc_conn_sendmsg(call->conn, msg);
-	rxrpc_put_message(msg);
-	if (ret < 0)
-		goto out;
-	call->pkt_snd_count++;
-
-	/* count how many actual ACKs there were at the front */
-	for (delta = 0; delta < call->ackr_pend_cnt; delta++)
-		if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
-			break;
-
-	call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
-
-	/* crank the ACK window around */
-	if (delta == 0) {
-		/* un-ACK'd window */
-	}
-	else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
-		/* partially ACK'd window
-		 * - shuffle down to avoid losing out-of-sequence packets
-		 */
-		call->ackr_win_bot += delta;
-		call->ackr_win_top += delta;
-
-		memmove(&call->ackr_array[0],
-			&call->ackr_array[delta],
-			call->ackr_pend_cnt);
-
-		memset(&call->ackr_array[call->ackr_pend_cnt],
-		       RXRPC_ACK_TYPE_NACK,
-		       sizeof(call->ackr_array) - call->ackr_pend_cnt);
-	}
-	else {
-		/* fully ACK'd window
-		 * - just clear the whole thing
-		 */
-		memset(&call->ackr_array,
-		       RXRPC_ACK_TYPE_NACK,
-		       sizeof(call->ackr_array));
-	}
-
-	/* clear this ACK */
-	memset(&call->ackr, 0, sizeof(call->ackr));
-
- out:
-	if (!call->app_call_state)
-		printk("___ STATE 0 ___\n");
-	return ret;
-} /* end __rxrpc_call_gen_normal_ACK() */
-
-/*****************************************************************************/
-/*
- * note the reception of a packet in the call's ACK records and generate an
- * appropriate ACK packet if necessary
- * - returns 0 if packet should be processed, 1 if packet should be ignored
- *   and -ve on an error
- */
-static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
-				   struct rxrpc_header *hdr,
-				   struct rxrpc_ackpacket *ack)
-{
-	struct rxrpc_message *msg;
-	rxrpc_seq_t seq;
-	unsigned offset;
-	int ret = 0, err;
-	u8 special_ACK, do_ACK, force;
-
-	_enter("%p,%p { seq=%d tp=%d fl=%02x }",
-	       call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
-
-	seq = ntohl(hdr->seq);
-	offset = seq - call->ackr_win_bot;
-	do_ACK = RXRPC_ACK_DELAY;
-	special_ACK = 0;
-	force = (seq == 1);
-
-	if (call->ackr_high_seq < seq)
-		call->ackr_high_seq = seq;
-
-	/* deal with generation of obvious special ACKs first */
-	if (ack && ack->reason == RXRPC_ACK_PING) {
-		special_ACK = RXRPC_ACK_PING_RESPONSE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (seq < call->ackr_win_bot) {
-		special_ACK = RXRPC_ACK_DUPLICATE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (seq >= call->ackr_win_top) {
-		special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
-		special_ACK = RXRPC_ACK_DUPLICATE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	/* okay... it's a normal data packet inside the ACK window */
-	call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
-
-	if (offset < call->ackr_pend_cnt) {
-	}
-	else if (offset > call->ackr_pend_cnt) {
-		do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
-		call->ackr_pend_cnt = offset;
-		goto gen_ACK;
-	}
-
-	if (hdr->flags & RXRPC_REQUEST_ACK) {
-		do_ACK = RXRPC_ACK_REQUESTED;
-	}
-
-	/* generate an ACK on the final packet of a reply just received */
-	if (hdr->flags & RXRPC_LAST_PACKET) {
-		if (call->conn->out_clientflag)
-			force = 1;
-	}
-	else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
-		do_ACK = RXRPC_ACK_REQUESTED;
-	}
-
-	/* re-ACK packets previously received out-of-order */
-	for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
-		if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
-			break;
-
-	call->ackr_pend_cnt = offset;
-
-	/* generate an ACK if we fill up the window */
-	if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
-		force = 1;
-
- gen_ACK:
-	_debug("%05lu ACKs pend=%u norm=%s special=%s%s",
-	       jiffies - call->cjif,
-	       call->ackr_pend_cnt,
-	       rxrpc_acks[do_ACK],
-	       rxrpc_acks[special_ACK],
-	       force ? " immediate" :
-	       do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
-	       hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
-	       " defer"
-	       );
-
-	/* send any pending normal ACKs if need be */
-	if (call->ackr_pend_cnt > 0) {
-		/* fill out the appropriate form */
-		call->ackr.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
-		call->ackr.maxSkew	= htons(min(call->ackr_high_seq - seq,
-						    65535U));
-		call->ackr.firstPacket	= htonl(call->ackr_win_bot);
-		call->ackr.previousPacket = call->ackr_prev_seq;
-		call->ackr.serial	= hdr->serial;
-		call->ackr.nAcks	= call->ackr_pend_cnt;
-
-		if (do_ACK == RXRPC_ACK_REQUESTED)
-			call->ackr.reason = do_ACK;
-
-		/* generate the ACK immediately if necessary */
-		if (special_ACK || force) {
-			err = __rxrpc_call_gen_normal_ACK(
-				call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
-			if (err < 0) {
-				ret = err;
-				goto out;
-			}
-		}
-	}
-
-	if (call->ackr.reason == RXRPC_ACK_REQUESTED)
-		call->ackr_dfr_seq = seq;
-
-	/* start the ACK timer if not running if there are any pending deferred
-	 * ACKs */
-	if (call->ackr_pend_cnt > 0 &&
-	    call->ackr.reason != RXRPC_ACK_REQUESTED &&
-	    !timer_pending(&call->ackr_dfr_timo)
-	    ) {
-		unsigned long timo;
-
-		timo = rxrpc_call_dfr_ack_timeout + jiffies;
-
-		_debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
-
-		spin_lock(&call->lock);
-		mod_timer(&call->ackr_dfr_timo, timo);
-		spin_unlock(&call->lock);
-	}
-	else if ((call->ackr_pend_cnt == 0 ||
-		  call->ackr.reason == RXRPC_ACK_REQUESTED) &&
-		 timer_pending(&call->ackr_dfr_timo)
-		 ) {
-		/* stop timer if no pending ACKs */
-		_debug("CLEAR ACKR TIMER");
-		del_timer_sync(&call->ackr_dfr_timo);
-	}
-
-	/* send a special ACK if one is required */
-	if (special_ACK) {
-		struct rxrpc_ackpacket ack;
-		struct kvec diov[2];
-		uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
-
-		/* fill out the appropriate form */
-		ack.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
-		ack.maxSkew	= htons(min(call->ackr_high_seq - seq,
-					    65535U));
-		ack.firstPacket	= htonl(call->ackr_win_bot);
-		ack.previousPacket = call->ackr_prev_seq;
-		ack.serial	= hdr->serial;
-		ack.reason	= special_ACK;
-		ack.nAcks	= 0;
-
-		_proto("Rx Sending s-ACK"
-		       " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-		       ntohs(ack.maxSkew),
-		       ntohl(ack.firstPacket),
-		       ntohl(ack.previousPacket),
-		       ntohl(ack.serial),
-		       rxrpc_acks[ack.reason],
-		       ack.nAcks);
-
-		diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
-		diov[0].iov_base = &ack;
-		diov[1].iov_len  = sizeof(acks);
-		diov[1].iov_base = acks;
-
-		/* build and send the message */
-		err = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
-					hdr->seq ? 2 : 1, diov,
-					GFP_KERNEL,
-					&msg);
-		if (err < 0) {
-			ret = err;
-			goto out;
-		}
-
-		msg->seq = seq;
-		msg->hdr.seq = htonl(seq);
-		msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
-		err = rxrpc_conn_sendmsg(call->conn, msg);
-		rxrpc_put_message(msg);
-		if (err < 0) {
-			ret = err;
-			goto out;
-		}
-		call->pkt_snd_count++;
-	}
-
- out:
-	if (hdr->seq)
-		call->ackr_prev_seq = hdr->seq;
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_call_generate_ACK() */
-
-/*****************************************************************************/
-/*
- * handle work to be done on a call
- * - includes packet reception and timeout processing
- */
-void rxrpc_call_do_stuff(struct rxrpc_call *call)
-{
-	_enter("%p{flags=%lx}", call, call->flags);
-
-	/* handle packet reception */
-	if (call->flags & RXRPC_CALL_RCV_PKT) {
-		_debug("- receive packet");
-		call->flags &= ~RXRPC_CALL_RCV_PKT;
-		rxrpc_call_receive_packet(call);
-	}
-
-	/* handle overdue ACKs */
-	if (call->flags & RXRPC_CALL_ACKS_TIMO) {
-		_debug("- overdue ACK timeout");
-		call->flags &= ~RXRPC_CALL_ACKS_TIMO;
-		rxrpc_call_resend(call, call->snd_seq_count);
-	}
-
-	/* handle lack of reception */
-	if (call->flags & RXRPC_CALL_RCV_TIMO) {
-		_debug("- reception timeout");
-		call->flags &= ~RXRPC_CALL_RCV_TIMO;
-		rxrpc_call_abort(call, -EIO);
-	}
-
-	/* handle deferred ACKs */
-	if (call->flags & RXRPC_CALL_ACKR_TIMO ||
-	    (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
-	    ) {
-		_debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
-		       jiffies - call->cjif,
-		       rxrpc_acks[call->ackr.reason],
-		       call->ackr.nAcks);
-
-		call->flags &= ~RXRPC_CALL_ACKR_TIMO;
-
-		if (call->ackr.nAcks > 0 &&
-		    call->app_call_state != RXRPC_CSTATE_ERROR) {
-			/* generate ACK */
-			__rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
-			call->ackr_dfr_seq = 0;
-		}
-	}
-
-	_leave("");
-
-} /* end rxrpc_call_do_stuff() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - must be called with call->lock held
- * - the supplied error code is sent as the packet data
- */
-static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
-{
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_message *msg;
-	struct kvec diov[1];
-	int ret;
-	__be32 _error;
-
-	_enter("%p{%08x},%p{%d},%d",
-	       conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
-
-	/* if this call is already aborted, then just wake up any waiters */
-	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-		spin_unlock(&call->lock);
-		call->app_error_func(call);
-		_leave(" = 0");
-		return 0;
-	}
-
-	rxrpc_get_call(call);
-
-	/* change the state _with_ the lock still held */
-	call->app_call_state	= RXRPC_CSTATE_ERROR;
-	call->app_err_state	= RXRPC_ESTATE_LOCAL_ABORT;
-	call->app_errno		= errno;
-	call->app_mark		= RXRPC_APP_MARK_EOF;
-	call->app_read_buf	= NULL;
-	call->app_async_read	= 0;
-
-	_state(call);
-
-	/* ask the app to translate the error code */
-	call->app_aemap_func(call);
-
-	spin_unlock(&call->lock);
-
-	/* flush any outstanding ACKs */
-	del_timer_sync(&call->acks_timeout);
-	del_timer_sync(&call->rcv_timeout);
-	del_timer_sync(&call->ackr_dfr_timo);
-
-	if (rxrpc_call_is_ack_pending(call))
-		__rxrpc_call_gen_normal_ACK(call, 0);
-
-	/* send the abort packet only if we actually traded some other
-	 * packets */
-	ret = 0;
-	if (call->pkt_snd_count || call->pkt_rcv_count) {
-		/* actually send the abort */
-		_proto("Rx Sending Call ABORT { data=%d }",
-		       call->app_abort_code);
-
-		_error = htonl(call->app_abort_code);
-
-		diov[0].iov_len  = sizeof(_error);
-		diov[0].iov_base = &_error;
-
-		ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
-					1, diov, GFP_KERNEL, &msg);
-		if (ret == 0) {
-			ret = rxrpc_conn_sendmsg(conn, msg);
-			rxrpc_put_message(msg);
-		}
-	}
-
-	/* tell the app layer to let go */
-	call->app_error_func(call);
-
-	rxrpc_put_call(call);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end __rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - the supplied error code is sent as the packet data
- */
-int rxrpc_call_abort(struct rxrpc_call *call, int error)
-{
-	spin_lock(&call->lock);
-
-	return __rxrpc_call_abort(call, error);
-
-} /* end rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * process packets waiting for this call
- */
-static void rxrpc_call_receive_packet(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	struct list_head *_p;
-
-	_enter("%p", call);
-
-	rxrpc_get_call(call); /* must not go away too soon if aborted by
-			       * app-layer */
-
-	while (!list_empty(&call->rcv_receiveq)) {
-		/* try to get next packet */
-		_p = NULL;
-		spin_lock(&call->lock);
-		if (!list_empty(&call->rcv_receiveq)) {
-			_p = call->rcv_receiveq.next;
-			list_del_init(_p);
-		}
-		spin_unlock(&call->lock);
-
-		if (!_p)
-			break;
-
-		msg = list_entry(_p, struct rxrpc_message, link);
-
-		_proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
-		       jiffies - call->cjif,
-		       rxrpc_pkts[msg->hdr.type],
-		       ntohl(msg->hdr.serial),
-		       msg->seq,
-		       msg->hdr.flags & RXRPC_JUMBO_PACKET	? 'j' : '-',
-		       msg->hdr.flags & RXRPC_MORE_PACKETS	? 'm' : '-',
-		       msg->hdr.flags & RXRPC_LAST_PACKET	? 'l' : '-',
-		       msg->hdr.flags & RXRPC_REQUEST_ACK	? 'r' : '-',
-		       msg->hdr.flags & RXRPC_CLIENT_INITIATED	? 'C' : 'S'
-		       );
-
-		switch (msg->hdr.type) {
-			/* deal with data packets */
-		case RXRPC_PACKET_TYPE_DATA:
-			/* ACK the packet if necessary */
-			switch (rxrpc_call_generate_ACK(call, &msg->hdr,
-							NULL)) {
-			case 0: /* useful packet */
-				rxrpc_call_receive_data_packet(call, msg);
-				break;
-			case 1: /* duplicate or out-of-window packet */
-				break;
-			default:
-				rxrpc_put_message(msg);
-				goto out;
-			}
-			break;
-
-			/* deal with ACK packets */
-		case RXRPC_PACKET_TYPE_ACK:
-			rxrpc_call_receive_ack_packet(call, msg);
-			break;
-
-			/* deal with abort packets */
-		case RXRPC_PACKET_TYPE_ABORT: {
-			__be32 _dbuf, *dp;
-
-			dp = skb_header_pointer(msg->pkt, msg->offset,
-						sizeof(_dbuf), &_dbuf);
-			if (dp == NULL)
-				printk("Rx Received short ABORT packet\n");
-
-			_proto("Rx Received Call ABORT { data=%d }",
-			       (dp ? ntohl(*dp) : 0));
-
-			spin_lock(&call->lock);
-			call->app_call_state	= RXRPC_CSTATE_ERROR;
-			call->app_err_state	= RXRPC_ESTATE_PEER_ABORT;
-			call->app_abort_code	= (dp ? ntohl(*dp) : 0);
-			call->app_errno		= -ECONNABORTED;
-			call->app_mark		= RXRPC_APP_MARK_EOF;
-			call->app_read_buf	= NULL;
-			call->app_async_read	= 0;
-
-			/* ask the app to translate the error code */
-			call->app_aemap_func(call);
-			_state(call);
-			spin_unlock(&call->lock);
-			call->app_error_func(call);
-			break;
-		}
-		default:
-			/* deal with other packet types */
-			_proto("Rx Unsupported packet type %u (#%u)",
-			       msg->hdr.type, msg->seq);
-			break;
-		}
-
-		rxrpc_put_message(msg);
-	}
-
- out:
-	rxrpc_put_call(call);
-	_leave("");
-} /* end rxrpc_call_receive_packet() */
-
-/*****************************************************************************/
-/*
- * process next data packet
- * - as the next data packet arrives:
- *   - it is queued on app_readyq _if_ it is the next one expected
- *     (app_ready_seq+1)
- *   - it is queued on app_unreadyq _if_ it is not the next one expected
- *   - if a packet placed on app_readyq completely fills a hole leading up to
- *     the first packet on app_unreadyq, then packets now in sequence are
- *     tranferred to app_readyq
- * - the application layer can only see packets on app_readyq
- *   (app_ready_qty bytes)
- * - the application layer is prodded every time a new packet arrives
- */
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
-					   struct rxrpc_message *msg)
-{
-	const struct rxrpc_operation *optbl, *op;
-	struct rxrpc_message *pmsg;
-	struct list_head *_p;
-	int ret, lo, hi, rmtimo;
-	__be32 opid;
-
-	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
-	rxrpc_get_message(msg);
-
-	/* add to the unready queue if we'd have to create a hole in the ready
-	 * queue otherwise */
-	if (msg->seq != call->app_ready_seq + 1) {
-		_debug("Call add packet %d to unreadyq", msg->seq);
-
-		/* insert in seq order */
-		list_for_each(_p, &call->app_unreadyq) {
-			pmsg = list_entry(_p, struct rxrpc_message, link);
-			if (pmsg->seq > msg->seq)
-				break;
-		}
-
-		list_add_tail(&msg->link, _p);
-
-		_leave(" [unreadyq]");
-		return;
-	}
-
-	/* next in sequence - simply append into the call's ready queue */
-	_debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
-	       msg->seq, msg->dsize, call->app_ready_qty);
-
-	spin_lock(&call->lock);
-	call->app_ready_seq = msg->seq;
-	call->app_ready_qty += msg->dsize;
-	list_add_tail(&msg->link, &call->app_readyq);
-
-	/* move unready packets to the readyq if we got rid of a hole */
-	while (!list_empty(&call->app_unreadyq)) {
-		pmsg = list_entry(call->app_unreadyq.next,
-				  struct rxrpc_message, link);
-
-		if (pmsg->seq != call->app_ready_seq + 1)
-			break;
-
-		/* next in sequence - just move list-to-list */
-		_debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
-		       pmsg->seq, pmsg->dsize, call->app_ready_qty);
-
-		call->app_ready_seq = pmsg->seq;
-		call->app_ready_qty += pmsg->dsize;
-		list_move_tail(&pmsg->link, &call->app_readyq);
-	}
-
-	/* see if we've got the last packet yet */
-	if (!list_empty(&call->app_readyq)) {
-		pmsg = list_entry(call->app_readyq.prev,
-				  struct rxrpc_message, link);
-		if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
-			call->app_last_rcv = 1;
-			_debug("Last packet on readyq");
-		}
-	}
-
-	switch (call->app_call_state) {
-		/* do nothing if call already aborted */
-	case RXRPC_CSTATE_ERROR:
-		spin_unlock(&call->lock);
-		_leave(" [error]");
-		return;
-
-		/* extract the operation ID from an incoming call if that's not
-		 * yet been done */
-	case RXRPC_CSTATE_SRVR_RCV_OPID:
-		spin_unlock(&call->lock);
-
-		/* handle as yet insufficient data for the operation ID */
-		if (call->app_ready_qty < 4) {
-			if (call->app_last_rcv)
-				/* trouble - last packet seen */
-				rxrpc_call_abort(call, -EINVAL);
-
-			_leave("");
-			return;
-		}
-
-		/* pull the operation ID out of the buffer */
-		ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
-		if (ret < 0) {
-			printk("Unexpected error from read-data: %d\n", ret);
-			if (call->app_call_state != RXRPC_CSTATE_ERROR)
-				rxrpc_call_abort(call, ret);
-			_leave("");
-			return;
-		}
-		call->app_opcode = ntohl(opid);
-
-		/* locate the operation in the available ops table */
-		optbl = call->conn->service->ops_begin;
-		lo = 0;
-		hi = call->conn->service->ops_end - optbl;
-
-		while (lo < hi) {
-			int mid = (hi + lo) / 2;
-			op = &optbl[mid];
-			if (call->app_opcode == op->id)
-				goto found_op;
-			if (call->app_opcode > op->id)
-				lo = mid + 1;
-			else
-				hi = mid;
-		}
-
-		/* search failed */
-		kproto("Rx Client requested operation %d from %s service",
-		       call->app_opcode, call->conn->service->name);
-		rxrpc_call_abort(call, -EINVAL);
-		_leave(" [inval]");
-		return;
-
-	found_op:
-		_proto("Rx Client requested operation %s from %s service",
-		       op->name, call->conn->service->name);
-
-		/* we're now waiting for the argument block (unless the call
-		 * was aborted) */
-		spin_lock(&call->lock);
-		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
-		    call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
-			if (!call->app_last_rcv)
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_RCV_ARGS;
-			else if (call->app_ready_qty > 0)
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_GOT_ARGS;
-			else
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_SND_REPLY;
-			call->app_mark = op->asize;
-			call->app_user = op->user;
-		}
-		spin_unlock(&call->lock);
-
-		_state(call);
-		break;
-
-	case RXRPC_CSTATE_SRVR_RCV_ARGS:
-		/* change state if just received last packet of arg block */
-		if (call->app_last_rcv)
-			call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
-		spin_unlock(&call->lock);
-
-		_state(call);
-		break;
-
-	case RXRPC_CSTATE_CLNT_RCV_REPLY:
-		/* change state if just received last packet of reply block */
-		rmtimo = 0;
-		if (call->app_last_rcv) {
-			call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
-			rmtimo = 1;
-		}
-		spin_unlock(&call->lock);
-
-		if (rmtimo) {
-			del_timer_sync(&call->acks_timeout);
-			del_timer_sync(&call->rcv_timeout);
-			del_timer_sync(&call->ackr_dfr_timo);
-		}
-
-		_state(call);
-		break;
-
-	default:
-		/* deal with data reception in an unexpected state */
-		printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
-		__rxrpc_call_abort(call, -EBADMSG);
-		_leave("");
-		return;
-	}
-
-	if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
-	    call->app_last_rcv)
-		BUG();
-
-	/* otherwise just invoke the data function whenever we can satisfy its desire for more
-	 * data
-	 */
-	_proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
-	       call->app_call_state, call->app_ready_qty, call->app_mark,
-	       call->app_last_rcv ? " last-rcvd" : "");
-
-	spin_lock(&call->lock);
-
-	ret = __rxrpc_call_read_data(call);
-	switch (ret) {
-	case 0:
-		spin_unlock(&call->lock);
-		call->app_attn_func(call);
-		break;
-	case -EAGAIN:
-		spin_unlock(&call->lock);
-		break;
-	case -ECONNABORTED:
-		spin_unlock(&call->lock);
-		break;
-	default:
-		__rxrpc_call_abort(call, ret);
-		break;
-	}
-
-	_state(call);
-
-	_leave("");
-
-} /* end rxrpc_call_receive_data_packet() */
-
-/*****************************************************************************/
-/*
- * received an ACK packet
- */
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
-					  struct rxrpc_message *msg)
-{
-	struct rxrpc_ackpacket _ack, *ap;
-	rxrpc_serial_net_t serial;
-	rxrpc_seq_t seq;
-	int ret;
-
-	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
-	/* extract the basic ACK record */
-	ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
-	if (ap == NULL) {
-		printk("Rx Received short ACK packet\n");
-		return;
-	}
-	msg->offset += sizeof(_ack);
-
-	serial = ap->serial;
-	seq = ntohl(ap->firstPacket);
-
-	_proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
-	       ntohl(msg->hdr.serial),
-	       ntohs(ap->bufferSpace),
-	       ntohs(ap->maxSkew),
-	       seq,
-	       ntohl(ap->previousPacket),
-	       ntohl(serial),
-	       rxrpc_acks[ap->reason],
-	       call->ackr.nAcks
-	       );
-
-	/* check the other side isn't ACK'ing a sequence number I haven't sent
-	 * yet */
-	if (ap->nAcks > 0 &&
-	    (seq > call->snd_seq_count ||
-	     seq + ap->nAcks - 1 > call->snd_seq_count)) {
-		printk("Received ACK (#%u-#%u) for unsent packet\n",
-		       seq, seq + ap->nAcks - 1);
-		rxrpc_call_abort(call, -EINVAL);
-		_leave("");
-		return;
-	}
-
-	/* deal with RTT calculation */
-	if (serial) {
-		struct rxrpc_message *rttmsg;
-
-		/* find the prompting packet */
-		spin_lock(&call->lock);
-		if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
-			/* it was a ping packet */
-			rttmsg = call->snd_ping;
-			call->snd_ping = NULL;
-			spin_unlock(&call->lock);
-
-			if (rttmsg) {
-				rttmsg->rttdone = 1;
-				rxrpc_peer_calculate_rtt(call->conn->peer,
-							 rttmsg, msg);
-				rxrpc_put_message(rttmsg);
-			}
-		}
-		else {
-			struct list_head *_p;
-
-			/* it ought to be a data packet - look in the pending
-			 * ACK list */
-			list_for_each(_p, &call->acks_pendq) {
-				rttmsg = list_entry(_p, struct rxrpc_message,
-						    link);
-				if (rttmsg->hdr.serial == serial) {
-					if (rttmsg->rttdone)
-						/* never do RTT twice without
-						 * resending */
-						break;
-
-					rttmsg->rttdone = 1;
-					rxrpc_peer_calculate_rtt(
-						call->conn->peer, rttmsg, msg);
-					break;
-				}
-			}
-			spin_unlock(&call->lock);
-		}
-	}
-
-	switch (ap->reason) {
-		/* deal with negative/positive acknowledgement of data
-		 * packets */
-	case RXRPC_ACK_REQUESTED:
-	case RXRPC_ACK_DELAY:
-	case RXRPC_ACK_IDLE:
-		rxrpc_call_definitively_ACK(call, seq - 1);
-
-	case RXRPC_ACK_DUPLICATE:
-	case RXRPC_ACK_OUT_OF_SEQUENCE:
-	case RXRPC_ACK_EXCEEDS_WINDOW:
-		call->snd_resend_cnt = 0;
-		ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
-		if (ret < 0)
-			rxrpc_call_abort(call, ret);
-		break;
-
-		/* respond to ping packets immediately */
-	case RXRPC_ACK_PING:
-		rxrpc_call_generate_ACK(call, &msg->hdr, ap);
-		break;
-
-		/* only record RTT on ping response packets */
-	case RXRPC_ACK_PING_RESPONSE:
-		if (call->snd_ping) {
-			struct rxrpc_message *rttmsg;
-
-			/* only do RTT stuff if the response matches the
-			 * retained ping */
-			rttmsg = NULL;
-			spin_lock(&call->lock);
-			if (call->snd_ping &&
-			    call->snd_ping->hdr.serial == ap->serial) {
-				rttmsg = call->snd_ping;
-				call->snd_ping = NULL;
-			}
-			spin_unlock(&call->lock);
-
-			if (rttmsg) {
-				rttmsg->rttdone = 1;
-				rxrpc_peer_calculate_rtt(call->conn->peer,
-							 rttmsg, msg);
-				rxrpc_put_message(rttmsg);
-			}
-		}
-		break;
-
-	default:
-		printk("Unsupported ACK reason %u\n", ap->reason);
-		break;
-	}
-
-	_leave("");
-} /* end rxrpc_call_receive_ack_packet() */
-
-/*****************************************************************************/
-/*
- * record definitive ACKs for all messages up to and including the one with the
- * 'highest' seq
- */
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
-					rxrpc_seq_t highest)
-{
-	struct rxrpc_message *msg;
-	int now_complete;
-
-	_enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
-
-	while (call->acks_dftv_seq < highest) {
-		call->acks_dftv_seq++;
-
-		_proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
-
-		/* discard those at front of queue until message with highest
-		 * ACK is found */
-		spin_lock(&call->lock);
-		msg = NULL;
-		if (!list_empty(&call->acks_pendq)) {
-			msg = list_entry(call->acks_pendq.next,
-					 struct rxrpc_message, link);
-			list_del_init(&msg->link); /* dequeue */
-			if (msg->state == RXRPC_MSG_SENT)
-				call->acks_pend_cnt--;
-		}
-		spin_unlock(&call->lock);
-
-		/* insanity check */
-		if (!msg)
-			panic("%s(): acks_pendq unexpectedly empty\n",
-			      __FUNCTION__);
-
-		if (msg->seq != call->acks_dftv_seq)
-			panic("%s(): Packet #%u expected at front of acks_pendq"
-			      " (#%u found)\n",
-			      __FUNCTION__, call->acks_dftv_seq, msg->seq);
-
-		/* discard the message */
-		msg->state = RXRPC_MSG_DONE;
-		rxrpc_put_message(msg);
-	}
-
-	/* if all sent packets are definitively ACK'd then prod any sleepers just in case */
-	now_complete = 0;
-	spin_lock(&call->lock);
-	if (call->acks_dftv_seq == call->snd_seq_count) {
-		if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
-			call->app_call_state = RXRPC_CSTATE_COMPLETE;
-			_state(call);
-			now_complete = 1;
-		}
-	}
-	spin_unlock(&call->lock);
-
-	if (now_complete) {
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-		call->app_attn_func(call);
-	}
-
-	_leave("");
-} /* end rxrpc_call_definitively_ACK() */
-
-/*****************************************************************************/
-/*
- * record the specified amount of ACKs/NAKs
- */
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
-				 struct rxrpc_message *msg,
-				 rxrpc_seq_t seq,
-				 size_t count)
-{
-	struct rxrpc_message *dmsg;
-	struct list_head *_p;
-	rxrpc_seq_t highest;
-	unsigned ix;
-	size_t chunk;
-	char resend, now_complete;
-	u8 acks[16];
-
-	_enter("%p{apc=%u ads=%u},%p,%u,%Zu",
-	       call, call->acks_pend_cnt, call->acks_dftv_seq,
-	       msg, seq, count);
-
-	/* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
-	 * ACKs) */
-	if (seq <= call->acks_dftv_seq) {
-		unsigned delta = call->acks_dftv_seq - seq;
-
-		if (count <= delta) {
-			_leave(" = 0 [all definitively ACK'd]");
-			return 0;
-		}
-
-		seq += delta;
-		count -= delta;
-		msg->offset += delta;
-	}
-
-	highest = seq + count - 1;
-	resend = 0;
-	while (count > 0) {
-		/* extract up to 16 ACK slots at a time */
-		chunk = min(count, sizeof(acks));
-		count -= chunk;
-
-		memset(acks, 2, sizeof(acks));
-
-		if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
-			printk("Rx Received short ACK packet\n");
-			_leave(" = -EINVAL");
-			return -EINVAL;
-		}
-		msg->offset += chunk;
-
-		/* check that the ACK set is valid */
-		for (ix = 0; ix < chunk; ix++) {
-			switch (acks[ix]) {
-			case RXRPC_ACK_TYPE_ACK:
-				break;
-			case RXRPC_ACK_TYPE_NACK:
-				resend = 1;
-				break;
-			default:
-				printk("Rx Received unsupported ACK state"
-				       " %u\n", acks[ix]);
-				_leave(" = -EINVAL");
-				return -EINVAL;
-			}
-		}
-
-		_proto("Rx ACK of packets #%u-#%u "
-		       "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
-		       seq, (unsigned) (seq + chunk - 1),
-		       _acktype[acks[0x0]],
-		       _acktype[acks[0x1]],
-		       _acktype[acks[0x2]],
-		       _acktype[acks[0x3]],
-		       _acktype[acks[0x4]],
-		       _acktype[acks[0x5]],
-		       _acktype[acks[0x6]],
-		       _acktype[acks[0x7]],
-		       _acktype[acks[0x8]],
-		       _acktype[acks[0x9]],
-		       _acktype[acks[0xA]],
-		       _acktype[acks[0xB]],
-		       _acktype[acks[0xC]],
-		       _acktype[acks[0xD]],
-		       _acktype[acks[0xE]],
-		       _acktype[acks[0xF]],
-		       call->acks_pend_cnt
-		       );
-
-		/* mark the packets in the ACK queue as being provisionally
-		 * ACK'd */
-		ix = 0;
-		spin_lock(&call->lock);
-
-		/* find the first packet ACK'd/NAK'd here */
-		list_for_each(_p, &call->acks_pendq) {
-			dmsg = list_entry(_p, struct rxrpc_message, link);
-			if (dmsg->seq == seq)
-				goto found_first;
-			_debug("- %u: skipping #%u", ix, dmsg->seq);
-		}
-		goto bad_queue;
-
-	found_first:
-		do {
-			_debug("- %u: processing #%u (%c) apc=%u",
-			       ix, dmsg->seq, _acktype[acks[ix]],
-			       call->acks_pend_cnt);
-
-			if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
-				if (dmsg->state == RXRPC_MSG_SENT)
-					call->acks_pend_cnt--;
-				dmsg->state = RXRPC_MSG_ACKED;
-			}
-			else {
-				if (dmsg->state == RXRPC_MSG_ACKED)
-					call->acks_pend_cnt++;
-				dmsg->state = RXRPC_MSG_SENT;
-			}
-			ix++;
-			seq++;
-
-			_p = dmsg->link.next;
-			dmsg = list_entry(_p, struct rxrpc_message, link);
-		} while(ix < chunk &&
-			_p != &call->acks_pendq &&
-			dmsg->seq == seq);
-
-		if (ix < chunk)
-			goto bad_queue;
-
-		spin_unlock(&call->lock);
-	}
-
-	if (resend)
-		rxrpc_call_resend(call, highest);
-
-	/* if all packets are provisionally ACK'd, then wake up anyone who's
-	 * waiting for that */
-	now_complete = 0;
-	spin_lock(&call->lock);
-	if (call->acks_pend_cnt == 0) {
-		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
-			call->app_call_state = RXRPC_CSTATE_COMPLETE;
-			_state(call);
-		}
-		now_complete = 1;
-	}
-	spin_unlock(&call->lock);
-
-	if (now_complete) {
-		_debug("- wake up waiters");
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-		call->app_attn_func(call);
-	}
-
-	_leave(" = 0 (apc=%u)", call->acks_pend_cnt);
-	return 0;
-
- bad_queue:
-	panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
-	      __FUNCTION__, seq);
-
-} /* end rxrpc_call_record_ACK() */
-
-/*****************************************************************************/
-/*
- * transfer data from the ready packet queue to the asynchronous read buffer
- * - since this func is the only one going to look at packets queued on
- *   app_readyq, we don't need a lock to modify or access them, only to modify
- *   the queue pointers
- * - called with call->lock held
- * - the buffer must be in kernel space
- * - returns:
- *	0 if buffer filled
- *	-EAGAIN if buffer not filled and more data to come
- *	-EBADMSG if last packet received and insufficient data left
- *	-ECONNABORTED if the call has in an error state
- */
-static int __rxrpc_call_read_data(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	size_t qty;
-	int ret;
-
-	_enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
-	       call,
-	       call->app_async_read, call->app_read_buf,
-	       call->app_ready_qty, call->app_mark);
-
-	/* check the state */
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_SRVR_RCV_ARGS:
-	case RXRPC_CSTATE_CLNT_RCV_REPLY:
-		if (call->app_last_rcv) {
-			printk("%s(%p,%p,%Zd):"
-			       " Inconsistent call state (%s, last pkt)",
-			       __FUNCTION__,
-			       call, call->app_read_buf, call->app_mark,
-			       rxrpc_call_states[call->app_call_state]);
-			BUG();
-		}
-		break;
-
-	case RXRPC_CSTATE_SRVR_RCV_OPID:
-	case RXRPC_CSTATE_SRVR_GOT_ARGS:
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		break;
-
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-		if (!call->app_last_rcv) {
-			printk("%s(%p,%p,%Zd):"
-			       " Inconsistent call state (%s, not last pkt)",
-			       __FUNCTION__,
-			       call, call->app_read_buf, call->app_mark,
-			       rxrpc_call_states[call->app_call_state]);
-			BUG();
-		}
-		_debug("Trying to read data from call in SND_REPLY state");
-		break;
-
-	case RXRPC_CSTATE_ERROR:
-		_leave(" = -ECONNABORTED");
-		return -ECONNABORTED;
-
-	default:
-		printk("reading in unexpected state [[[ %u ]]]\n",
-		       call->app_call_state);
-		BUG();
-	}
-
-	/* handle the case of not having an async buffer */
-	if (!call->app_async_read) {
-		if (call->app_mark == RXRPC_APP_MARK_EOF) {
-			ret = call->app_last_rcv ? 0 : -EAGAIN;
-		}
-		else {
-			if (call->app_mark >= call->app_ready_qty) {
-				call->app_mark = RXRPC_APP_MARK_EOF;
-				ret = 0;
-			}
-			else {
-				ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
-			}
-		}
-
-		_leave(" = %d [no buf]", ret);
-		return 0;
-	}
-
-	while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
-		msg = list_entry(call->app_readyq.next,
-				 struct rxrpc_message, link);
-
-		/* drag as much data as we need out of this packet */
-		qty = min(call->app_mark, msg->dsize);
-
-		_debug("reading %Zu from skb=%p off=%lu",
-		       qty, msg->pkt, msg->offset);
-
-		if (call->app_read_buf)
-			if (skb_copy_bits(msg->pkt, msg->offset,
-					  call->app_read_buf, qty) < 0)
-				panic("%s: Failed to copy data from packet:"
-				      " (%p,%p,%Zd)",
-				      __FUNCTION__,
-				      call, call->app_read_buf, qty);
-
-		/* if that packet is now empty, discard it */
-		call->app_ready_qty -= qty;
-		msg->dsize -= qty;
-
-		if (msg->dsize == 0) {
-			list_del_init(&msg->link);
-			rxrpc_put_message(msg);
-		}
-		else {
-			msg->offset += qty;
-		}
-
-		call->app_mark -= qty;
-		if (call->app_read_buf)
-			call->app_read_buf += qty;
-	}
-
-	if (call->app_mark == 0) {
-		call->app_async_read = 0;
-		call->app_mark = RXRPC_APP_MARK_EOF;
-		call->app_read_buf = NULL;
-
-		/* adjust the state if used up all packets */
-		if (list_empty(&call->app_readyq) && call->app_last_rcv) {
-			switch (call->app_call_state) {
-			case RXRPC_CSTATE_SRVR_RCV_OPID:
-				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
-				call->app_mark = RXRPC_APP_MARK_EOF;
-				_state(call);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			case RXRPC_CSTATE_SRVR_GOT_ARGS:
-				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
-				_state(call);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			default:
-				call->app_call_state = RXRPC_CSTATE_COMPLETE;
-				_state(call);
-				del_timer_sync(&call->acks_timeout);
-				del_timer_sync(&call->ackr_dfr_timo);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			}
-		}
-
-		_leave(" = 0");
-		return 0;
-	}
-
-	if (call->app_last_rcv) {
-		_debug("Insufficient data (%Zu/%Zu)",
-		       call->app_ready_qty, call->app_mark);
-		call->app_async_read = 0;
-		call->app_mark = RXRPC_APP_MARK_EOF;
-		call->app_read_buf = NULL;
-
-		_leave(" = -EBADMSG");
-		return -EBADMSG;
-	}
-
-	_leave(" = -EAGAIN");
-	return -EAGAIN;
-} /* end __rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * attempt to read the specified amount of data from the call's ready queue
- * into the buffer provided
- * - since this func is the only one going to look at packets queued on
- *   app_readyq, we don't need a lock to modify or access them, only to modify
- *   the queue pointers
- * - if the buffer pointer is NULL, then data is merely drained, not copied
- * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there is
- *   enough data or an error will be generated
- *   - note that the caller must have added the calling task to the call's wait
- *     queue beforehand
- * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
- *   function doesn't read all available data
- */
-int rxrpc_call_read_data(struct rxrpc_call *call,
-			 void *buffer, size_t size, int flags)
-{
-	int ret;
-
-	_enter("%p{arq=%Zu},%p,%Zd,%x",
-	       call, call->app_ready_qty, buffer, size, flags);
-
-	spin_lock(&call->lock);
-
-	if (unlikely(!!call->app_read_buf)) {
-		spin_unlock(&call->lock);
-		_leave(" = -EBUSY");
-		return -EBUSY;
-	}
-
-	call->app_mark = size;
-	call->app_read_buf = buffer;
-	call->app_async_read = 1;
-	call->app_read_count++;
-
-	/* read as much data as possible */
-	ret = __rxrpc_call_read_data(call);
-	switch (ret) {
-	case 0:
-		if (flags & RXRPC_CALL_READ_ALL &&
-		    (!call->app_last_rcv || call->app_ready_qty > 0)) {
-			_leave(" = -EBADMSG");
-			__rxrpc_call_abort(call, -EBADMSG);
-			return -EBADMSG;
-		}
-
-		spin_unlock(&call->lock);
-		call->app_attn_func(call);
-		_leave(" = 0");
-		return ret;
-
-	case -ECONNABORTED:
-		spin_unlock(&call->lock);
-		_leave(" = %d [aborted]", ret);
-		return ret;
-
-	default:
-		__rxrpc_call_abort(call, ret);
-		_leave(" = %d", ret);
-		return ret;
-
-	case -EAGAIN:
-		spin_unlock(&call->lock);
-
-		if (!(flags & RXRPC_CALL_READ_BLOCK)) {
-			_leave(" = -EAGAIN");
-			return -EAGAIN;
-		}
-
-		/* wait for the data to arrive */
-		_debug("blocking for data arrival");
-
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (!call->app_async_read || signal_pending(current))
-				break;
-			schedule();
-		}
-		set_current_state(TASK_RUNNING);
-
-		if (signal_pending(current)) {
-			_leave(" = -EINTR");
-			return -EINTR;
-		}
-
-		if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-			_leave(" = -ECONNABORTED");
-			return -ECONNABORTED;
-		}
-
-		_leave(" = 0");
-		return 0;
-	}
-
-} /* end rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * write data to a call
- * - the data may not be sent immediately if it doesn't fill a buffer
- * - if we can't queue all the data for buffering now, siov[] will have been
- *   adjusted to take account of what has been sent
- */
-int rxrpc_call_write_data(struct rxrpc_call *call,
-			  size_t sioc,
-			  struct kvec *siov,
-			  u8 rxhdr_flags,
-			  gfp_t alloc_flags,
-			  int dup_data,
-			  size_t *size_sent)
-{
-	struct rxrpc_message *msg;
-	struct kvec *sptr;
-	size_t space, size, chunk, tmp;
-	char *buf;
-	int ret;
-
-	_enter("%p,%Zu,%p,%02x,%x,%d,%p",
-	       call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
-	       size_sent);
-
-	*size_sent = 0;
-	size = 0;
-	ret = -EINVAL;
-
-	/* can't send more if we've sent last packet from this end */
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-	case RXRPC_CSTATE_CLNT_SND_ARGS:
-		break;
-	case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-	default:
-		goto out;
-	}
-
-	/* calculate how much data we've been given */
-	sptr = siov;
-	for (; sioc > 0; sptr++, sioc--) {
-		if (!sptr->iov_len)
-			continue;
-
-		if (!sptr->iov_base)
-			goto out;
-
-		size += sptr->iov_len;
-	}
-
-	_debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
-
-	do {
-		/* make sure there's a message under construction */
-		if (!call->snd_nextmsg) {
-			/* no - allocate a message with no data yet attached */
-			ret = rxrpc_conn_newmsg(call->conn, call,
-						RXRPC_PACKET_TYPE_DATA,
-						0, NULL, alloc_flags,
-						&call->snd_nextmsg);
-			if (ret < 0)
-				goto out;
-			_debug("- allocated new message [ds=%Zu]",
-			       call->snd_nextmsg->dsize);
-		}
-
-		msg = call->snd_nextmsg;
-		msg->hdr.flags |= rxhdr_flags;
-
-		/* deal with zero-length terminal packet */
-		if (size == 0) {
-			if (rxhdr_flags & RXRPC_LAST_PACKET) {
-				ret = rxrpc_call_flush(call);
-				if (ret < 0)
-					goto out;
-			}
-			break;
-		}
-
-		/* work out how much space current packet has available */
-		space = call->conn->mtu_size - msg->dsize;
-		chunk = min(space, size);
-
-		_debug("- [before] space=%Zu chunk=%Zu", space, chunk);
-
-		while (!siov->iov_len)
-			siov++;
-
-		/* if we are going to have to duplicate the data then coalesce
-		 * it too */
-		if (dup_data) {
-			/* don't allocate more that 1 page at a time */
-			if (chunk > PAGE_SIZE)
-				chunk = PAGE_SIZE;
-
-			/* allocate a data buffer and attach to the message */
-			buf = kmalloc(chunk, alloc_flags);
-			if (unlikely(!buf)) {
-				if (msg->dsize ==
-				    sizeof(struct rxrpc_header)) {
-					/* discard an empty msg and wind back
-					 * the seq counter */
-					rxrpc_put_message(msg);
-					call->snd_nextmsg = NULL;
-					call->snd_seq_count--;
-				}
-
-				ret = -ENOMEM;
-				goto out;
-			}
-
-			tmp = msg->dcount++;
-			set_bit(tmp, &msg->dfree);
-			msg->data[tmp].iov_base = buf;
-			msg->data[tmp].iov_len = chunk;
-			msg->dsize += chunk;
-			*size_sent += chunk;
-			size -= chunk;
-
-			/* load the buffer with data */
-			while (chunk > 0) {
-				tmp = min(chunk, siov->iov_len);
-				memcpy(buf, siov->iov_base, tmp);
-				buf += tmp;
-				siov->iov_base += tmp;
-				siov->iov_len -= tmp;
-				if (!siov->iov_len)
-					siov++;
-				chunk -= tmp;
-			}
-		}
-		else {
-			/* we want to attach the supplied buffers directly */
-			while (chunk > 0 &&
-			       msg->dcount < RXRPC_MSG_MAX_IOCS) {
-				tmp = msg->dcount++;
-				msg->data[tmp].iov_base = siov->iov_base;
-				msg->data[tmp].iov_len = siov->iov_len;
-				msg->dsize += siov->iov_len;
-				*size_sent += siov->iov_len;
-				size -= siov->iov_len;
-				chunk -= siov->iov_len;
-				siov++;
-			}
-		}
-
-		_debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
-
-		/* dispatch the message when full, final or requesting ACK */
-		if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
-			ret = rxrpc_call_flush(call);
-			if (ret < 0)
-				goto out;
-		}
-
-	} while(size > 0);
-
-	ret = 0;
- out:
-	_leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
-	return ret;
-
-} /* end rxrpc_call_write_data() */
-
-/*****************************************************************************/
-/*
- * flush outstanding packets to the network
- */
-static int rxrpc_call_flush(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	int ret = 0;
-
-	_enter("%p", call);
-
-	rxrpc_get_call(call);
-
-	/* if there's a packet under construction, then dispatch it now */
-	if (call->snd_nextmsg) {
-		msg = call->snd_nextmsg;
-		call->snd_nextmsg = NULL;
-
-		if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-			msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
-			if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
-				msg->hdr.flags |= RXRPC_REQUEST_ACK;
-		}
-		else {
-			msg->hdr.flags |= RXRPC_MORE_PACKETS;
-		}
-
-		_proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
-		       msg->dsize, msg->dcount, msg->dfree);
-
-		/* queue and adjust call state */
-		spin_lock(&call->lock);
-		list_add_tail(&msg->link, &call->acks_pendq);
-
-		/* decide what to do depending on current state and if this is
-		 * the last packet */
-		ret = -EINVAL;
-		switch (call->app_call_state) {
-		case RXRPC_CSTATE_SRVR_SND_REPLY:
-			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
-				_state(call);
-			}
-			break;
-
-		case RXRPC_CSTATE_CLNT_SND_ARGS:
-			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-				call->app_call_state =
-					RXRPC_CSTATE_CLNT_RCV_REPLY;
-				_state(call);
-			}
-			break;
-
-		case RXRPC_CSTATE_ERROR:
-			ret = call->app_errno;
-		default:
-			spin_unlock(&call->lock);
-			goto out;
-		}
-
-		call->acks_pend_cnt++;
-
-		mod_timer(&call->acks_timeout,
-			  __rxrpc_rtt_based_timeout(call,
-						    rxrpc_call_acks_timeout));
-
-		spin_unlock(&call->lock);
-
-		ret = rxrpc_conn_sendmsg(call->conn, msg);
-		if (ret == 0)
-			call->pkt_snd_count++;
-	}
-
- out:
-	rxrpc_put_call(call);
-
-	_leave(" = %d", ret);
-	return ret;
-
-} /* end rxrpc_call_flush() */
-
-/*****************************************************************************/
-/*
- * resend NAK'd or unacknowledged packets up to the highest one specified
- */
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
-{
-	struct rxrpc_message *msg;
-	struct list_head *_p;
-	rxrpc_seq_t seq = 0;
-
-	_enter("%p,%u", call, highest);
-
-	_proto("Rx Resend required");
-
-	/* handle too many resends */
-	if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
-		_debug("Aborting due to too many resends (rcv=%d)",
-		       call->pkt_rcv_count);
-		rxrpc_call_abort(call,
-				 call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
-		_leave("");
-		return;
-	}
-
-	spin_lock(&call->lock);
-	call->snd_resend_cnt++;
-	for (;;) {
-		/* determine which the next packet we might need to ACK is */
-		if (seq <= call->acks_dftv_seq)
-			seq = call->acks_dftv_seq;
-		seq++;
-
-		if (seq > highest)
-			break;
-
-		/* look for the packet in the pending-ACK queue */
-		list_for_each(_p, &call->acks_pendq) {
-			msg = list_entry(_p, struct rxrpc_message, link);
-			if (msg->seq == seq)
-				goto found_msg;
-		}
-
-		panic("%s(%p,%d):"
-		      " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
-		      __FUNCTION__, call, highest,
-		      call->acks_dftv_seq, call->snd_seq_count, seq);
-
-	found_msg:
-		if (msg->state != RXRPC_MSG_SENT)
-			continue; /* only un-ACK'd packets */
-
-		rxrpc_get_message(msg);
-		spin_unlock(&call->lock);
-
-		/* send each message again (and ignore any errors we might
-		 * incur) */
-		_proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
-		       msg->dsize, msg->dcount, msg->dfree);
-
-		if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
-			call->pkt_snd_count++;
-
-		rxrpc_put_message(msg);
-
-		spin_lock(&call->lock);
-	}
-
-	/* reset the timeout */
-	mod_timer(&call->acks_timeout,
-		  __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
-
-	spin_unlock(&call->lock);
-
-	_leave("");
-} /* end rxrpc_call_resend() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a call
- */
-void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
-{
-	_enter("%p{%u},%d", call, ntohl(call->call_id), errno);
-
-	/* if this call is already aborted, then just wake up any waiters */
-	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-		call->app_error_func(call);
-	}
-	else {
-		/* tell the app layer what happened */
-		spin_lock(&call->lock);
-		call->app_call_state = RXRPC_CSTATE_ERROR;
-		_state(call);
-		if (local)
-			call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
-		else
-			call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
-		call->app_errno		= errno;
-		call->app_mark		= RXRPC_APP_MARK_EOF;
-		call->app_read_buf	= NULL;
-		call->app_async_read	= 0;
-
-		/* map the error */
-		call->app_aemap_func(call);
-
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-
-		spin_unlock(&call->lock);
-
-		call->app_error_func(call);
-	}
-
-	_leave("");
-} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
deleted file mode 100644
index a7c929a9fdca..000000000000
--- a/net/rxrpc/connection.c
+++ /dev/null
@@ -1,777 +0,0 @@
-/* connection.c: Rx connection routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_connection_count);
-
-LIST_HEAD(rxrpc_conns);
-DECLARE_RWSEM(rxrpc_conns_sem);
-unsigned long rxrpc_conn_timeout = 60 * 60;
-
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
-
-static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
-{
-	struct rxrpc_connection *conn =
-		list_entry(timer, struct rxrpc_connection, timeout);
-
-	_debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
-
-	rxrpc_conn_do_timeout(conn);
-}
-
-static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
-	.timed_out	= __rxrpc_conn_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a new connection record
- */
-static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
-					    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *conn;
-
-	_enter("%p",peer);
-
-	/* allocate and initialise a connection record */
-	conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
-	if (!conn) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&conn->usage, 1);
-
-	INIT_LIST_HEAD(&conn->link);
-	INIT_LIST_HEAD(&conn->id_link);
-	init_waitqueue_head(&conn->chanwait);
-	spin_lock_init(&conn->lock);
-	rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
-
-	do_gettimeofday(&conn->atime);
-	conn->mtu_size = 1024;
-	conn->peer = peer;
-	conn->trans = peer->trans;
-
-	__RXACCT(atomic_inc(&rxrpc_connection_count));
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-
-	return 0;
-} /* end __rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * create a new connection record for outgoing connections
- */
-int rxrpc_create_connection(struct rxrpc_transport *trans,
-			    __be16 port,
-			    __be32 addr,
-			    uint16_t service_id,
-			    void *security,
-			    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *candidate, *conn;
-	struct rxrpc_peer *peer;
-	struct list_head *_p;
-	__be32 connid;
-	int ret;
-
-	_enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
-
-	/* get a peer record */
-	ret = rxrpc_peer_lookup(trans, addr, &peer);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* allocate and initialise a connection record */
-	ret = __rxrpc_create_connection(peer, &candidate);
-	if (ret < 0) {
-		rxrpc_put_peer(peer);
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* fill in the specific bits */
-	candidate->addr.sin_family	= AF_INET;
-	candidate->addr.sin_port	= port;
-	candidate->addr.sin_addr.s_addr	= addr;
-
-	candidate->in_epoch		= rxrpc_epoch;
-	candidate->out_epoch		= rxrpc_epoch;
-	candidate->in_clientflag	= 0;
-	candidate->out_clientflag	= RXRPC_CLIENT_INITIATED;
-	candidate->service_id		= htons(service_id);
-
-	/* invent a unique connection ID */
-	write_lock(&peer->conn_idlock);
-
- try_next_id:
-	connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
-	peer->conn_idcounter += RXRPC_MAXCALLS;
-
-	list_for_each(_p, &peer->conn_idlist) {
-		conn = list_entry(_p, struct rxrpc_connection, id_link);
-		if (connid == conn->conn_id)
-			goto try_next_id;
-		if (connid > conn->conn_id)
-			break;
-	}
-
-	_debug("selected candidate conn ID %x.%u",
-	       ntohl(peer->addr.s_addr), ntohl(connid));
-
-	candidate->conn_id = connid;
-	list_add_tail(&candidate->id_link, _p);
-
-	write_unlock(&peer->conn_idlock);
-
-	/* attach to peer */
-	candidate->peer = peer;
-
-	write_lock(&peer->conn_lock);
-
-	/* search the peer's transport graveyard list */
-	spin_lock(&peer->conn_gylock);
-	list_for_each(_p, &peer->conn_graveyard) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port	== candidate->addr.sin_port	&&
-		    conn->security_ix	== candidate->security_ix	&&
-		    conn->service_id	== candidate->service_id	&&
-		    conn->in_clientflag	== 0)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* pick the new candidate */
-	_debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
-	atomic_inc(&peer->conn_count);
-	conn = candidate;
-	candidate = NULL;
-
- make_active:
-	list_add_tail(&conn->link, &peer->conn_active);
-	write_unlock(&peer->conn_lock);
-
-	if (candidate) {
-		write_lock(&peer->conn_idlock);
-		list_del(&candidate->id_link);
-		write_unlock(&peer->conn_idlock);
-
-		__RXACCT(atomic_dec(&rxrpc_connection_count));
-		kfree(candidate);
-	}
-	else {
-		down_write(&rxrpc_conns_sem);
-		list_add_tail(&conn->proc_link, &rxrpc_conns);
-		up_write(&rxrpc_conns_sem);
-	}
-
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-
-	return 0;
-
-	/* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
-	_debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
-	rxrpc_get_connection(conn);
-	rxrpc_krxtimod_del_timer(&conn->timeout);
-	list_del_init(&conn->link);
-	spin_unlock(&peer->conn_gylock);
-	goto make_active;
-} /* end rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * lookup the connection for an incoming packet
- * - create a new connection record for unrecorded incoming connections
- */
-int rxrpc_connection_lookup(struct rxrpc_peer *peer,
-			    struct rxrpc_message *msg,
-			    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *conn, *candidate = NULL;
-	struct list_head *_p;
-	struct sk_buff *pkt = msg->pkt;
-	int ret, fresh = 0;
-	__be32 x_epoch, x_connid;
-	__be16 x_port, x_servid;
-	__u32 x_secix;
-	u8 x_clflag;
-
-	_enter("%p{{%hu}},%u,%hu",
-	       peer,
-	       peer->trans->port,
-	       ntohs(pkt->h.uh->source),
-	       ntohs(msg->hdr.serviceId));
-
-	x_port		= pkt->h.uh->source;
-	x_epoch		= msg->hdr.epoch;
-	x_clflag	= msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-	x_connid	= htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
-	x_servid	= msg->hdr.serviceId;
-	x_secix		= msg->hdr.securityIndex;
-
-	/* [common case] search the transport's active list first */
-	read_lock(&peer->conn_lock);
-	list_for_each(_p, &peer->conn_active) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_active;
-	}
-	read_unlock(&peer->conn_lock);
-
-	/* [uncommon case] not active
-	 * - create a candidate for a new record if an inbound connection
-	 * - only examine the graveyard for an outbound connection
-	 */
-	if (x_clflag) {
-		ret = __rxrpc_create_connection(peer, &candidate);
-		if (ret < 0) {
-			_leave(" = %d", ret);
-			return ret;
-		}
-
-		/* fill in the specifics */
-		candidate->addr.sin_family	= AF_INET;
-		candidate->addr.sin_port	= x_port;
-		candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
-		candidate->in_epoch		= x_epoch;
-		candidate->out_epoch		= x_epoch;
-		candidate->in_clientflag	= RXRPC_CLIENT_INITIATED;
-		candidate->out_clientflag	= 0;
-		candidate->conn_id		= x_connid;
-		candidate->service_id		= x_servid;
-		candidate->security_ix		= x_secix;
-	}
-
-	/* search the active list again, just in case it appeared whilst we
-	 * were busy */
-	write_lock(&peer->conn_lock);
-	list_for_each(_p, &peer->conn_active) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_active_second_chance;
-	}
-
-	/* search the transport's graveyard list */
-	spin_lock(&peer->conn_gylock);
-	list_for_each(_p, &peer->conn_graveyard) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* outbound connections aren't created here */
-	if (!x_clflag) {
-		write_unlock(&peer->conn_lock);
-		_leave(" = -ENOENT");
-		return -ENOENT;
-	}
-
-	/* we can now add the new candidate to the list */
-	_debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
-	rxrpc_get_peer(peer);
-	conn = candidate;
-	candidate = NULL;
-	atomic_inc(&peer->conn_count);
-	fresh = 1;
-
- make_active:
-	list_add_tail(&conn->link, &peer->conn_active);
-
- success_uwfree:
-	write_unlock(&peer->conn_lock);
-
-	if (candidate) {
-		write_lock(&peer->conn_idlock);
-		list_del(&candidate->id_link);
-		write_unlock(&peer->conn_idlock);
-
-		__RXACCT(atomic_dec(&rxrpc_connection_count));
-		kfree(candidate);
-	}
-
-	if (fresh) {
-		down_write(&rxrpc_conns_sem);
-		list_add_tail(&conn->proc_link, &rxrpc_conns);
-		up_write(&rxrpc_conns_sem);
-	}
-
- success:
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-	return 0;
-
-	/* handle the connection being found in the active list straight off */
- found_active:
-	rxrpc_get_connection(conn);
-	read_unlock(&peer->conn_lock);
-	goto success;
-
-	/* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
-	_debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
-	rxrpc_get_peer(peer);
-	rxrpc_get_connection(conn);
-	rxrpc_krxtimod_del_timer(&conn->timeout);
-	list_del_init(&conn->link);
-	spin_unlock(&peer->conn_gylock);
-	goto make_active;
-
-	/* handle finding the connection on the second time through the active
-	 * list */
- found_active_second_chance:
-	rxrpc_get_connection(conn);
-	goto success_uwfree;
-
-} /* end rxrpc_connection_lookup() */
-
-/*****************************************************************************/
-/*
- * finish using a connection record
- * - it will be transferred to the peer's connection graveyard when refcount
- *   reaches 0
- */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
-	struct rxrpc_peer *peer;
-
-	if (!conn)
-		return;
-
-	_enter("%p{u=%d p=%hu}",
-	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
-	peer = conn->peer;
-	spin_lock(&peer->conn_gylock);
-
-	/* sanity check */
-	if (atomic_read(&conn->usage) <= 0)
-		BUG();
-
-	if (likely(!atomic_dec_and_test(&conn->usage))) {
-		spin_unlock(&peer->conn_gylock);
-		_leave("");
-		return;
-	}
-
-	/* move to graveyard queue */
-	_debug("burying connection: {%08x}", ntohl(conn->conn_id));
-	list_move_tail(&conn->link, &peer->conn_graveyard);
-
-	rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
-
-	spin_unlock(&peer->conn_gylock);
-
-	rxrpc_put_peer(conn->peer);
-
-	_leave(" [killed]");
-} /* end rxrpc_put_connection() */
-
-/*****************************************************************************/
-/*
- * free a connection record
- */
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
-{
-	struct rxrpc_peer *peer;
-
-	_enter("%p{u=%d p=%hu}",
-	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
-	peer = conn->peer;
-
-	if (atomic_read(&conn->usage) < 0)
-		BUG();
-
-	/* remove from graveyard if still dead */
-	spin_lock(&peer->conn_gylock);
-	if (atomic_read(&conn->usage) == 0) {
-		list_del_init(&conn->link);
-	}
-	else {
-		conn = NULL;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	if (!conn) {
-		_leave("");
-		return; /* resurrected */
-	}
-
-	_debug("--- Destroying Connection %p{%08x} ---",
-	       conn, ntohl(conn->conn_id));
-
-	down_write(&rxrpc_conns_sem);
-	list_del(&conn->proc_link);
-	up_write(&rxrpc_conns_sem);
-
-	write_lock(&peer->conn_idlock);
-	list_del(&conn->id_link);
-	write_unlock(&peer->conn_idlock);
-
-	__RXACCT(atomic_dec(&rxrpc_connection_count));
-	kfree(conn);
-
-	/* if the graveyard is now empty, wake up anyone waiting for that */
-	if (atomic_dec_and_test(&peer->conn_count))
-		wake_up(&peer->conn_gy_waitq);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_conn_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all connection records from a peer endpoint
- */
-void rxrpc_conn_clearall(struct rxrpc_peer *peer)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_connection *conn;
-	int err;
-
-	_enter("%p", peer);
-
-	/* there shouldn't be any active conns remaining */
-	if (!list_empty(&peer->conn_active))
-		BUG();
-
-	/* manually timeout all conns in the graveyard */
-	spin_lock(&peer->conn_gylock);
-	while (!list_empty(&peer->conn_graveyard)) {
-		conn = list_entry(peer->conn_graveyard.next,
-				  struct rxrpc_connection, link);
-		err = rxrpc_krxtimod_del_timer(&conn->timeout);
-		spin_unlock(&peer->conn_gylock);
-
-		if (err == 0)
-			rxrpc_conn_do_timeout(conn);
-
-		spin_lock(&peer->conn_gylock);
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* wait for the the conn graveyard to be completely cleared */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&peer->conn_gy_waitq, &myself);
-
-	while (atomic_read(&peer->conn_count) != 0) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&peer->conn_gy_waitq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_leave("");
-} /* end rxrpc_conn_clearall() */
-
-/*****************************************************************************/
-/*
- * allocate and prepare a message for sending out through the transport
- * endpoint
- */
-int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
-		      struct rxrpc_call *call,
-		      uint8_t type,
-		      int dcount,
-		      struct kvec diov[],
-		      gfp_t alloc_flags,
-		      struct rxrpc_message **_msg)
-{
-	struct rxrpc_message *msg;
-	int loop;
-
-	_enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
-
-	if (dcount > 3) {
-		_leave(" = -EINVAL");
-		return -EINVAL;
-	}
-
-	msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
-	if (!msg) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&msg->usage, 1);
-
-	INIT_LIST_HEAD(&msg->link);
-
-	msg->state = RXRPC_MSG_PREPARED;
-
-	msg->hdr.epoch		= conn->out_epoch;
-	msg->hdr.cid		= conn->conn_id | (call ? call->chan_ix : 0);
-	msg->hdr.callNumber	= call ? call->call_id : 0;
-	msg->hdr.type		= type;
-	msg->hdr.flags		= conn->out_clientflag;
-	msg->hdr.securityIndex	= conn->security_ix;
-	msg->hdr.serviceId	= conn->service_id;
-
-	/* generate sequence numbers for data packets */
-	if (call) {
-		switch (type) {
-		case RXRPC_PACKET_TYPE_DATA:
-			msg->seq = ++call->snd_seq_count;
-			msg->hdr.seq = htonl(msg->seq);
-			break;
-		case RXRPC_PACKET_TYPE_ACK:
-			/* ACK sequence numbers are complicated. The following
-			 * may be wrong:
-			 * - jumbo packet ACKs should have a seq number
-			 * - normal ACKs should not
-			 */
-		default:
-			break;
-		}
-	}
-
-	msg->dcount = dcount + 1;
-	msg->dsize = sizeof(msg->hdr);
-	msg->data[0].iov_len = sizeof(msg->hdr);
-	msg->data[0].iov_base = &msg->hdr;
-
-	for (loop=0; loop < dcount; loop++) {
-		msg->dsize += diov[loop].iov_len;
-		msg->data[loop+1].iov_len  = diov[loop].iov_len;
-		msg->data[loop+1].iov_base = diov[loop].iov_base;
-	}
-
-	__RXACCT(atomic_inc(&rxrpc_message_count));
-	*_msg = msg;
-	_leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
-	return 0;
-} /* end rxrpc_conn_newmsg() */
-
-/*****************************************************************************/
-/*
- * free a message
- */
-void __rxrpc_put_message(struct rxrpc_message *msg)
-{
-	int loop;
-
-	_enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
-
-	if (msg->pkt)
-		kfree_skb(msg->pkt);
-	rxrpc_put_connection(msg->conn);
-
-	for (loop = 0; loop < 8; loop++)
-		if (test_bit(loop, &msg->dfree))
-			kfree(msg->data[loop].iov_base);
-
-	__RXACCT(atomic_dec(&rxrpc_message_count));
-	kfree(msg);
-
-	_leave("");
-} /* end __rxrpc_put_message() */
-
-/*****************************************************************************/
-/*
- * send a message out through the transport endpoint
- */
-int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
-		       struct rxrpc_message *msg)
-{
-	struct msghdr msghdr;
-	int ret;
-
-	_enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
-
-	/* fill in some fields in the header */
-	spin_lock(&conn->lock);
-	msg->hdr.serial = htonl(++conn->serial_counter);
-	msg->rttdone = 0;
-	spin_unlock(&conn->lock);
-
-	/* set up the message to be transmitted */
-	msghdr.msg_name		= &conn->addr;
-	msghdr.msg_namelen	= sizeof(conn->addr);
-	msghdr.msg_control	= NULL;
-	msghdr.msg_controllen	= 0;
-	msghdr.msg_flags	= MSG_CONFIRM | MSG_DONTWAIT;
-
-	_net("Sending message type %d of %Zd bytes to %08x:%d",
-	     msg->hdr.type,
-	     msg->dsize,
-	     ntohl(conn->addr.sin_addr.s_addr),
-	     ntohs(conn->addr.sin_port));
-
-	/* send the message */
-	ret = kernel_sendmsg(conn->trans->socket, &msghdr,
-			     msg->data, msg->dcount, msg->dsize);
-	if (ret < 0) {
-		msg->state = RXRPC_MSG_ERROR;
-	} else {
-		msg->state = RXRPC_MSG_SENT;
-		ret = 0;
-
-		spin_lock(&conn->lock);
-		do_gettimeofday(&conn->atime);
-		msg->stamp = conn->atime;
-		spin_unlock(&conn->lock);
-	}
-
-	_leave(" = %d", ret);
-
-	return ret;
-} /* end rxrpc_conn_sendmsg() */
-
-/*****************************************************************************/
-/*
- * deal with a subsequent call packet
- */
-int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
-				   struct rxrpc_call *call,
-				   struct rxrpc_message *msg)
-{
-	struct rxrpc_message *pmsg;
-	struct dst_entry *dst;
-	struct list_head *_p;
-	unsigned cix, seq;
-	int ret = 0;
-
-	_enter("%p,%p,%p", conn, call, msg);
-
-	if (!call) {
-		cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
-		spin_lock(&conn->lock);
-		call = conn->channels[cix];
-
-		if (!call || call->call_id != msg->hdr.callNumber) {
-			spin_unlock(&conn->lock);
-			rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
-			goto out;
-		}
-		else {
-			rxrpc_get_call(call);
-			spin_unlock(&conn->lock);
-		}
-	}
-	else {
-		rxrpc_get_call(call);
-	}
-
-	_proto("Received packet %%%u [%u] on call %hu:%u:%u",
-	       ntohl(msg->hdr.serial),
-	       ntohl(msg->hdr.seq),
-	       ntohs(msg->hdr.serviceId),
-	       ntohl(conn->conn_id),
-	       ntohl(call->call_id));
-
-	call->pkt_rcv_count++;
-
-	dst = msg->pkt->dst;
-	if (dst && dst->dev)
-		conn->peer->if_mtu =
-			dst->dev->mtu - dst->dev->hard_header_len;
-
-	/* queue on the call in seq order */
-	rxrpc_get_message(msg);
-	seq = msg->seq;
-
-	spin_lock(&call->lock);
-	list_for_each(_p, &call->rcv_receiveq) {
-		pmsg = list_entry(_p, struct rxrpc_message, link);
-		if (pmsg->seq > seq)
-			break;
-	}
-	list_add_tail(&msg->link, _p);
-
-	/* reset the activity timeout */
-	call->flags |= RXRPC_CALL_RCV_PKT;
-	mod_timer(&call->rcv_timeout,jiffies + rxrpc_call_rcv_timeout * HZ);
-
-	spin_unlock(&call->lock);
-
-	rxrpc_krxiod_queue_call(call);
-
-	rxrpc_put_call(call);
- out:
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_conn_receive_call_packet() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a connection
- */
-void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
-			     int local, int errno)
-{
-	struct rxrpc_call *calls[4];
-	int loop;
-
-	_enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
-
-	/* get a ref to all my calls in one go */
-	memset(calls, 0, sizeof(calls));
-	spin_lock(&conn->lock);
-
-	for (loop = 3; loop >= 0; loop--) {
-		if (conn->channels[loop]) {
-			calls[loop] = conn->channels[loop];
-			rxrpc_get_call(calls[loop]);
-		}
-	}
-
-	spin_unlock(&conn->lock);
-
-	/* now kick them all */
-	for (loop = 3; loop >= 0; loop--) {
-		if (calls[loop]) {
-			rxrpc_call_handle_error(calls[loop], local, errno);
-			rxrpc_put_call(calls[loop]);
-		}
-	}
-
-	_leave("");
-} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
deleted file mode 100644
index cc0c5795a103..000000000000
--- a/net/rxrpc/internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* internal.h: internal Rx RPC stuff
- *
- * Copyright (c) 2002   David Howells (dhowells@redhat.com).
- */
-
-#ifndef RXRPC_INTERNAL_H
-#define RXRPC_INTERNAL_H
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-
-/*
- * debug accounting
- */
-#if 1
-#define __RXACCT_DECL(X) X
-#define __RXACCT(X) do { X; } while(0)
-#else
-#define __RXACCT_DECL(X)
-#define __RXACCT(X) do { } while(0)
-#endif
-
-__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
-__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
-__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
-__RXACCT_DECL(extern atomic_t rxrpc_call_count);
-__RXACCT_DECL(extern atomic_t rxrpc_message_count);
-
-/*
- * debug tracing
- */
-#define kenter(FMT, a...)	printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
-#define kleave(FMT, a...)	printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
-#define kdebug(FMT, a...)	printk("    "FMT"\n" , ##a)
-#define kproto(FMT, a...)	printk("### "FMT"\n" , ##a)
-#define knet(FMT, a...)		printk("    "FMT"\n" , ##a)
-
-#if 0
-#define _enter(FMT, a...)	kenter(FMT , ##a)
-#define _leave(FMT, a...)	kleave(FMT , ##a)
-#define _debug(FMT, a...)	kdebug(FMT , ##a)
-#define _proto(FMT, a...)	kproto(FMT , ##a)
-#define _net(FMT, a...)		knet(FMT , ##a)
-#else
-#define _enter(FMT, a...)	do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
-#define _leave(FMT, a...)	do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
-#define _debug(FMT, a...)	do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
-#define _proto(FMT, a...)	do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
-#define _net(FMT, a...)		do { if (rxrpc_knet)   knet  (FMT , ##a); } while(0)
-#endif
-
-static inline void rxrpc_discard_my_signals(void)
-{
-	while (signal_pending(current)) {
-		siginfo_t sinfo;
-
-		spin_lock_irq(&current->sighand->siglock);
-		dequeue_signal(current, &current->blocked, &sinfo);
-		spin_unlock_irq(&current->sighand->siglock);
-	}
-}
-
-/*
- * call.c
- */
-extern struct list_head rxrpc_calls;
-extern struct rw_semaphore rxrpc_calls_sem;
-
-/*
- * connection.c
- */
-extern struct list_head rxrpc_conns;
-extern struct rw_semaphore rxrpc_conns_sem;
-extern unsigned long rxrpc_conn_timeout;
-
-extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
-
-/*
- * peer.c
- */
-extern struct list_head rxrpc_peers;
-extern struct rw_semaphore rxrpc_peers_sem;
-extern unsigned long rxrpc_peer_timeout;
-
-extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
-				     struct rxrpc_message *msg,
-				     struct rxrpc_message *resp);
-
-extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
-
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int rxrpc_proc_init(void);
-extern void rxrpc_proc_cleanup(void);
-#endif
-
-/*
- * transport.c
- */
-extern struct list_head rxrpc_proc_transports;
-extern struct rw_semaphore rxrpc_proc_transports_sem;
-
-#endif /* RXRPC_INTERNAL_H */
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
deleted file mode 100644
index bbbcd6c24048..000000000000
--- a/net/rxrpc/krxiod.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* krxiod.c: Rx I/O daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/freezer.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxiod_dead);
-
-static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
-
-static LIST_HEAD(rxrpc_krxiod_transportq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
-
-static LIST_HEAD(rxrpc_krxiod_callq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
-
-static volatile int rxrpc_krxiod_die;
-
-/*****************************************************************************/
-/*
- * Rx I/O daemon
- */
-static int rxrpc_krxiod(void *arg)
-{
-	DECLARE_WAITQUEUE(krxiod,current);
-
-	printk("Started krxiod %d\n",current->pid);
-
-	daemonize("krxiod");
-
-	/* loop around waiting for work to do */
-	do {
-		/* wait for work or to be told to exit */
-		_debug("### Begin Wait");
-		if (!atomic_read(&rxrpc_krxiod_qcount)) {
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-
-			for (;;) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (atomic_read(&rxrpc_krxiod_qcount) ||
-				    rxrpc_krxiod_die ||
-				    signal_pending(current))
-					break;
-
-				schedule();
-			}
-
-			remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-			set_current_state(TASK_RUNNING);
-		}
-		_debug("### End Wait");
-
-		/* do work if been given some to do */
-		_debug("### Begin Work");
-
-		/* see if there's a transport in need of attention */
-		if (!list_empty(&rxrpc_krxiod_transportq)) {
-			struct rxrpc_transport *trans = NULL;
-
-			spin_lock_irq(&rxrpc_krxiod_transportq_lock);
-
-			if (!list_empty(&rxrpc_krxiod_transportq)) {
-				trans = list_entry(
-					rxrpc_krxiod_transportq.next,
-					struct rxrpc_transport,
-					krxiodq_link);
-
-				list_del_init(&trans->krxiodq_link);
-				atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and doesn't go
-				 * away */
-				if (atomic_read(&trans->usage)>0)
-					rxrpc_get_transport(trans);
-				else
-					trans = NULL;
-			}
-
-			spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
-
-			if (trans) {
-				rxrpc_trans_receive_packet(trans);
-				rxrpc_put_transport(trans);
-			}
-		}
-
-		/* see if there's a call in need of attention */
-		if (!list_empty(&rxrpc_krxiod_callq)) {
-			struct rxrpc_call *call = NULL;
-
-			spin_lock_irq(&rxrpc_krxiod_callq_lock);
-
-			if (!list_empty(&rxrpc_krxiod_callq)) {
-				call = list_entry(rxrpc_krxiod_callq.next,
-						  struct rxrpc_call,
-						  rcv_krxiodq_lk);
-				list_del_init(&call->rcv_krxiodq_lk);
-				atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and doesn't go
-				 * away */
-				if (atomic_read(&call->usage) > 0) {
-					_debug("@@@ KRXIOD"
-					       " Begin Attend Call %p", call);
-					rxrpc_get_call(call);
-				}
-				else {
-					call = NULL;
-				}
-			}
-
-			spin_unlock_irq(&rxrpc_krxiod_callq_lock);
-
-			if (call) {
-				rxrpc_call_do_stuff(call);
-				rxrpc_put_call(call);
-				_debug("@@@ KRXIOD End Attend Call %p", call);
-			}
-		}
-
-		_debug("### End Work");
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-	} while (!rxrpc_krxiod_die);
-
-	/* and that's all */
-	complete_and_exit(&rxrpc_krxiod_dead, 0);
-
-} /* end rxrpc_krxiod() */
-
-/*****************************************************************************/
-/*
- * start up a krxiod daemon
- */
-int __init rxrpc_krxiod_init(void)
-{
-	return kernel_thread(rxrpc_krxiod, NULL, 0);
-
-} /* end rxrpc_krxiod_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxiod daemon and wait for it to complete
- */
-void rxrpc_krxiod_kill(void)
-{
-	rxrpc_krxiod_die = 1;
-	wake_up_all(&rxrpc_krxiod_sleepq);
-	wait_for_completion(&rxrpc_krxiod_dead);
-
-} /* end rxrpc_krxiod_kill() */
-
-/*****************************************************************************/
-/*
- * queue a transport for attention by krxiod
- */
-void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
-{
-	unsigned long flags;
-
-	_enter("");
-
-	if (list_empty(&trans->krxiodq_link)) {
-		spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-
-		if (list_empty(&trans->krxiodq_link)) {
-			if (atomic_read(&trans->usage) > 0) {
-				list_add_tail(&trans->krxiodq_link,
-					      &rxrpc_krxiod_transportq);
-				atomic_inc(&rxrpc_krxiod_qcount);
-			}
-		}
-
-		spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-		wake_up_all(&rxrpc_krxiod_sleepq);
-	}
-
-	_leave("");
-
-} /* end rxrpc_krxiod_queue_transport() */
-
-/*****************************************************************************/
-/*
- * dequeue a transport from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
-{
-	unsigned long flags;
-
-	_enter("");
-
-	spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-	if (!list_empty(&trans->krxiodq_link)) {
-		list_del_init(&trans->krxiodq_link);
-		atomic_dec(&rxrpc_krxiod_qcount);
-	}
-	spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-
-	_leave("");
-
-} /* end rxrpc_krxiod_dequeue_transport() */
-
-/*****************************************************************************/
-/*
- * queue a call for attention by krxiod
- */
-void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
-{
-	unsigned long flags;
-
-	if (list_empty(&call->rcv_krxiodq_lk)) {
-		spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
-		if (atomic_read(&call->usage) > 0) {
-			list_add_tail(&call->rcv_krxiodq_lk,
-				      &rxrpc_krxiod_callq);
-			atomic_inc(&rxrpc_krxiod_qcount);
-		}
-		spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-	}
-	wake_up_all(&rxrpc_krxiod_sleepq);
-
-} /* end rxrpc_krxiod_queue_call() */
-
-/*****************************************************************************/
-/*
- * dequeue a call from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
-	if (!list_empty(&call->rcv_krxiodq_lk)) {
-		list_del_init(&call->rcv_krxiodq_lk);
-		atomic_dec(&rxrpc_krxiod_qcount);
-	}
-	spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-
-} /* end rxrpc_krxiod_dequeue_call() */
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
deleted file mode 100644
index 9a1e7f5e034c..000000000000
--- a/net/rxrpc/krxsecd.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/* krxsecd.c: Rx security daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This daemon deals with:
- * - consulting the application as to whether inbound peers and calls should be authorised
- * - generating security challenges for inbound connections
- * - responding to security challenges on outbound connections
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/message.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <linux/freezer.h>
-#include <net/sock.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
-static volatile int rxrpc_krxsecd_die;
-
-static atomic_t rxrpc_krxsecd_qcount;
-
-/* queue of unprocessed inbound messages with seqno #1 and
- * RXRPC_CLIENT_INITIATED flag set */
-static LIST_HEAD(rxrpc_krxsecd_initmsgq);
-static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
-
-static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
-
-/*****************************************************************************/
-/*
- * Rx security daemon
- */
-static int rxrpc_krxsecd(void *arg)
-{
-	DECLARE_WAITQUEUE(krxsecd, current);
-
-	int die;
-
-	printk("Started krxsecd %d\n", current->pid);
-
-	daemonize("krxsecd");
-
-	/* loop around waiting for work to do */
-	do {
-		/* wait for work or to be told to exit */
-		_debug("### Begin Wait");
-		if (!atomic_read(&rxrpc_krxsecd_qcount)) {
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-
-			for (;;) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (atomic_read(&rxrpc_krxsecd_qcount) ||
-				    rxrpc_krxsecd_die ||
-				    signal_pending(current))
-					break;
-
-				schedule();
-			}
-
-			remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-			set_current_state(TASK_RUNNING);
-		}
-		die = rxrpc_krxsecd_die;
-		_debug("### End Wait");
-
-		/* see if there're incoming calls in need of authenticating */
-		_debug("### Begin Inbound Calls");
-
-		if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
-			struct rxrpc_message *msg = NULL;
-
-			spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-			if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
-				msg = list_entry(rxrpc_krxsecd_initmsgq.next,
-						 struct rxrpc_message, link);
-				list_del_init(&msg->link);
-				atomic_dec(&rxrpc_krxsecd_qcount);
-			}
-
-			spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-			if (msg) {
-				rxrpc_krxsecd_process_incoming_call(msg);
-				rxrpc_put_message(msg);
-			}
-		}
-
-		_debug("### End Inbound Calls");
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-	} while (!die);
-
-	/* and that's all */
-	complete_and_exit(&rxrpc_krxsecd_dead, 0);
-
-} /* end rxrpc_krxsecd() */
-
-/*****************************************************************************/
-/*
- * start up a krxsecd daemon
- */
-int __init rxrpc_krxsecd_init(void)
-{
-	return kernel_thread(rxrpc_krxsecd, NULL, 0);
-
-} /* end rxrpc_krxsecd_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxsecd daemon and wait for it to complete
- */
-void rxrpc_krxsecd_kill(void)
-{
-	rxrpc_krxsecd_die = 1;
-	wake_up_all(&rxrpc_krxsecd_sleepq);
-	wait_for_completion(&rxrpc_krxsecd_dead);
-
-} /* end rxrpc_krxsecd_kill() */
-
-/*****************************************************************************/
-/*
- * clear all pending incoming calls for the specified transport
- */
-void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
-{
-	LIST_HEAD(tmp);
-
-	struct rxrpc_message *msg;
-	struct list_head *_p, *_n;
-
-	_enter("%p",trans);
-
-	/* move all the messages for this transport onto a temp list */
-	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-	list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
-		msg = list_entry(_p, struct rxrpc_message, link);
-		if (msg->trans == trans) {
-			list_move_tail(&msg->link, &tmp);
-			atomic_dec(&rxrpc_krxsecd_qcount);
-		}
-	}
-
-	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-	/* zap all messages on the temp list */
-	while (!list_empty(&tmp)) {
-		msg = list_entry(tmp.next, struct rxrpc_message, link);
-		list_del_init(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	_leave("");
-} /* end rxrpc_krxsecd_clear_transport() */
-
-/*****************************************************************************/
-/*
- * queue a message on the incoming calls list
- */
-void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
-{
-	_enter("%p", msg);
-
-	/* queue for processing by krxsecd */
-	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-	if (!rxrpc_krxsecd_die) {
-		rxrpc_get_message(msg);
-		list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
-		atomic_inc(&rxrpc_krxsecd_qcount);
-	}
-
-	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-	wake_up(&rxrpc_krxsecd_sleepq);
-
-	_leave("");
-} /* end rxrpc_krxsecd_queue_incoming_call() */
-
-/*****************************************************************************/
-/*
- * process the initial message of an incoming call
- */
-void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
-{
-	struct rxrpc_transport *trans = msg->trans;
-	struct rxrpc_service *srv;
-	struct rxrpc_call *call;
-	struct list_head *_p;
-	unsigned short sid;
-	int ret;
-
-	_enter("%p{tr=%p}", msg, trans);
-
-	ret = rxrpc_incoming_call(msg->conn, msg, &call);
-	if (ret < 0)
-		goto out;
-
-	/* find the matching service on the transport */
-	sid = ntohs(msg->hdr.serviceId);
-	srv = NULL;
-
-	spin_lock(&trans->lock);
-	list_for_each(_p, &trans->services) {
-		srv = list_entry(_p, struct rxrpc_service, link);
-		if (srv->service_id == sid && try_module_get(srv->owner)) {
-			/* found a match (made sure it won't vanish) */
-			_debug("found service '%s'", srv->name);
-			call->owner = srv->owner;
-			break;
-		}
-	}
-	spin_unlock(&trans->lock);
-
-	/* report the new connection
-	 * - the func must inc the call's usage count to keep it
-	 */
-	ret = -ENOENT;
-	if (_p != &trans->services) {
-		/* attempt to accept the call */
-		call->conn->service = srv;
-		call->app_attn_func = srv->attn_func;
-		call->app_error_func = srv->error_func;
-		call->app_aemap_func = srv->aemap_func;
-
-		ret = srv->new_call(call);
-
-		/* send an abort if an error occurred */
-		if (ret < 0) {
-			rxrpc_call_abort(call, ret);
-		}
-		else {
-			/* formally receive and ACK the new packet */
-			ret = rxrpc_conn_receive_call_packet(call->conn,
-							     call, msg);
-		}
-	}
-
-	rxrpc_put_call(call);
- out:
-	if (ret < 0)
-		rxrpc_trans_immediate_abort(trans, msg, ret);
-
-	_leave(" (%d)", ret);
-} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
deleted file mode 100644
index 9a9b6132dba4..000000000000
--- a/net/rxrpc/krxtimod.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/* krxtimod.c: RXRPC timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxtimod.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(krxtimod_alive);
-static DECLARE_COMPLETION(krxtimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
-static int krxtimod_die;
-
-static LIST_HEAD(krxtimod_list);
-static DEFINE_SPINLOCK(krxtimod_lock);
-
-static int krxtimod(void *arg);
-
-/*****************************************************************************/
-/*
- * start the timeout daemon
- */
-int rxrpc_krxtimod_start(void)
-{
-	int ret;
-
-	ret = kernel_thread(krxtimod, NULL, 0);
-	if (ret < 0)
-		return ret;
-
-	wait_for_completion(&krxtimod_alive);
-
-	return ret;
-} /* end rxrpc_krxtimod_start() */
-
-/*****************************************************************************/
-/*
- * stop the timeout daemon
- */
-void rxrpc_krxtimod_kill(void)
-{
-	/* get rid of my daemon */
-	krxtimod_die = 1;
-	wake_up(&krxtimod_sleepq);
-	wait_for_completion(&krxtimod_dead);
-
-} /* end rxrpc_krxtimod_kill() */
-
-/*****************************************************************************/
-/*
- * timeout processing daemon
- */
-static int krxtimod(void *arg)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	rxrpc_timer_t *timer;
-
-	printk("Started krxtimod %d\n", current->pid);
-
-	daemonize("krxtimod");
-
-	complete(&krxtimod_alive);
-
-	/* loop around looking for things to attend to */
- loop:
-	set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&krxtimod_sleepq, &myself);
-
-	for (;;) {
-		unsigned long jif;
-		long timeout;
-
-		/* deal with the server being asked to die */
-		if (krxtimod_die) {
-			remove_wait_queue(&krxtimod_sleepq, &myself);
-			_leave("");
-			complete_and_exit(&krxtimod_dead, 0);
-		}
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-		/* work out the time to elapse before the next event */
-		spin_lock(&krxtimod_lock);
-		if (list_empty(&krxtimod_list)) {
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		else {
-			timer = list_entry(krxtimod_list.next,
-					   rxrpc_timer_t, link);
-			timeout = timer->timo_jif;
-			jif = jiffies;
-
-			if (time_before_eq((unsigned long) timeout, jif))
-				goto immediate;
-
-			else {
-				timeout = (long) timeout - (long) jiffies;
-			}
-		}
-		spin_unlock(&krxtimod_lock);
-
-		schedule_timeout(timeout);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-
-	/* the thing on the front of the queue needs processing
-	 * - we come here with the lock held and timer pointing to the expired
-	 *   entry
-	 */
- immediate:
-	remove_wait_queue(&krxtimod_sleepq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_debug("@@@ Begin Timeout of %p", timer);
-
-	/* dequeue the timer */
-	list_del_init(&timer->link);
-	spin_unlock(&krxtimod_lock);
-
-	/* call the timeout function */
-	timer->ops->timed_out(timer);
-
-	_debug("@@@ End Timeout");
-	goto loop;
-
-} /* end krxtimod() */
-
-/*****************************************************************************/
-/*
- * (re-)queue a timer
- */
-void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
-{
-	struct list_head *_p;
-	rxrpc_timer_t *ptimer;
-
-	_enter("%p,%lu", timer, timeout);
-
-	spin_lock(&krxtimod_lock);
-
-	list_del(&timer->link);
-
-	/* the timer was deferred or reset - put it back in the queue at the
-	 * right place */
-	timer->timo_jif = jiffies + timeout;
-
-	list_for_each(_p, &krxtimod_list) {
-		ptimer = list_entry(_p, rxrpc_timer_t, link);
-		if (time_before(timer->timo_jif, ptimer->timo_jif))
-			break;
-	}
-
-	list_add_tail(&timer->link, _p); /* insert before stopping point */
-
-	spin_unlock(&krxtimod_lock);
-
-	wake_up(&krxtimod_sleepq);
-
-	_leave("");
-} /* end rxrpc_krxtimod_add_timer() */
-
-/*****************************************************************************/
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
-{
-	int ret = 0;
-
-	_enter("%p", timer);
-
-	spin_lock(&krxtimod_lock);
-
-	if (list_empty(&timer->link))
-		ret = -ENOENT;
-	else
-		list_del_init(&timer->link);
-
-	spin_unlock(&krxtimod_lock);
-
-	wake_up(&krxtimod_sleepq);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_krxtimod_del_timer() */
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
deleted file mode 100644
index baec1f7fd8b9..000000000000
--- a/net/rxrpc/main.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* main.c: Rx RPC interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/krxtimod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-MODULE_DESCRIPTION("Rx RPC implementation");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-__be32 rxrpc_epoch;
-
-/*****************************************************************************/
-/*
- * initialise the Rx module
- */
-static int __init rxrpc_initialise(void)
-{
-	int ret;
-
-	/* my epoch value */
-	rxrpc_epoch = htonl(xtime.tv_sec);
-
-	/* register the /proc interface */
-#ifdef CONFIG_PROC_FS
-	ret = rxrpc_proc_init();
-	if (ret<0)
-		return ret;
-#endif
-
-	/* register the sysctl files */
-#ifdef CONFIG_SYSCTL
-	ret = rxrpc_sysctl_init();
-	if (ret<0)
-		goto error_proc;
-#endif
-
-	/* start the krxtimod daemon */
-	ret = rxrpc_krxtimod_start();
-	if (ret<0)
-		goto error_sysctl;
-
-	/* start the krxiod daemon */
-	ret = rxrpc_krxiod_init();
-	if (ret<0)
-		goto error_krxtimod;
-
-	/* start the krxsecd daemon */
-	ret = rxrpc_krxsecd_init();
-	if (ret<0)
-		goto error_krxiod;
-
-	kdebug("\n\n");
-
-	return 0;
-
- error_krxiod:
-	rxrpc_krxiod_kill();
- error_krxtimod:
-	rxrpc_krxtimod_kill();
- error_sysctl:
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl_cleanup();
- error_proc:
-#endif
-#ifdef CONFIG_PROC_FS
-	rxrpc_proc_cleanup();
-#endif
-	return ret;
-} /* end rxrpc_initialise() */
-
-module_init(rxrpc_initialise);
-
-/*****************************************************************************/
-/*
- * clean up the Rx module
- */
-static void __exit rxrpc_cleanup(void)
-{
-	kenter("");
-
-	__RXACCT(printk("Outstanding Messages   : %d\n",
-			atomic_read(&rxrpc_message_count)));
-	__RXACCT(printk("Outstanding Calls      : %d\n",
-			atomic_read(&rxrpc_call_count)));
-	__RXACCT(printk("Outstanding Connections: %d\n",
-			atomic_read(&rxrpc_connection_count)));
-	__RXACCT(printk("Outstanding Peers      : %d\n",
-			atomic_read(&rxrpc_peer_count)));
-	__RXACCT(printk("Outstanding Transports : %d\n",
-			atomic_read(&rxrpc_transport_count)));
-
-	rxrpc_krxsecd_kill();
-	rxrpc_krxiod_kill();
-	rxrpc_krxtimod_kill();
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl_cleanup();
-#endif
-#ifdef CONFIG_PROC_FS
-	rxrpc_proc_cleanup();
-#endif
-
-	__RXACCT(printk("Outstanding Messages   : %d\n",
-			atomic_read(&rxrpc_message_count)));
-	__RXACCT(printk("Outstanding Calls      : %d\n",
-			atomic_read(&rxrpc_call_count)));
-	__RXACCT(printk("Outstanding Connections: %d\n",
-			atomic_read(&rxrpc_connection_count)));
-	__RXACCT(printk("Outstanding Peers      : %d\n",
-			atomic_read(&rxrpc_peer_count)));
-	__RXACCT(printk("Outstanding Transports : %d\n",
-			atomic_read(&rxrpc_transport_count)));
-
-	kleave("");
-} /* end rxrpc_cleanup() */
-
-module_exit(rxrpc_cleanup);
-
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-		    "  andl    %0,%%edi        \n"
-		    "  addl    %1,%%edi        \n"
-		    "  movl    %%esp,%%ecx     \n"
-		    "  subl    %%edi,%%ecx     \n"
-		    "  shrl    $2,%%ecx        \n"
-		    "  movl    $0xedededed,%%eax     \n"
-		    "  rep stosl               \n"
-		    :
-		    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
-		    : "eax", "ecx", "edi", "memory", "cc"
-		    );
-}
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-		    "  andl    %0,%%edi        \n"
-		    "  addl    %1,%%edi        \n"
-		    "  movl    %%esp,%%ecx     \n"
-		    "  subl    %%edi,%%ecx     \n"
-		    "  shrl    $2,%%ecx        \n"
-		    "  movl    $0xdadadada,%%eax     \n"
-		    "  rep stosl               \n"
-		    :
-		    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
-		    : "eax", "ecx", "edi", "memory", "cc"
-		    );
-}
-#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
deleted file mode 100644
index 8a275157a3bb..000000000000
--- a/net/rxrpc/peer.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/* peer.c: Rx RPC peer management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include <asm/div64.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_peer_count);
-LIST_HEAD(rxrpc_peers);
-DECLARE_RWSEM(rxrpc_peers_sem);
-unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
-
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
-
-static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
-{
-	struct rxrpc_peer *peer =
-		list_entry(timer, struct rxrpc_peer, timeout);
-
-	_debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
-
-	rxrpc_peer_do_timeout(peer);
-}
-
-static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
-	.timed_out	= __rxrpc_peer_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a peer record
- */
-static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
-			       struct rxrpc_peer **_peer)
-{
-	struct rxrpc_peer *peer;
-
-	_enter("%p,%08x", trans, ntohl(addr));
-
-	/* allocate and initialise a peer record */
-	peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
-	if (!peer) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&peer->usage, 1);
-
-	INIT_LIST_HEAD(&peer->link);
-	INIT_LIST_HEAD(&peer->proc_link);
-	INIT_LIST_HEAD(&peer->conn_idlist);
-	INIT_LIST_HEAD(&peer->conn_active);
-	INIT_LIST_HEAD(&peer->conn_graveyard);
-	spin_lock_init(&peer->conn_gylock);
-	init_waitqueue_head(&peer->conn_gy_waitq);
-	rwlock_init(&peer->conn_idlock);
-	rwlock_init(&peer->conn_lock);
-	atomic_set(&peer->conn_count, 0);
-	spin_lock_init(&peer->lock);
-	rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
-
-	peer->addr.s_addr = addr;
-
-	peer->trans = trans;
-	peer->ops = trans->peer_ops;
-
-	__RXACCT(atomic_inc(&rxrpc_peer_count));
-	*_peer = peer;
-	_leave(" = 0 (%p)", peer);
-
-	return 0;
-} /* end __rxrpc_create_peer() */
-
-/*****************************************************************************/
-/*
- * find a peer record on the specified transport
- * - returns (if successful) with peer record usage incremented
- * - resurrects it from the graveyard if found there
- */
-int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
-		      struct rxrpc_peer **_peer)
-{
-	struct rxrpc_peer *peer, *candidate = NULL;
-	struct list_head *_p;
-	int ret;
-
-	_enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
-
-	/* [common case] search the transport's active list first */
-	read_lock(&trans->peer_lock);
-	list_for_each(_p, &trans->peer_active) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_active;
-	}
-	read_unlock(&trans->peer_lock);
-
-	/* [uncommon case] not active - create a candidate for a new record */
-	ret = __rxrpc_create_peer(trans, addr, &candidate);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* search the active list again, just in case it appeared whilst we
-	 * were busy */
-	write_lock(&trans->peer_lock);
-	list_for_each(_p, &trans->peer_active) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_active_second_chance;
-	}
-
-	/* search the transport's graveyard list */
-	spin_lock(&trans->peer_gylock);
-	list_for_each(_p, &trans->peer_graveyard) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&trans->peer_gylock);
-
-	/* we can now add the new candidate to the list
-	 * - tell the application layer that this peer has been added
-	 */
-	rxrpc_get_transport(trans);
-	peer = candidate;
-	candidate = NULL;
-
-	if (peer->ops && peer->ops->adding) {
-		ret = peer->ops->adding(peer);
-		if (ret < 0) {
-			write_unlock(&trans->peer_lock);
-			__RXACCT(atomic_dec(&rxrpc_peer_count));
-			kfree(peer);
-			rxrpc_put_transport(trans);
-			_leave(" = %d", ret);
-			return ret;
-		}
-	}
-
-	atomic_inc(&trans->peer_count);
-
- make_active:
-	list_add_tail(&peer->link, &trans->peer_active);
-
- success_uwfree:
-	write_unlock(&trans->peer_lock);
-
-	if (candidate) {
-		__RXACCT(atomic_dec(&rxrpc_peer_count));
-		kfree(candidate);
-	}
-
-	if (list_empty(&peer->proc_link)) {
-		down_write(&rxrpc_peers_sem);
-		list_add_tail(&peer->proc_link, &rxrpc_peers);
-		up_write(&rxrpc_peers_sem);
-	}
-
- success:
-	*_peer = peer;
-
-	_leave(" = 0 (%p{u=%d cc=%d})",
-	       peer,
-	       atomic_read(&peer->usage),
-	       atomic_read(&peer->conn_count));
-	return 0;
-
-	/* handle the peer being found in the active list straight off */
- found_active:
-	rxrpc_get_peer(peer);
-	read_unlock(&trans->peer_lock);
-	goto success;
-
-	/* handle resurrecting a peer from the graveyard */
- found_in_graveyard:
-	rxrpc_get_peer(peer);
-	rxrpc_get_transport(peer->trans);
-	rxrpc_krxtimod_del_timer(&peer->timeout);
-	list_del_init(&peer->link);
-	spin_unlock(&trans->peer_gylock);
-	goto make_active;
-
-	/* handle finding the peer on the second time through the active
-	 * list */
- found_active_second_chance:
-	rxrpc_get_peer(peer);
-	goto success_uwfree;
-
-} /* end rxrpc_peer_lookup() */
-
-/*****************************************************************************/
-/*
- * finish with a peer record
- * - it gets sent to the graveyard from where it can be resurrected or timed
- *   out
- */
-void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
-	struct rxrpc_transport *trans = peer->trans;
-
-	_enter("%p{cc=%d a=%08x}",
-	       peer,
-	       atomic_read(&peer->conn_count),
-	       ntohl(peer->addr.s_addr));
-
-	/* sanity check */
-	if (atomic_read(&peer->usage) <= 0)
-		BUG();
-
-	write_lock(&trans->peer_lock);
-	spin_lock(&trans->peer_gylock);
-	if (likely(!atomic_dec_and_test(&peer->usage))) {
-		spin_unlock(&trans->peer_gylock);
-		write_unlock(&trans->peer_lock);
-		_leave("");
-		return;
-	}
-
-	/* move to graveyard queue */
-	list_del(&peer->link);
-	write_unlock(&trans->peer_lock);
-
-	list_add_tail(&peer->link, &trans->peer_graveyard);
-
-	BUG_ON(!list_empty(&peer->conn_active));
-
-	rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
-
-	spin_unlock(&trans->peer_gylock);
-
-	rxrpc_put_transport(trans);
-
-	_leave(" [killed]");
-} /* end rxrpc_put_peer() */
-
-/*****************************************************************************/
-/*
- * handle a peer timing out in the graveyard
- * - called from krxtimod
- */
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
-{
-	struct rxrpc_transport *trans = peer->trans;
-
-	_enter("%p{u=%d cc=%d a=%08x}",
-	       peer,
-	       atomic_read(&peer->usage),
-	       atomic_read(&peer->conn_count),
-	       ntohl(peer->addr.s_addr));
-
-	BUG_ON(atomic_read(&peer->usage) < 0);
-
-	/* remove from graveyard if still dead */
-	spin_lock(&trans->peer_gylock);
-	if (atomic_read(&peer->usage) == 0)
-		list_del_init(&peer->link);
-	else
-		peer = NULL;
-	spin_unlock(&trans->peer_gylock);
-
-	if (!peer) {
-		_leave("");
-		return; /* resurrected */
-	}
-
-	/* clear all connections on this peer */
-	rxrpc_conn_clearall(peer);
-
-	BUG_ON(!list_empty(&peer->conn_active));
-	BUG_ON(!list_empty(&peer->conn_graveyard));
-
-	/* inform the application layer */
-	if (peer->ops && peer->ops->discarding)
-		peer->ops->discarding(peer);
-
-	if (!list_empty(&peer->proc_link)) {
-		down_write(&rxrpc_peers_sem);
-		list_del(&peer->proc_link);
-		up_write(&rxrpc_peers_sem);
-	}
-
-	__RXACCT(atomic_dec(&rxrpc_peer_count));
-	kfree(peer);
-
-	/* if the graveyard is now empty, wake up anyone waiting for that */
-	if (atomic_dec_and_test(&trans->peer_count))
-		wake_up(&trans->peer_gy_waitq);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_peer_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all peer records from a transport endpoint
- */
-void rxrpc_peer_clearall(struct rxrpc_transport *trans)
-{
-	DECLARE_WAITQUEUE(myself,current);
-
-	struct rxrpc_peer *peer;
-	int err;
-
-	_enter("%p",trans);
-
-	/* there shouldn't be any active peers remaining */
-	BUG_ON(!list_empty(&trans->peer_active));
-
-	/* manually timeout all peers in the graveyard */
-	spin_lock(&trans->peer_gylock);
-	while (!list_empty(&trans->peer_graveyard)) {
-		peer = list_entry(trans->peer_graveyard.next,
-				  struct rxrpc_peer, link);
-		_debug("Clearing peer %p\n", peer);
-		err = rxrpc_krxtimod_del_timer(&peer->timeout);
-		spin_unlock(&trans->peer_gylock);
-
-		if (err == 0)
-			rxrpc_peer_do_timeout(peer);
-
-		spin_lock(&trans->peer_gylock);
-	}
-	spin_unlock(&trans->peer_gylock);
-
-	/* wait for the the peer graveyard to be completely cleared */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&trans->peer_gy_waitq, &myself);
-
-	while (atomic_read(&trans->peer_count) != 0) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&trans->peer_gy_waitq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_leave("");
-} /* end rxrpc_peer_clearall() */
-
-/*****************************************************************************/
-/*
- * calculate and cache the Round-Trip-Time for a message and its response
- */
-void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
-			      struct rxrpc_message *msg,
-			      struct rxrpc_message *resp)
-{
-	unsigned long long rtt;
-	int loop;
-
-	_enter("%p,%p,%p", peer, msg, resp);
-
-	/* calculate the latest RTT */
-	rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
-	rtt *= 1000000UL;
-	rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
-
-	/* add to cache */
-	peer->rtt_cache[peer->rtt_point] = rtt;
-	peer->rtt_point++;
-	peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
-
-	if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
-		peer->rtt_usage++;
-
-	/* recalculate RTT */
-	rtt = 0;
-	for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
-		rtt += peer->rtt_cache[loop];
-
-	do_div(rtt, peer->rtt_usage);
-	peer->rtt = rtt;
-
-	_leave(" RTT=%lu.%lums",
-	       (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
-
-} /* end rxrpc_peer_calculate_rtt() */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
deleted file mode 100644
index 8551c879e456..000000000000
--- a/net/rxrpc/proc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/* proc.c: /proc interface for RxRPC
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-static struct proc_dir_entry *proc_rxrpc;
-
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_transports_ops = {
-	.start	= rxrpc_proc_transports_start,
-	.next	= rxrpc_proc_transports_next,
-	.stop	= rxrpc_proc_transports_stop,
-	.show	= rxrpc_proc_transports_show,
-};
-
-static const struct file_operations rxrpc_proc_transports_fops = {
-	.open		= rxrpc_proc_transports_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_peers_ops = {
-	.start	= rxrpc_proc_peers_start,
-	.next	= rxrpc_proc_peers_next,
-	.stop	= rxrpc_proc_peers_stop,
-	.show	= rxrpc_proc_peers_show,
-};
-
-static const struct file_operations rxrpc_proc_peers_fops = {
-	.open		= rxrpc_proc_peers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_conns_ops = {
-	.start	= rxrpc_proc_conns_start,
-	.next	= rxrpc_proc_conns_next,
-	.stop	= rxrpc_proc_conns_stop,
-	.show	= rxrpc_proc_conns_show,
-};
-
-static const struct file_operations rxrpc_proc_conns_fops = {
-	.open		= rxrpc_proc_conns_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_calls_ops = {
-	.start	= rxrpc_proc_calls_start,
-	.next	= rxrpc_proc_calls_next,
-	.stop	= rxrpc_proc_calls_stop,
-	.show	= rxrpc_proc_calls_show,
-};
-
-static const struct file_operations rxrpc_proc_calls_fops = {
-	.open		= rxrpc_proc_calls_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const char *rxrpc_call_states7[] = {
-	"complet",
-	"error  ",
-	"rcv_op ",
-	"rcv_arg",
-	"got_arg",
-	"snd_rpl",
-	"fin_ack",
-	"snd_arg",
-	"rcv_rpl",
-	"got_rpl"
-};
-
-static const char *rxrpc_call_error_states7[] = {
-	"no_err ",
-	"loc_abt",
-	"rmt_abt",
-	"loc_err",
-	"rmt_err"
-};
-
-/*****************************************************************************/
-/*
- * initialise the /proc/net/rxrpc/ directory
- */
-int rxrpc_proc_init(void)
-{
-	struct proc_dir_entry *p;
-
-	proc_rxrpc = proc_mkdir("rxrpc", proc_net);
-	if (!proc_rxrpc)
-		goto error;
-	proc_rxrpc->owner = THIS_MODULE;
-
-	p = create_proc_entry("calls", 0, proc_rxrpc);
-	if (!p)
-		goto error_proc;
-	p->proc_fops = &rxrpc_proc_calls_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("connections", 0, proc_rxrpc);
-	if (!p)
-		goto error_calls;
-	p->proc_fops = &rxrpc_proc_conns_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("peers", 0, proc_rxrpc);
-	if (!p)
-		goto error_calls;
-	p->proc_fops = &rxrpc_proc_peers_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("transports", 0, proc_rxrpc);
-	if (!p)
-		goto error_conns;
-	p->proc_fops = &rxrpc_proc_transports_fops;
-	p->owner = THIS_MODULE;
-
-	return 0;
-
- error_conns:
-	remove_proc_entry("connections", proc_rxrpc);
- error_calls:
-	remove_proc_entry("calls", proc_rxrpc);
- error_proc:
-	remove_proc_entry("rxrpc", proc_net);
- error:
-	return -ENOMEM;
-} /* end rxrpc_proc_init() */
-
-/*****************************************************************************/
-/*
- * clean up the /proc/net/rxrpc/ directory
- */
-void rxrpc_proc_cleanup(void)
-{
-	remove_proc_entry("transports", proc_rxrpc);
-	remove_proc_entry("peers", proc_rxrpc);
-	remove_proc_entry("connections", proc_rxrpc);
-	remove_proc_entry("calls", proc_rxrpc);
-
-	remove_proc_entry("rxrpc", proc_net);
-
-} /* end rxrpc_proc_cleanup() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
- */
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_transports_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_transports_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the transports list and return the first item
- */
-static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_proc_transports_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_proc_transports)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in transports list
- */
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
-
-	return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the transports list
- */
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_proc_transports_sem);
-
-} /* end rxrpc_proc_transports_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_transport *trans =
-		list_entry(v, struct rxrpc_transport, proc_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "LOCAL USE\n");
-		return 0;
-	}
-
-	/* display one transport per line on subsequent lines */
-	seq_printf(m, "%5hu %3d\n",
-		   trans->port,
-		   atomic_read(&trans->usage)
-		   );
-
-	return 0;
-} /* end rxrpc_proc_transports_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
- */
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_peers_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_peers_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the peers list and return the
- * first item
- */
-static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_peers_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_peers)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in peers list
- */
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
-
-	return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the peers list
- */
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_peers_sem);
-
-} /* end rxrpc_proc_peers_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
-	long timeout;
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "LOCAL REMOTE   USAGE CONNS  TIMEOUT"
-			 "   MTU RTT(uS)\n");
-		return 0;
-	}
-
-	/* display one peer per line on subsequent lines */
-	timeout = 0;
-	if (!list_empty(&peer->timeout.link))
-		timeout = (long) peer->timeout.timo_jif -
-			(long) jiffies;
-
-	seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
-		   peer->trans->port,
-		   ntohl(peer->addr.s_addr),
-		   atomic_read(&peer->usage),
-		   atomic_read(&peer->conn_count),
-		   timeout,
-		   peer->if_mtu,
-		   (long) peer->rtt
-		   );
-
-	return 0;
-} /* end rxrpc_proc_peers_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/connections" which provides a summary of extant
- * connections
- */
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_conns_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_conns_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the conns list and return the
- * first item
- */
-static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_conns_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_conns)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in conns list
- */
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
-
-	return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the conns list
- */
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_conns_sem);
-
-} /* end rxrpc_proc_conns_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_connection *conn;
-	long timeout;
-
-	conn = list_entry(v, struct rxrpc_connection, proc_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m,
-			 "LOCAL REMOTE   RPORT SRVC CONN     END SERIALNO "
-			 "CALLNO     MTU  TIMEOUT"
-			 "\n");
-		return 0;
-	}
-
-	/* display one conn per line on subsequent lines */
-	timeout = 0;
-	if (!list_empty(&conn->timeout.link))
-		timeout = (long) conn->timeout.timo_jif -
-			(long) jiffies;
-
-	seq_printf(m,
-		   "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
-		   conn->trans->port,
-		   ntohl(conn->addr.sin_addr.s_addr),
-		   ntohs(conn->addr.sin_port),
-		   ntohs(conn->service_id),
-		   ntohl(conn->conn_id),
-		   conn->out_clientflag ? "CLT" : "SRV",
-		   conn->serial_counter,
-		   conn->call_counter,
-		   conn->mtu_size,
-		   timeout
-		   );
-
-	return 0;
-} /* end rxrpc_proc_conns_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
- */
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_calls_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_calls_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the calls list and return the
- * first item
- */
-static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_calls_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_calls)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in calls list
- */
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
-
-	return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the calls list
- */
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_calls_sem);
-
-} /* end rxrpc_proc_calls_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m,
-			 "LOCAL REMOT SRVC CONN     CALL     DIR USE "
-			 " L STATE   OPCODE ABORT    ERRNO\n"
-			 );
-		return 0;
-	}
-
-	/* display one call per line on subsequent lines */
-	seq_printf(m,
-		   "%5hu %5hu %04hx %08x %08x %s %3u%c"
-		   " %c %-7.7s %6d %08x %5d\n",
-		   call->conn->trans->port,
-		   ntohs(call->conn->addr.sin_port),
-		   ntohs(call->conn->service_id),
-		   ntohl(call->conn->conn_id),
-		   ntohl(call->call_id),
-		   call->conn->service ? "SVC" : "CLT",
-		   atomic_read(&call->usage),
-		   waitqueue_active(&call->waitq) ? 'w' : ' ',
-		   call->app_last_rcv ? 'Y' : '-',
-		   (call->app_call_state!=RXRPC_CSTATE_ERROR ?
-		    rxrpc_call_states7[call->app_call_state] :
-		    rxrpc_call_error_states7[call->app_err_state]),
-		   call->app_opcode,
-		   call->app_abort_code,
-		   call->app_errno
-		   );
-
-	return 0;
-} /* end rxrpc_proc_calls_show() */
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 000000000000..5ec705144e10
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1154 @@
+/* Kerberos-based RxRPC security
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/ctype.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define rxrpc_debug rxkad_debug
+#include "ar-internal.h"
+
+#define RXKAD_VERSION			2
+#define MAXKRB5TICKETLEN		1024
+#define RXKAD_TKT_TYPE_KERBEROS_V5	256
+#define ANAME_SZ			40	/* size of authentication name */
+#define INST_SZ				40	/* size of principal's instance */
+#define REALM_SZ			40	/* size of principal's auth domain */
+#define SNAME_SZ			40	/* size of service name */
+
+unsigned rxrpc_debug;	/* exposed to userspace as module parameter "debug" */
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "rxkad debugging mask");	/* keyed by parameter name */
+
+struct rxkad_level1_hdr {
+	__be32	data_size;	/* low 16 bits: true data size (excluding padding); high 16 bits: check value */
+};
+
+struct rxkad_level2_hdr {
+	__be32	data_size;	/* low 16 bits: true data size (excluding padding); high 16 bits: check value */
+	__be32	checksum;	/* decrypted data checksum (written as 0 by the sender in this file) */
+};
+
+MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+/*
+ * this holds a pinned cipher so that keventd doesn't get called by the cipher
+ * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
+ * packets
+ */
+static struct crypto_blkcipher *rxkad_ci;
+static DEFINE_MUTEX(rxkad_ci_mutex);
+
+/*
+ * initialise connection security; returns 0 or a negative error code
+ */
+static int rxkad_init_connection_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_key_payload *payload;
+	struct crypto_blkcipher *ci;
+	int ret;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+
+	payload = conn->key->payload.data;
+	conn->security_ix = payload->k.security_index;
+
+	ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ci)) {
+		_debug("no cipher");
+		ret = PTR_ERR(ci);
+		goto error;
+	}
+
+	if (crypto_blkcipher_setkey(ci, payload->k.session_key,
+				    sizeof(payload->k.session_key)) < 0)
+		BUG();
+
+	switch (conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		break;
+	case RXRPC_SECURITY_AUTH:
+		conn->size_align = 8;
+		conn->security_size = sizeof(struct rxkad_level1_hdr);
+		conn->header_size += sizeof(struct rxkad_level1_hdr);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		conn->size_align = 8;
+		conn->security_size = sizeof(struct rxkad_level2_hdr);
+		conn->header_size += sizeof(struct rxkad_level2_hdr);
+		break;
+	default:
+		crypto_free_blkcipher(ci); /* never attached to conn: don't leak it */
+		ret = -EKEYREJECTED;
+		goto error;
+	}
+	conn->cipher = ci;
+	ret = 0;
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * prime the encryption state with the invariant parts of a connection's
+ * description
+ */
+static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_key_payload *payload;
+	struct blkcipher_desc desc;
+	struct scatterlist sg[2];
+	struct rxrpc_crypt iv;
+	struct {
+		__be32 x[4];
+	} tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+
+	_enter("");
+
+	if (!conn->key)
+		return;
+
+	payload = conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+
+	desc.tfm = conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	tmpbuf.x[0] = conn->epoch;
+	tmpbuf.x[1] = conn->cid;
+	tmpbuf.x[2] = 0;
+	tmpbuf.x[3] = htonl(conn->security_ix);
+
+	memset(sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
+	ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+
+	_leave("");
+}
+
+/*
+ * partially encrypt a packet (level 1 security)
+ */
+static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
+				    struct sk_buff *skb,
+				    u32 data_size,
+				    void *sechdr)
+{
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		struct rxkad_level1_hdr hdr;
+		__be32	first;	/* first four bytes of data and padding */
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	u16 check;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("");
+
+	check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	data_size |= (u32) check << 16;
+
+	tmpbuf.hdr.data_size = htonl(data_size);
+	memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+
+	/* start the encryption afresh */
+	memset(&iv, 0, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	memset(sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
+					struct sk_buff *skb,
+					u32 data_size,
+					void *sechdr)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_level2_hdr rxkhdr
+		__attribute__((aligned(8))); /* must be all on one page */
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[16];
+	struct sk_buff *trailer;
+	unsigned len;
+	u16 check;
+	int nsg;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("");
+
+	check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+
+	rxkhdr.data_size = htonl(data_size | (u32) check << 16);
+	rxkhdr.checksum = 0;
+
+	/* encrypt from the session key */
+	payload = call->conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	memset(sg, 0, sizeof(sg[0]) * 2);
+	sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr));
+	sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
+
+	/* we want to encrypt the skbuff in-place */
+	nsg = skb_cow_data(skb, 0, &trailer);
+	if (nsg < 0 || nsg > 16)
+		return -ENOMEM;
+
+	len = data_size + call->conn->size_align - 1;
+	len &= ~(call->conn->size_align - 1);
+
+	skb_to_sgvec(skb, sg, 0, len);
+	crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * checksum an RxRPC packet header and apply the connection's security level
+ */
+static int rxkad_secure_packet(const struct rxrpc_call *call,
+				struct sk_buff *skb,
+				size_t data_size,
+				void *sechdr)
+{
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		__be32 x[2];
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	__be32 x;
+	int ret;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("{%d{%x}},{#%u},%zu,",
+	       call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
+	       data_size);
+
+	if (!call->conn->cipher)
+		return 0;
+
+	ret = key_validate(call->conn->key);
+	if (ret < 0)
+		return ret;
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	/* calculate the security checksum */
+	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	tmpbuf.x[0] = sp->hdr.callNumber;
+	tmpbuf.x[1] = x;
+
+	memset(&sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	x = ntohl(tmpbuf.x[1]);
+	x = (x >> 16) & 0xffff;
+	if (x == 0)
+		x = 1; /* zero checksums are not permitted */
+	sp->hdr.cksum = htons(x);
+
+	switch (call->conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		ret = 0;
+		break;
+	case RXRPC_SECURITY_AUTH:
+		ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		ret = rxkad_secure_packet_encrypt(call, skb, data_size,
+						  sechdr);
+		break;
+	default:
+		ret = -EPERM;
+		break;
+	}
+
+	_leave(" = %d [set %hx]", ret, x);
+	return ret;
+}
+
+/*
+ * decrypt partial encryption on a packet (level 1 security)
+ */
+static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
+				    struct sk_buff *skb,
+				    u32 *_abort_code)
+{
+	struct rxkad_level1_hdr sechdr;
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct sk_buff *trailer;
+	u32 data_size, buf;
+	u16 check;
+
+	_enter("");
+
+	sp = rxrpc_skb(skb);
+	if (skb->len < 8)	/* the 8 bytes decrypted below must be present */
+		goto datalen_error;
+	/* we want to decrypt the skbuff in-place */
+	if (skb_cow_data(skb, 0, &trailer) < 0)
+		goto nomem;
+	skb_to_sgvec(skb, sg, 0, 8);
+
+	/* start the decryption afresh */
+	memset(&iv, 0, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
+
+	/* remove the decrypted packet length */
+	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+		goto datalen_error;
+	if (!skb_pull(skb, sizeof(sechdr)))
+		BUG();
+
+	buf = ntohl(sechdr.data_size);
+	data_size = buf & 0xffff;	/* low 16 bits: true data size */
+
+	check = buf >> 16;		/* high 16 bits: check value */
+	check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	check &= 0xffff;
+	if (check != 0) {
+		*_abort_code = RXKADSEALEDINCON;
+		goto protocol_error;
+	}
+
+	/* shorten the packet to remove the padding */
+	if (data_size > skb->len)
+		goto datalen_error;
+	else if (data_size < skb->len)
+		skb->len = data_size;
+
+	_leave(" = 0 [dlen=%x]", data_size);
+	return 0;
+
+datalen_error:
+	*_abort_code = RXKADDATALEN;
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+
+nomem:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * wholly decrypt a packet (level 2 security)
+ */
+static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
+				       struct sk_buff *skb,
+				       u32 *_abort_code)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_level2_hdr sechdr;
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist _sg[4], *sg;
+	struct sk_buff *trailer;
+	u32 data_size, buf;
+	u16 check;
+	int nsg;
+
+	_enter(",{%d}", skb->len);
+
+	sp = rxrpc_skb(skb);
+
+	/* we want to decrypt the skbuff in-place */
+	nsg = skb_cow_data(skb, 0, &trailer);
+	if (nsg < 0)
+		goto nomem;
+
+	sg = _sg;
+	if (unlikely(nsg > 4)) {
+		sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
+		if (!sg)
+			goto nomem;
+	}
+
+	skb_to_sgvec(skb, sg, 0, skb->len);
+
+	/* decrypt from the session key */
+	payload = call->conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
+	if (sg != _sg)
+		kfree(sg);
+
+	/* copy out and remove the decrypted security header */
+	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+		goto datalen_error;
+	if (!skb_pull(skb, sizeof(sechdr)))
+		BUG();
+
+	buf = ntohl(sechdr.data_size);
+	data_size = buf & 0xffff;
+
+	check = buf >> 16;
+	check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	check &= 0xffff;
+	if (check != 0) {
+		*_abort_code = RXKADSEALEDINCON;
+		goto protocol_error;
+	}
+
+	/* shorten the packet to remove the padding */
+	if (data_size > skb->len)
+		goto datalen_error;
+	else if (data_size < skb->len)
+		skb->len = data_size;
+
+	_leave(" = 0 [dlen=%x]", data_size);
+	return 0;
+
+datalen_error:
+	*_abort_code = RXKADDATALEN;
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+
+nomem:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * verify the security on a received packet
+ */
+static int rxkad_verify_packet(const struct rxrpc_call *call,
+			       struct sk_buff *skb,
+			       u32 *_abort_code)
+{
+	struct blkcipher_desc desc;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		__be32 x[2];
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	__be32 x;
+	__be16 cksum;
+	int ret;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("{%d{%x}},{#%u}",
+	       call->debug_id, key_serial(call->conn->key),
+	       ntohl(sp->hdr.seq));
+
+	if (!call->conn->cipher)
+		return 0;
+
+	if (sp->hdr.securityIndex != 2) {
+		*_abort_code = RXKADINCONSISTENCY;
+		_leave(" = -EPROTO [not rxkad]");
+		return -EPROTO;
+	}
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	/* validate the security checksum */
+	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	tmpbuf.x[0] = call->call_id;
+	tmpbuf.x[1] = x;
+
+	memset(&sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	x = ntohl(tmpbuf.x[1]);
+	x = (x >> 16) & 0xffff;
+	if (x == 0)
+		x = 1; /* zero checksums are not permitted */
+
+	cksum = htons(x);
+	if (sp->hdr.cksum != cksum) {
+		*_abort_code = RXKADSEALEDINCON;
+		_leave(" = -EPROTO [csum failed]");
+		return -EPROTO;
+	}
+
+	switch (call->conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		ret = 0;
+		break;
+	case RXRPC_SECURITY_AUTH:
+		ret = rxkad_verify_packet_auth(call, skb, _abort_code);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
+		break;
+	default:
+		ret = -ENOANO;
+		break;
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * issue a challenge: send a CHALLENGE packet carrying a fresh nonce to the peer
+ */
+static int rxkad_issue_challenge(struct rxrpc_connection *conn)
+{
+	struct rxkad_challenge challenge;
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t len;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+	ret = key_validate(conn->key);
+	if (ret < 0)
+		return ret;
+
+	get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce));
+
+	challenge.version	= htonl(RXKAD_VERSION);
+	challenge.nonce		= htonl(conn->security_nonce);
+	challenge.min_level	= htonl(0);
+	challenge.__padding	= 0;
+
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr.epoch	= conn->epoch;
+	hdr.cid		= conn->cid;
+	hdr.callNumber	= 0;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_CHALLENGE;
+	hdr.flags	= conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= conn->service_id;
+
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+	iov[1].iov_base	= &challenge;
+	iov[1].iov_len	= sizeof(challenge);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	hdr.serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		return -EAGAIN;	/* any send failure is reported as -EAGAIN */
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * Transmit a Kerberos security RESPONSE packet.
+ * - hdr is rewritten in place into the outgoing header; fields that are not
+ *   assigned here keep whatever value the caller left in them
+ * - the packet consists of the header, *resp and then the ticket held in s2
+ * - returns 0 on success or -EAGAIN if the send failed
+ */
+static int rxkad_send_response(struct rxrpc_connection *conn,
+			       struct rxrpc_header *hdr,
+			       struct rxkad_response *resp,
+			       const struct rxkad_key *s2)
+{
+	struct msghdr msg;
+	struct kvec vec[3];
+	size_t total;
+	int ret;
+
+	_enter("");
+
+	hdr->epoch	= conn->epoch;
+	hdr->seq	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_RESPONSE;
+	hdr->flags	= conn->out_clientflag;
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	/* address the message to the connection's peer, no control data */
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	vec[0].iov_base	= hdr;
+	vec[0].iov_len	= sizeof(*hdr);
+	vec[1].iov_base	= resp;
+	vec[1].iov_len	= sizeof(*resp);
+	vec[2].iov_base	= (void *) s2->ticket;
+	vec[2].iov_len	= s2->ticket_len;
+
+	total = vec[0].iov_len + vec[1].iov_len + vec[2].iov_len;
+
+	hdr->serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, vec, 3, total);
+	if (ret >= 0) {
+		_leave(" = 0");
+		return 0;
+	}
+
+	_debug("sendmsg failed: %d", ret);
+	return -EAGAIN;
+}
+
+/*
+ * Compute the checksum over a response and store it in the response.
+ * - every byte of *response is folded in, including the checksum field
+ *   itself as it stands on entry
+ * - the result is written to response->encrypted.checksum in network order
+ */
+static void rxkad_calc_response_checksum(struct rxkad_response *response)
+{
+	const u8 *cursor = (const u8 *) response;
+	const u8 *limit = cursor + sizeof(*response);
+	u32 csum = 1000003;
+
+	while (cursor < limit) {
+		csum = csum * 0x10204081 + *cursor;
+		cursor++;
+	}
+
+	response->encrypted.checksum = htonl(csum);
+}
+
+/*
+ * load a scatterlist with a potentially split-page buffer
+ * - sg[] must have room for two entries; both are cleared first, so sg[1] is
+ *   left empty (zero length) unless the buffer runs past the end of the page
+ *   in which it starts
+ * - if it does, sg[0] is trimmed to the end of that page and sg[1] is pointed
+ *   at the remainder
+ */
+static void rxkad_sg_set_buf2(struct scatterlist sg[2],
+			      void *buf, size_t buflen)
+{
+	/* sg is really a pointer here (array parameters decay), so sizeof(sg)
+	 * would only cover a pointer's worth of bytes; size the clear by
+	 * element instead */
+	memset(sg, 0, 2 * sizeof(sg[0]));
+
+	sg_set_buf(&sg[0], buf, buflen);
+	if (sg[0].offset + buflen > PAGE_SIZE) {
+		/* the buffer was split over two pages */
+		sg[0].length = PAGE_SIZE - sg[0].offset;
+		sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
+	}
+
+	ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
+}
+
+/*
+ * Encrypt the sealed part of a response packet in place.
+ * - the connection's cipher is used and the IV starts out as a copy of the
+ *   session key held in s2
+ */
+static void rxkad_encrypt_response(struct rxrpc_connection *conn,
+				   struct rxkad_response *resp,
+				   const struct rxkad_key *s2)
+{
+	struct scatterlist src[2], dst[2];
+	struct rxrpc_crypt iv;
+	struct blkcipher_desc desc = {
+		.tfm	= conn->cipher,
+		.info	= iv.x,
+		.flags	= 0,
+	};
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, s2->session_key, sizeof(iv));
+
+	/* source and destination lists describe the same buffer */
+	rxkad_sg_set_buf2(src, &resp->encrypted, sizeof(resp->encrypted));
+	memcpy(dst, src, sizeof(dst));
+	crypto_blkcipher_encrypt_iv(&desc, dst, src, sizeof(resp->encrypted));
+}
+
+/*
+ * respond to a challenge packet
+ * - the challenge is read from the start of skb's data and checked for the
+ *   right version and for a minimum security level that this connection can
+ *   satisfy
+ * - a response is then built from the connection parameters, the call IDs of
+ *   the connection's four channels and the challenge nonce plus one; it is
+ *   checksummed, encrypted and sent along with the ticket from conn->key
+ * - returns the result of rxkad_send_response() on success, -EPROTO if there
+ *   is no key or the challenge is unacceptable, or the error from
+ *   key_validate(); *_abort_code is set on every failure except the no-key
+ *   case
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+				      struct sk_buff *skb,
+				      u32 *_abort_code)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_challenge challenge;
+	struct rxkad_response resp
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	u32 version, nonce, min_level, abort_code;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+	if (!conn->key) {
+		_leave(" = -EPROTO [no key]");
+		return -EPROTO;
+	}
+
+	ret = key_validate(conn->key);
+	if (ret < 0) {
+		*_abort_code = RXKADEXPIRED;
+		return ret;
+	}
+
+	abort_code = RXKADPACKETSHORT;
+	sp = rxrpc_skb(skb);
+	if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+		goto protocol_error;
+
+	version = ntohl(challenge.version);
+	nonce = ntohl(challenge.nonce);
+	min_level = ntohl(challenge.min_level);
+
+	_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
+	       ntohl(sp->hdr.serial), version, nonce, min_level);
+
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+	abort_code = RXKADLEVELFAIL;
+	if (conn->security_level < min_level)
+		goto protocol_error;
+
+	payload = conn->key->payload.data;
+
+	/* build the response packet */
+	memset(&resp, 0, sizeof(resp));
+
+	/* the version goes on the wire in network byte order like the other
+	 * fields; rxkad_verify_response() reads it back with ntohl() */
+	resp.version = htonl(RXKAD_VERSION);
+	resp.encrypted.epoch = conn->epoch;
+	resp.encrypted.cid = conn->cid;
+	resp.encrypted.securityIndex = htonl(conn->security_ix);
+	resp.encrypted.call_id[0] =
+		(conn->channels[0] ? conn->channels[0]->call_id : 0);
+	resp.encrypted.call_id[1] =
+		(conn->channels[1] ? conn->channels[1]->call_id : 0);
+	resp.encrypted.call_id[2] =
+		(conn->channels[2] ? conn->channels[2]->call_id : 0);
+	resp.encrypted.call_id[3] =
+		(conn->channels[3] ? conn->channels[3]->call_id : 0);
+	resp.encrypted.inc_nonce = htonl(nonce + 1);
+	resp.encrypted.level = htonl(conn->security_level);
+	resp.kvno = htonl(payload->k.kvno);
+	resp.ticket_len = htonl(payload->k.ticket_len);
+
+	/* calculate the response checksum and then do the encryption */
+	rxkad_calc_response_checksum(&resp);
+	rxkad_encrypt_response(conn, &resp, &payload->k);
+	return rxkad_send_response(conn, &sp->hdr, &resp, &payload->k);
+
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * decrypt the kerberos IV ticket in the response
+ * - the ticket is decrypted in place using the cipher held as the payload of
+ *   conn->server_key, with the IV copied from that key's type_data
+ * - the session key found inside is copied to *_session_key and the expiry
+ *   time (issue time + lifetime) is stored in *_expiry
+ * - returns 0 on success; on failure a negative error code is returned and
+ *   *_abort_code is set (*_expiry is zeroed on entry and is only filled in
+ *   once the date checks have passed)
+ */
+static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+				void *ticket, size_t ticket_len,
+				struct rxrpc_crypt *_session_key,
+				time_t *_expiry,
+				u32 *_abort_code)
+{
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv, key;
+	struct scatterlist ssg[1], dsg[1];
+	struct in_addr addr;
+	unsigned life;
+	time_t issue, now;
+	bool little_endian;
+	int ret;
+	u8 *p, *q, *name, *end;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
+
+	*_expiry = 0;
+
+	/* an expired server key gets its own abort code; any other validation
+	 * failure is reported as RXKADNOAUTH */
+	ret = key_validate(conn->server_key);
+	if (ret < 0) {
+		switch (ret) {
+		case -EKEYEXPIRED:
+			*_abort_code = RXKADEXPIRED;
+			goto error;
+		default:
+			*_abort_code = RXKADNOAUTH;
+			goto error;
+		}
+	}
+
+	ASSERT(conn->server_key->payload.data != NULL);
+	ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
+
+	memcpy(&iv, &conn->server_key->type_data, sizeof(iv));
+
+	desc.tfm = conn->server_key->payload.data;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	/* dsg is a copy of ssg, so the ticket is decrypted over itself */
+	sg_init_one(&ssg[0], ticket, ticket_len);
+	memcpy(dsg, ssg, sizeof(dsg));
+	crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len);
+
+	p = ticket;
+	end = p + ticket_len;
+
+	/* Z(size): yield a pointer to the NUL-terminated string at p and
+	 * advance p past its terminator.  Jumps to bad_ticket if no NUL is
+	 * found before end, if the string is longer than size or if it
+	 * contains a non-printable character.  Uses p, q and end from the
+	 * enclosing scope. */
+#define Z(size)						\
+	({						\
+		u8 *__str = p;				\
+		q = memchr(p, 0, end - p);		\
+		if (!q || q - p > (size))		\
+			goto bad_ticket;		\
+		for (; p < q; p++)			\
+			if (!isprint(*p))		\
+				goto bad_ticket;	\
+		p++;					\
+		__str;					\
+	})
+
+	/* extract the ticket flags */
+	/* (bit 0 selects the byte order of the issue timestamp read below) */
+	_debug("KIV FLAGS: %x", *p);
+	little_endian = *p & 1;
+	p++;
+
+	/* extract the authentication name */
+	name = Z(ANAME_SZ);
+	_debug("KIV ANAME: %s", name);
+
+	/* extract the principal's instance */
+	name = Z(INST_SZ);
+	_debug("KIV INST : %s", name);
+
+	/* extract the principal's authentication domain */
+	name = Z(REALM_SZ);
+	_debug("KIV REALM: %s", name);
+
+	/* make sure the fixed-size fields that follow cannot run off the end
+	 * of the ticket
+	 * NOTE(review): the fields read below take 4 + 8 + 1 + 4 bytes; the
+	 * rest of the allowance is presumably for the terminators of the two
+	 * trailing strings - confirm */
+	if (end - p < 4 + 8 + 4 + 2)
+		goto bad_ticket;
+
+	/* get the IPv4 address of the entity that requested the ticket */
+	memcpy(&addr, p, sizeof(addr));
+	p += 4;
+	_debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr));
+
+	/* get the session key from the ticket */
+	memcpy(&key, p, sizeof(key));
+	p += 8;
+	_debug("KIV KEY  : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
+	memcpy(_session_key, &key, sizeof(key));
+
+	/* get the ticket's lifetime */
+	/* (a single byte counting five-minute units, converted to seconds) */
+	life = *p++ * 5 * 60;
+	_debug("KIV LIFE : %u", life);
+
+	/* get the issue time of the ticket */
+	/* (copied out with memcpy as p need not be aligned at this point) */
+	if (little_endian) {
+		__le32 stamp;
+		memcpy(&stamp, p, 4);
+		issue = le32_to_cpu(stamp);
+	} else {
+		__be32 stamp;
+		memcpy(&stamp, p, 4);
+		issue = be32_to_cpu(stamp);
+	}
+	p += 4;
+	now = xtime.tv_sec;
+	_debug("KIV ISSUE: %lx [%lx]", issue, now);
+
+	/* check the ticket is in date */
+	/* (a ticket issued in the future is rejected outright) */
+	if (issue > now) {
+		*_abort_code = RXKADNOAUTH;
+		ret = -EKEYREJECTED;
+		goto error;
+	}
+
+	if (issue < now - life) {
+		*_abort_code = RXKADEXPIRED;
+		ret = -EKEYEXPIRED;
+		goto error;
+	}
+
+	*_expiry = issue + life;
+
+	/* get the service name */
+	name = Z(SNAME_SZ);
+	_debug("KIV SNAME: %s", name);
+
+	/* get the service instance name */
+	name = Z(INST_SZ);
+	_debug("KIV SINST: %s", name);
+
+	ret = 0;
+error:
+	_leave(" = %d", ret);
+	return ret;
+
+	/* malformed ticket contents: reached from the Z() macro and from the
+	 * length check above */
+bad_ticket:
+	*_abort_code = RXKADBADTICKET;
+	ret = -EBADMSG;
+	goto error;
+}
+
+/*
+ * Decrypt the sealed part of a response packet in place.
+ * - the module-wide rxkad_ci cipher is rekeyed with the session key, so the
+ *   rekeying and the decryption are both done under rxkad_ci_mutex
+ * - the IV starts out as a copy of the session key
+ */
+static void rxkad_decrypt_response(struct rxrpc_connection *conn,
+				   struct rxkad_response *resp,
+				   const struct rxrpc_crypt *session_key)
+{
+	struct scatterlist src[2], dst[2];
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	int err;
+
+	_enter(",,%08x%08x",
+	       ntohl(session_key->n[0]), ntohl(session_key->n[1]));
+
+	ASSERT(rxkad_ci != NULL);
+
+	mutex_lock(&rxkad_ci_mutex);
+
+	err = crypto_blkcipher_setkey(rxkad_ci, session_key->x,
+				      sizeof(*session_key));
+	if (err < 0)
+		BUG();
+
+	memcpy(&iv, session_key, sizeof(iv));
+	desc.tfm	= rxkad_ci;
+	desc.info	= iv.x;
+	desc.flags	= 0;
+
+	/* source and destination lists describe the same buffer */
+	rxkad_sg_set_buf2(src, &resp->encrypted, sizeof(resp->encrypted));
+	memcpy(dst, src, sizeof(dst));
+	crypto_blkcipher_decrypt_iv(&desc, dst, src, sizeof(resp->encrypted));
+
+	mutex_unlock(&rxkad_ci_mutex);
+
+	_leave("");
+}
+
+/*
+ * verify a response
+ * - the RESPONSE packet in skb holds the fixed-size response block followed
+ *   by the Kerberos ticket; the response block is pulled off the skb here
+ * - the ticket is decrypted with the server key and the session key found in
+ *   it is then used to decrypt and check the sealed part of the response
+ * - on success the security level given in the response is recorded in
+ *   conn->security_level, rxrpc_get_server_data_key() is called to set up
+ *   the connection's key data and 0 is returned
+ * - returns -EPROTO with *_abort_code set if the response is malformed or
+ *   inconsistent, -ENOMEM if the ticket buffer cannot be allocated, or the
+ *   error from rxkad_decrypt_ticket() (with *_abort_code set) or from
+ *   rxrpc_get_server_data_key()
+ */
+static int rxkad_verify_response(struct rxrpc_connection *conn,
+				 struct sk_buff *skb,
+				 u32 *_abort_code)
+{
+	struct rxkad_response response
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_crypt session_key;
+	time_t expiry;
+	void *ticket;
+	u32 abort_code, version, kvno, ticket_len, csum, level;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+		goto protocol_error;
+	/* the copy above succeeded, so the pull is expected to as well */
+	if (!pskb_pull(skb, sizeof(response)))
+		BUG();
+
+	version = ntohl(response.version);
+	ticket_len = ntohl(response.ticket_len);
+	kvno = ntohl(response.kvno);
+	sp = rxrpc_skb(skb);
+	_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
+	       ntohl(sp->hdr.serial), version, kvno, ticket_len);
+
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+	abort_code = RXKADTICKETLEN;
+	if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+		goto protocol_error;
+
+	abort_code = RXKADUNKNOWNKEY;
+	if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+		goto protocol_error;
+
+	/* extract the kerberos ticket and decrypt and decode it */
+	ticket = kmalloc(ticket_len, GFP_NOFS);
+	if (!ticket)
+		return -ENOMEM;
+
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+		goto protocol_error_free;
+
+	ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
+				   &expiry, &abort_code);
+	if (ret < 0) {
+		*_abort_code = abort_code;
+		kfree(ticket);
+		return ret;
+	}
+
+	/* use the session key from inside the ticket to decrypt the
+	 * response */
+	rxkad_decrypt_response(conn, &response, &session_key);
+
+	abort_code = RXKADSEALEDINCON;
+	if (response.encrypted.epoch != conn->epoch)
+		goto protocol_error_free;
+	if (response.encrypted.cid != conn->cid)
+		goto protocol_error_free;
+	if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+		goto protocol_error_free;
+
+	/* recompute the checksum with the checksum field zeroed and compare
+	 * it with the value that was received */
+	csum = response.encrypted.checksum;
+	response.encrypted.checksum = 0;
+	rxkad_calc_response_checksum(&response);
+	if (response.encrypted.checksum != csum)
+		goto protocol_error_free;
+
+	if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[1]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[2]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[3]) > INT_MAX)
+		goto protocol_error_free;
+
+	/* the peer must return the nonce from our challenge plus one */
+	abort_code = RXKADOUTOFSEQUENCE;
+	if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
+		goto protocol_error_free;
+
+	abort_code = RXKADLEVELFAIL;
+	level = ntohl(response.encrypted.level);
+	if (level > RXRPC_SECURITY_ENCRYPT)
+		goto protocol_error_free;
+	conn->security_level = level;
+
+	/* create a key to hold the security data and expiration time - after
+	 * this the connection security can be handled in exactly the same way
+	 * as for a client connection */
+	ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
+	if (ret < 0) {
+		kfree(ticket);
+		return ret;
+	}
+
+	kfree(ticket);
+	_leave(" = 0");
+	return 0;
+
+protocol_error_free:
+	kfree(ticket);
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * Release a connection's security state: free the connection's cipher if
+ * one was set up.
+ */
+static void rxkad_clear(struct rxrpc_connection *conn)
+{
+	_enter("");
+
+	if (!conn->cipher)
+		return;
+
+	crypto_free_blkcipher(conn->cipher);
+}
+
+/*
+ * RxRPC Kerberos-based security
+ * - the operations table that rxkad_init() passes to
+ *   rxrpc_register_security() and rxkad_exit() passes to
+ *   rxrpc_unregister_security()
+ */
+static struct rxrpc_security rxkad = {
+	.owner				= THIS_MODULE,
+	.name				= "rxkad",
+	.security_index			= RXKAD_VERSION,
+	.init_connection_security	= rxkad_init_connection_security,
+	.prime_packet_security		= rxkad_prime_packet_security,
+	.secure_packet			= rxkad_secure_packet,
+	.verify_packet			= rxkad_verify_packet,
+	.issue_challenge		= rxkad_issue_challenge,
+	.respond_to_challenge		= rxkad_respond_to_challenge,
+	.verify_response		= rxkad_verify_response,
+	.clear				= rxkad_clear,
+};
+
+/*
+ * module initialisation: allocate the shared rxkad_ci cipher and register
+ * the rxkad security type
+ * - returns the error from crypto_alloc_blkcipher() if the cipher cannot be
+ *   obtained, otherwise the result of rxrpc_register_security()
+ */
+static __init int rxkad_init(void)
+{
+	int ret;
+
+	_enter("");
+
+	/* pin the cipher we need so that the crypto layer doesn't invoke
+	 * keventd to go get it */
+	rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(rxkad_ci))
+		return PTR_ERR(rxkad_ci);
+
+	/* the exit handler is not run after a failed init, so the cipher has
+	 * to be released here if registration is refused */
+	ret = rxrpc_register_security(&rxkad);
+	if (ret < 0)
+		crypto_free_blkcipher(rxkad_ci);
+	return ret;
+}
+
+module_init(rxkad_init);
+
+/*
+ * module unload: unregister the rxkad security type, then free the cipher
+ * that rxkad_init() allocated
+ */
+static __exit void rxkad_exit(void)
+{
+	_enter("");
+
+	rxrpc_unregister_security(&rxkad);
+	crypto_free_blkcipher(rxkad_ci);
+}
+
+module_exit(rxkad_exit);
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
deleted file mode 100644
index 9896fd87a4d4..000000000000
--- a/net/rxrpc/rxrpc_syms.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* rxrpc_syms.c: exported Rx RPC layer interface symbols
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/krxiod.h>
-
-/* call.c */
-EXPORT_SYMBOL(rxrpc_create_call);
-EXPORT_SYMBOL(rxrpc_put_call);
-EXPORT_SYMBOL(rxrpc_call_abort);
-EXPORT_SYMBOL(rxrpc_call_read_data);
-EXPORT_SYMBOL(rxrpc_call_write_data);
-
-/* connection.c */
-EXPORT_SYMBOL(rxrpc_create_connection);
-EXPORT_SYMBOL(rxrpc_put_connection);
-
-/* transport.c */
-EXPORT_SYMBOL(rxrpc_create_transport);
-EXPORT_SYMBOL(rxrpc_put_transport);
-EXPORT_SYMBOL(rxrpc_add_service);
-EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
deleted file mode 100644
index 884290754af7..000000000000
--- a/net/rxrpc/sysctl.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/* sysctl.c: Rx RPC control
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <rxrpc/types.h>
-#include <rxrpc/rxrpc.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-int rxrpc_ktrace;
-int rxrpc_kdebug;
-int rxrpc_kproto;
-int rxrpc_knet;
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *rxrpc_sysctl = NULL;
-
-static ctl_table rxrpc_sysctl_table[] = {
-	{
-		.ctl_name	= 1,
-		.procname	= "kdebug",
-		.data		= &rxrpc_kdebug,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 2,
-		.procname	= "ktrace",
-		.data		= &rxrpc_ktrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 3,
-		.procname	= "kproto",
-		.data		= &rxrpc_kproto,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 4,
-		.procname	= "knet",
-		.data		= &rxrpc_knet,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 5,
-		.procname	= "peertimo",
-		.data		= &rxrpc_peer_timeout,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax
-	},
-	{
-		.ctl_name	= 6,
-		.procname	= "conntimo",
-		.data		= &rxrpc_conn_timeout,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table rxrpc_dir_sysctl_table[] = {
-	{
-		.ctl_name	= 1,
-		.procname	= "rxrpc",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= rxrpc_sysctl_table
-	},
-	{ .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-/*****************************************************************************/
-/*
- * initialise the sysctl stuff for Rx RPC
- */
-int rxrpc_sysctl_init(void)
-{
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table);
-	if (!rxrpc_sysctl)
-		return -ENOMEM;
-#endif /* CONFIG_SYSCTL */
-
-	return 0;
-} /* end rxrpc_sysctl_init() */
-
-/*****************************************************************************/
-/*
- * clean up the sysctl stuff for Rx RPC
- */
-void rxrpc_sysctl_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
-	if (rxrpc_sysctl) {
-		unregister_sysctl_table(rxrpc_sysctl);
-		rxrpc_sysctl = NULL;
-	}
-#endif /* CONFIG_SYSCTL */
-
-} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
deleted file mode 100644
index 8e57be2df936..000000000000
--- a/net/rxrpc/transport.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/* transport.c: Rx Transport routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-#include <linux/ipv6.h>	/* this should _really_ be in errqueue.h.. */
-#endif
-#include <linux/errqueue.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-struct errormsg {
-	struct cmsghdr			cmsg;		/* control message header */
-	struct sock_extended_err	ee;		/* extended error information */
-	struct sockaddr_in		icmp_src;	/* ICMP packet source address */
-};
-
-static DEFINE_SPINLOCK(rxrpc_transports_lock);
-static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
-
-__RXACCT_DECL(atomic_t rxrpc_transport_count);
-LIST_HEAD(rxrpc_proc_transports);
-DECLARE_RWSEM(rxrpc_proc_transports_sem);
-
-static void rxrpc_data_ready(struct sock *sk, int count);
-static void rxrpc_error_report(struct sock *sk);
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
-					struct list_head *msgq);
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
-
-/*****************************************************************************/
-/*
- * create a new transport endpoint using the specified UDP port
- */
-int rxrpc_create_transport(unsigned short port,
-			   struct rxrpc_transport **_trans)
-{
-	struct rxrpc_transport *trans;
-	struct sockaddr_in sin;
-	mm_segment_t oldfs;
-	struct sock *sock;
-	int ret, opt;
-
-	_enter("%hu", port);
-
-	trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
-	if (!trans)
-		return -ENOMEM;
-
-	atomic_set(&trans->usage, 1);
-	INIT_LIST_HEAD(&trans->services);
-	INIT_LIST_HEAD(&trans->link);
-	INIT_LIST_HEAD(&trans->krxiodq_link);
-	spin_lock_init(&trans->lock);
-	INIT_LIST_HEAD(&trans->peer_active);
-	INIT_LIST_HEAD(&trans->peer_graveyard);
-	spin_lock_init(&trans->peer_gylock);
-	init_waitqueue_head(&trans->peer_gy_waitq);
-	rwlock_init(&trans->peer_lock);
-	atomic_set(&trans->peer_count, 0);
-	trans->port = port;
-
-	/* create a UDP socket to be my actual transport endpoint */
-	ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
-	if (ret < 0)
-		goto error;
-
-	/* use the specified port */
-	if (port) {
-		memset(&sin, 0, sizeof(sin));
-		sin.sin_family = AF_INET;
-		sin.sin_port = htons(port);
-		ret = trans->socket->ops->bind(trans->socket,
-					       (struct sockaddr *) &sin,
-					       sizeof(sin));
-		if (ret < 0)
-			goto error;
-	}
-
-	opt = 1;
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
-					     (char *) &opt, sizeof(opt));
-	set_fs(oldfs);
-
-	spin_lock(&rxrpc_transports_lock);
-	list_add(&trans->link, &rxrpc_transports);
-	spin_unlock(&rxrpc_transports_lock);
-
-	/* set the socket up */
-	sock = trans->socket->sk;
-	sock->sk_user_data	= trans;
-	sock->sk_data_ready	= rxrpc_data_ready;
-	sock->sk_error_report	= rxrpc_error_report;
-
-	down_write(&rxrpc_proc_transports_sem);
-	list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
-	up_write(&rxrpc_proc_transports_sem);
-
-	__RXACCT(atomic_inc(&rxrpc_transport_count));
-
-	*_trans = trans;
-	_leave(" = 0 (%p)", trans);
-	return 0;
-
- error:
-	/* finish cleaning up the transport (not really needed here, but...) */
-	if (trans->socket)
-		trans->socket->ops->shutdown(trans->socket, 2);
-
-	/* close the socket */
-	if (trans->socket) {
-		trans->socket->sk->sk_user_data = NULL;
-		sock_release(trans->socket);
-		trans->socket = NULL;
-	}
-
-	kfree(trans);
-
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_create_transport() */
-
-/*****************************************************************************/
-/*
- * destroy a transport endpoint
- */
-void rxrpc_put_transport(struct rxrpc_transport *trans)
-{
-	_enter("%p{u=%d p=%hu}",
-	       trans, atomic_read(&trans->usage), trans->port);
-
-	BUG_ON(atomic_read(&trans->usage) <= 0);
-
-	/* to prevent a race, the decrement and the dequeue must be
-	 * effectively atomic */
-	spin_lock(&rxrpc_transports_lock);
-	if (likely(!atomic_dec_and_test(&trans->usage))) {
-		spin_unlock(&rxrpc_transports_lock);
-		_leave("");
-		return;
-	}
-
-	list_del(&trans->link);
-	spin_unlock(&rxrpc_transports_lock);
-
-	/* finish cleaning up the transport */
-	if (trans->socket)
-		trans->socket->ops->shutdown(trans->socket, 2);
-
-	rxrpc_krxsecd_clear_transport(trans);
-	rxrpc_krxiod_dequeue_transport(trans);
-
-	/* discard all peer information */
-	rxrpc_peer_clearall(trans);
-
-	down_write(&rxrpc_proc_transports_sem);
-	list_del(&trans->proc_link);
-	up_write(&rxrpc_proc_transports_sem);
-	__RXACCT(atomic_dec(&rxrpc_transport_count));
-
-	/* close the socket */
-	if (trans->socket) {
-		trans->socket->sk->sk_user_data = NULL;
-		sock_release(trans->socket);
-		trans->socket = NULL;
-	}
-
-	kfree(trans);
-
-	_leave("");
-} /* end rxrpc_put_transport() */
-
-/*****************************************************************************/
-/*
- * add a service to a transport to be listened upon
- */
-int rxrpc_add_service(struct rxrpc_transport *trans,
-		      struct rxrpc_service *newsrv)
-{
-	struct rxrpc_service *srv;
-	struct list_head *_p;
-	int ret = -EEXIST;
-
-	_enter("%p{%hu},%p{%hu}",
-	       trans, trans->port, newsrv, newsrv->service_id);
-
-	/* verify that the service ID is not already present */
-	spin_lock(&trans->lock);
-
-	list_for_each(_p, &trans->services) {
-		srv = list_entry(_p, struct rxrpc_service, link);
-		if (srv->service_id == newsrv->service_id)
-			goto out;
-	}
-
-	/* okay - add the transport to the list */
-	list_add_tail(&newsrv->link, &trans->services);
-	rxrpc_get_transport(trans);
-	ret = 0;
-
- out:
-	spin_unlock(&trans->lock);
-
-	_leave("= %d", ret);
-	return ret;
-} /* end rxrpc_add_service() */
-
-/*****************************************************************************/
-/*
- * remove a service from a transport
- */
-void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
-{
-	_enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
-
-	spin_lock(&trans->lock);
-	list_del(&srv->link);
-	spin_unlock(&trans->lock);
-
-	rxrpc_put_transport(trans);
-
-	_leave("");
-} /* end rxrpc_del_service() */
-
-/*****************************************************************************/
-/*
- * INET callback when data has been received on the socket.
- */
-static void rxrpc_data_ready(struct sock *sk, int count)
-{
-	struct rxrpc_transport *trans;
-
-	_enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
-
-	/* queue the transport for attention by krxiod */
-	trans = (struct rxrpc_transport *) sk->sk_user_data;
-	if (trans)
-		rxrpc_krxiod_queue_transport(trans);
-
-	/* wake up anyone waiting on the socket */
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
-
-	_leave("");
-} /* end rxrpc_data_ready() */
-
-/*****************************************************************************/
-/*
- * INET callback when an ICMP error packet is received
- * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
- */
-static void rxrpc_error_report(struct sock *sk)
-{
-	struct rxrpc_transport *trans;
-
-	_enter("%p{t=%p}", sk, sk->sk_user_data);
-
-	/* queue the transport for attention by krxiod */
-	trans = (struct rxrpc_transport *) sk->sk_user_data;
-	if (trans) {
-		trans->error_rcvd = 1;
-		rxrpc_krxiod_queue_transport(trans);
-	}
-
-	/* wake up anyone waiting on the socket */
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
-
-	_leave("");
-} /* end rxrpc_error_report() */
-
-/*****************************************************************************/
-/*
- * split a message up, allocating message records and filling them in
- * from the contents of a socket buffer
- */
-static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
-			      struct sk_buff *pkt,
-			      struct list_head *msgq)
-{
-	struct rxrpc_message *msg;
-	int ret;
-
-	_enter("");
-
-	msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
-	if (!msg) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&msg->usage, 1);
-	list_add_tail(&msg->link,msgq);
-
-	/* dig out the Rx routing parameters */
-	if (skb_copy_bits(pkt, sizeof(struct udphdr),
-			  &msg->hdr, sizeof(msg->hdr)) < 0) {
-		ret = -EBADMSG;
-		goto error;
-	}
-
-	msg->trans = trans;
-	msg->state = RXRPC_MSG_RECEIVED;
-	skb_get_timestamp(pkt, &msg->stamp);
-	if (msg->stamp.tv_sec == 0) {
-		do_gettimeofday(&msg->stamp);
-		if (pkt->sk)
-			sock_enable_timestamp(pkt->sk);
-	}
-	msg->seq = ntohl(msg->hdr.seq);
-
-	/* attach the packet */
-	skb_get(pkt);
-	msg->pkt = pkt;
-
-	msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
-	msg->dsize = msg->pkt->len - msg->offset;
-
-	_net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
-	     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
-	     ntohl(msg->hdr.epoch),
-	     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
-	     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
-	     ntohl(msg->hdr.callNumber),
-	     rxrpc_pkts[msg->hdr.type],
-	     msg->hdr.flags,
-	     ntohs(msg->hdr.serviceId),
-	     msg->hdr.securityIndex);
-
-	__RXACCT(atomic_inc(&rxrpc_message_count));
-
-	/* split off jumbo packets */
-	while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-	       msg->hdr.flags & RXRPC_JUMBO_PACKET
-	       ) {
-		struct rxrpc_jumbo_header jumbo;
-		struct rxrpc_message *jumbomsg = msg;
-
-		_debug("split jumbo packet");
-
-		/* quick sanity check */
-		ret = -EBADMSG;
-		if (msg->dsize <
-		    RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
-			goto error;
-		if (msg->hdr.flags & RXRPC_LAST_PACKET)
-			goto error;
-
-		/* dig out the secondary header */
-		if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
-				  &jumbo, sizeof(jumbo)) < 0)
-			goto error;
-
-		/* allocate a new message record */
-		ret = -ENOMEM;
-		msg = kmemdup(jumbomsg, sizeof(struct rxrpc_message), GFP_KERNEL);
-		if (!msg)
-			goto error;
-
-		list_add_tail(&msg->link, msgq);
-
-		/* adjust the jumbo packet */
-		jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
-
-		/* attach the packet here too */
-		skb_get(pkt);
-
-		/* adjust the parameters */
-		msg->seq++;
-		msg->hdr.seq = htonl(msg->seq);
-		msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
-		msg->offset += RXRPC_JUMBO_DATALEN +
-			sizeof(struct rxrpc_jumbo_header);
-		msg->dsize -= RXRPC_JUMBO_DATALEN +
-			sizeof(struct rxrpc_jumbo_header);
-		msg->hdr.flags = jumbo.flags;
-		msg->hdr._rsvd = jumbo._rsvd;
-
-		_net("Rx Split jumbo packet from %s"
-		     " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
-		     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
-		     ntohl(msg->hdr.epoch),
-		     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
-		     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
-		     ntohl(msg->hdr.callNumber),
-		     rxrpc_pkts[msg->hdr.type],
-		     msg->hdr.flags,
-		     ntohs(msg->hdr.serviceId),
-		     msg->hdr.securityIndex);
-
-		__RXACCT(atomic_inc(&rxrpc_message_count));
-	}
-
-	_leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
-	return 0;
-
- error:
-	while (!list_empty(msgq)) {
-		msg = list_entry(msgq->next, struct rxrpc_message, link);
-		list_del_init(&msg->link);
-
-		rxrpc_put_message(msg);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_incoming_msg() */
-
-/*****************************************************************************/
-/*
- * accept a new call
- * - called from krxiod in process context
- */
-void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
-{
-	struct rxrpc_message *msg;
-	struct rxrpc_peer *peer;
-	struct sk_buff *pkt;
-	int ret;
-	__be32 addr;
-	__be16 port;
-
-	LIST_HEAD(msgq);
-
-	_enter("%p{%d}", trans, trans->port);
-
-	for (;;) {
-		/* deal with outstanting errors first */
-		if (trans->error_rcvd)
-			rxrpc_trans_receive_error_report(trans);
-
-		/* attempt to receive a packet */
-		pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
-		if (!pkt) {
-			if (ret == -EAGAIN) {
-				_leave(" EAGAIN");
-				return;
-			}
-
-			/* an icmp error may have occurred */
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" error %d\n", ret);
-			return;
-		}
-
-		/* we'll probably need to checksum it (didn't call
-		 * sock_recvmsg) */
-		if (skb_checksum_complete(pkt)) {
-			kfree_skb(pkt);
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" CSUM failed");
-			return;
-		}
-
-		addr = pkt->nh.iph->saddr;
-		port = pkt->h.uh->source;
-
-		_net("Rx Received UDP packet from %08x:%04hu",
-		     ntohl(addr), ntohs(port));
-
-		/* unmarshall the Rx parameters and split jumbo packets */
-		ret = rxrpc_incoming_msg(trans, pkt, &msgq);
-		if (ret < 0) {
-			kfree_skb(pkt);
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" bad packet");
-			return;
-		}
-
-		BUG_ON(list_empty(&msgq));
-
-		msg = list_entry(msgq.next, struct rxrpc_message, link);
-
-		/* locate the record for the peer from which it
-		 * originated */
-		ret = rxrpc_peer_lookup(trans, addr, &peer);
-		if (ret < 0) {
-			kdebug("Rx No connections from that peer");
-			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
-			goto finished_msg;
-		}
-
-		/* try and find a matching connection */
-		ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
-		if (ret < 0) {
-			kdebug("Rx Unknown Connection");
-			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
-			rxrpc_put_peer(peer);
-			goto finished_msg;
-		}
-		rxrpc_put_peer(peer);
-
-		/* deal with the first packet of a new call */
-		if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
-		    msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-		    ntohl(msg->hdr.seq) == 1
-		    ) {
-			_debug("Rx New server call");
-			rxrpc_trans_receive_new_call(trans, &msgq);
-			goto finished_msg;
-		}
-
-		/* deal with subsequent packet(s) of call */
-		_debug("Rx Call packet");
-		while (!list_empty(&msgq)) {
-			msg = list_entry(msgq.next, struct rxrpc_message, link);
-			list_del_init(&msg->link);
-
-			ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
-			if (ret < 0) {
-				rxrpc_trans_immediate_abort(trans, msg, ret);
-				rxrpc_put_message(msg);
-				goto finished_msg;
-			}
-
-			rxrpc_put_message(msg);
-		}
-
-		goto finished_msg;
-
-		/* dispose of the packets */
-	finished_msg:
-		while (!list_empty(&msgq)) {
-			msg = list_entry(msgq.next, struct rxrpc_message, link);
-			list_del_init(&msg->link);
-
-			rxrpc_put_message(msg);
-		}
-		kfree_skb(pkt);
-	}
-
-	_leave("");
-
-} /* end rxrpc_trans_receive_packet() */
-
-/*****************************************************************************/
-/*
- * accept a new call from a client trying to connect to one of my services
- * - called in process context
- */
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
-					struct list_head *msgq)
-{
-	struct rxrpc_message *msg;
-
-	_enter("");
-
-	/* only bother with the first packet */
-	msg = list_entry(msgq->next, struct rxrpc_message, link);
-	list_del_init(&msg->link);
-	rxrpc_krxsecd_queue_incoming_call(msg);
-	rxrpc_put_message(msg);
-
-	_leave(" = 0");
-
-	return 0;
-} /* end rxrpc_trans_receive_new_call() */
-
-/*****************************************************************************/
-/*
- * perform an immediate abort without connection or call structures
- */
-int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
-				struct rxrpc_message *msg,
-				int error)
-{
-	struct rxrpc_header ahdr;
-	struct sockaddr_in sin;
-	struct msghdr msghdr;
-	struct kvec iov[2];
-	__be32 _error;
-	int len, ret;
-
-	_enter("%p,%p,%d", trans, msg, error);
-
-	/* don't abort an abort packet */
-	if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
-		_leave(" = 0");
-		return 0;
-	}
-
-	_error = htonl(-error);
-
-	/* set up the message to be transmitted */
-	memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
-	ahdr.epoch	= msg->hdr.epoch;
-	ahdr.serial	= htonl(1);
-	ahdr.seq	= 0;
-	ahdr.type	= RXRPC_PACKET_TYPE_ABORT;
-	ahdr.flags	= RXRPC_LAST_PACKET;
-	ahdr.flags	|= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-
-	iov[0].iov_len	= sizeof(ahdr);
-	iov[0].iov_base	= &ahdr;
-	iov[1].iov_len	= sizeof(_error);
-	iov[1].iov_base	= &_error;
-
-	len = sizeof(ahdr) + sizeof(_error);
-
-	memset(&sin,0,sizeof(sin));
-	sin.sin_family		= AF_INET;
-	sin.sin_port		= msg->pkt->h.uh->source;
-	sin.sin_addr.s_addr	= msg->pkt->nh.iph->saddr;
-
-	msghdr.msg_name		= &sin;
-	msghdr.msg_namelen	= sizeof(sin);
-	msghdr.msg_control	= NULL;
-	msghdr.msg_controllen	= 0;
-	msghdr.msg_flags	= MSG_DONTWAIT;
-
-	_net("Sending message type %d of %d bytes to %08x:%d",
-	     ahdr.type,
-	     len,
-	     ntohl(sin.sin_addr.s_addr),
-	     ntohs(sin.sin_port));
-
-	/* send the message */
-	ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_trans_immediate_abort() */
-
-/*****************************************************************************/
-/*
- * receive an ICMP error report and percolate it to all connections
- * heading to the affected host or port
- */
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
-{
-	struct rxrpc_connection *conn;
-	struct sockaddr_in sin;
-	struct rxrpc_peer *peer;
-	struct list_head connq, *_p;
-	struct errormsg emsg;
-	struct msghdr msg;
-	__be16 port;
-	int local, err;
-
-	_enter("%p", trans);
-
-	for (;;) {
-		trans->error_rcvd = 0;
-
-		/* try and receive an error message */
-		msg.msg_name	= &sin;
-		msg.msg_namelen	= sizeof(sin);
-		msg.msg_control	= &emsg;
-		msg.msg_controllen = sizeof(emsg);
-		msg.msg_flags	= 0;
-
-		err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
-				   MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
-
-		if (err == -EAGAIN) {
-			_leave("");
-			return;
-		}
-
-		if (err < 0) {
-			printk("%s: unable to recv an error report: %d\n",
-			       __FUNCTION__, err);
-			_leave("");
-			return;
-		}
-
-		msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
-
-		if (msg.msg_controllen < sizeof(emsg.cmsg) ||
-		    msg.msg_namelen < sizeof(sin)) {
-			printk("%s: short control message"
-			       " (nlen=%u clen=%Zu fl=%x)\n",
-			       __FUNCTION__,
-			       msg.msg_namelen,
-			       msg.msg_controllen,
-			       msg.msg_flags);
-			continue;
-		}
-
-		_net("Rx Received control message"
-		     " { len=%Zu level=%u type=%u }",
-		     emsg.cmsg.cmsg_len,
-		     emsg.cmsg.cmsg_level,
-		     emsg.cmsg.cmsg_type);
-
-		if (sin.sin_family != AF_INET) {
-			printk("Rx Ignoring error report with non-INET address"
-			       " (fam=%u)",
-			       sin.sin_family);
-			continue;
-		}
-
-		_net("Rx Received message pertaining to host addr=%x port=%hu",
-		     ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-
-		if (emsg.cmsg.cmsg_level != SOL_IP ||
-		    emsg.cmsg.cmsg_type != IP_RECVERR) {
-			printk("Rx Ignoring unknown error report"
-			       " { level=%u type=%u }",
-			       emsg.cmsg.cmsg_level,
-			       emsg.cmsg.cmsg_type);
-			continue;
-		}
-
-		if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
-			printk("%s: short error message (%Zu)\n",
-			       __FUNCTION__, msg.msg_controllen);
-			_leave("");
-			return;
-		}
-
-		port = sin.sin_port;
-
-		switch (emsg.ee.ee_origin) {
-		case SO_EE_ORIGIN_ICMP:
-			local = 0;
-			switch (emsg.ee.ee_type) {
-			case ICMP_DEST_UNREACH:
-				switch (emsg.ee.ee_code) {
-				case ICMP_NET_UNREACH:
-					_net("Rx Received ICMP Network Unreachable");
-					port = 0;
-					err = -ENETUNREACH;
-					break;
-				case ICMP_HOST_UNREACH:
-					_net("Rx Received ICMP Host Unreachable");
-					port = 0;
-					err = -EHOSTUNREACH;
-					break;
-				case ICMP_PORT_UNREACH:
-					_net("Rx Received ICMP Port Unreachable");
-					err = -ECONNREFUSED;
-					break;
-				case ICMP_NET_UNKNOWN:
-					_net("Rx Received ICMP Unknown Network");
-					port = 0;
-					err = -ENETUNREACH;
-					break;
-				case ICMP_HOST_UNKNOWN:
-					_net("Rx Received ICMP Unknown Host");
-					port = 0;
-					err = -EHOSTUNREACH;
-					break;
-				default:
-					_net("Rx Received ICMP DestUnreach { code=%u }",
-					     emsg.ee.ee_code);
-					err = emsg.ee.ee_errno;
-					break;
-				}
-				break;
-
-			case ICMP_TIME_EXCEEDED:
-				_net("Rx Received ICMP TTL Exceeded");
-				err = emsg.ee.ee_errno;
-				break;
-
-			default:
-				_proto("Rx Received ICMP error { type=%u code=%u }",
-				       emsg.ee.ee_type, emsg.ee.ee_code);
-				err = emsg.ee.ee_errno;
-				break;
-			}
-			break;
-
-		case SO_EE_ORIGIN_LOCAL:
-			_proto("Rx Received local error { error=%d }",
-			       emsg.ee.ee_errno);
-			local = 1;
-			err = emsg.ee.ee_errno;
-			break;
-
-		case SO_EE_ORIGIN_NONE:
-		case SO_EE_ORIGIN_ICMP6:
-		default:
-			_proto("Rx Received error report { orig=%u }",
-			       emsg.ee.ee_origin);
-			local = 0;
-			err = emsg.ee.ee_errno;
-			break;
-		}
-
-		/* find all the connections between this transport and the
-		 * affected destination */
-		INIT_LIST_HEAD(&connq);
-
-		if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
-				      &peer) == 0) {
-			read_lock(&peer->conn_lock);
-			list_for_each(_p, &peer->conn_active) {
-				conn = list_entry(_p, struct rxrpc_connection,
-						  link);
-				if (port && conn->addr.sin_port != port)
-					continue;
-				if (!list_empty(&conn->err_link))
-					continue;
-
-				rxrpc_get_connection(conn);
-				list_add_tail(&conn->err_link, &connq);
-			}
-			read_unlock(&peer->conn_lock);
-
-			/* service all those connections */
-			while (!list_empty(&connq)) {
-				conn = list_entry(connq.next,
-						  struct rxrpc_connection,
-						  err_link);
-				list_del(&conn->err_link);
-
-				rxrpc_conn_handle_error(conn, local, err);
-
-				rxrpc_put_connection(conn);
-			}
-
-			rxrpc_put_peer(peer);
-		}
-	}
-
-	_leave("");
-	return;
-} /* end rxrpc_trans_receive_error_report() */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd86476..475df8449be9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
 
 if NET_SCHED
 
-choice
-	prompt "Packet scheduler clock source"
-	default NET_SCH_CLK_GETTIMEOFDAY
-	---help---
-	  Packet schedulers need a monotonic clock that increments at a static
-	  rate. The kernel provides several suitable interfaces, each with
-	  different properties:
-	  
-	  - high resolution (us or better)
-	  - fast to read (minimal locking, no i/o access)
-	  - synchronized on all processors
-	  - handles cpu clock frequency changes
-
-	  but nothing provides all of the above.
-
-config NET_SCH_CLK_JIFFIES
-	bool "Timer interrupt"
-	---help---
-	  Say Y here if you want to use the timer interrupt (jiffies) as clock
-	  source. This clock source is fast, synchronized on all processors and
-	  handles cpu clock frequency changes, but its resolution is too low
-	  for accurate shaping except at very low speed.
-
-config NET_SCH_CLK_GETTIMEOFDAY
-	bool "gettimeofday"
-	---help---
-	  Say Y here if you want to use gettimeofday as clock source. This clock
-	  source has high resolution, is synchronized on all processors and
-	  handles cpu clock frequency changes, but it is slow.
-
-	  Choose this if you need a high resolution clock source but can't use
-	  the CPU's cycle counter.
-
-# don't allow on SMP x86 because they can have unsynchronized TSCs.
-# gettimeofday is a good alternative
-config NET_SCH_CLK_CPU
-	bool "CPU cycle counter"
-	depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
-	---help---
-	  Say Y here if you want to use the CPU's cycle counter as clock source.
-	  This is a cheap and high resolution clock source, but on some
-	  architectures it is not synchronized on all processors and doesn't
-	  handle cpu clock frequency changes.
-
-	  The useable cycle counters are:
-
-	  	x86/x86_64	- Timestamp Counter
-		alpha		- Cycle Counter
-		sparc64		- %ticks register
-		ppc64		- Time base
-		ia64		- Interval Time Counter
-
-	  Choose this if your CPU's cycle counter is working properly.
-
-endchoice
-
 comment "Queueing/Scheduling"
 
 config NET_SCH_CBQ
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ff2d6e5e282c..020767a204d4 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
-obj-$(CONFIG_NET_SCH_HPFQ)	+= sch_hpfq.o
 obj-$(CONFIG_NET_SCH_HFSC)	+= sch_hfsc.o
 obj-$(CONFIG_NET_SCH_RED)	+= sch_red.o
 obj-$(CONFIG_NET_SCH_GRED)	+= sch_gred.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cb21617a5670..711dd26c95c3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,12 +25,12 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <net/sock.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
@@ -93,15 +93,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			a->priv = p;
 			a->order = n_i;
-			r = (struct rtattr*) skb->tail;
+			r = (struct rtattr *)skb_tail_pointer(skb);
 			RTA_PUT(skb, a->order, 0, NULL);
 			err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				skb_trim(skb, (u8*)r - skb->data);
+				nlmsg_trim(skb, r);
 				goto done;
 			}
-			r->rta_len = skb->tail - (u8*)r;
+			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 			if (n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
@@ -114,7 +114,7 @@ done:
 	return n_i;
 
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	goto done;
 }
 
@@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 	struct rtattr *r ;
 	int i= 0, n_i = 0;
 
-	r = (struct rtattr*) skb->tail;
+	r = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, a->order, 0, NULL);
 	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
@@ -140,11 +140,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->rta_len = skb->tail - (u8*)r;
+	r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 
 	return n_i;
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	return -EINVAL;
 }
 
@@ -423,7 +423,7 @@ int
 tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	int err = -EINVAL;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *r;
 
 	if (a->ops == NULL || a->ops->dump == NULL)
@@ -432,15 +432,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto rtattr_failure;
-	r = (struct rtattr*) skb->tail;
+	r = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
-		r->rta_len = skb->tail - (u8*)r;
+		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 		return err;
 	}
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 {
 	struct tc_action *a;
 	int err = -EINVAL;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *r ;
 
 	while ((a = act) != NULL) {
-		r = (struct rtattr*) skb->tail;
+		r = (struct rtattr *)skb_tail_pointer(skb);
 		act = a->next;
 		RTA_PUT(skb, a->order, 0, NULL);
 		err = tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
-		r->rta_len = skb->tail - (u8*)r;
+		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 	}
 
 	return 0;
@@ -467,7 +467,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 rtattr_failure:
 	err = -EINVAL;
 errout:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return err;
 }
 
@@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 {
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *x;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
@@ -645,20 +645,20 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr*) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, bind, ref) < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8*)x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 		return -ENOBUFS;
 	}
 
-	b = (unsigned char *)skb->tail;
+	b = skb_tail_pointer(skb);
 
 	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
 		goto err_out;
@@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
 	if (err < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8 *) x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
@@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	if (!skb)
 		return -ENOBUFS;
 
-	b = (unsigned char *)skb->tail;
+	b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
 	t = NLMSG_DATA(nlh);
@@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr*) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, 0, 0) < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8*)x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
 	err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
@@ -1015,7 +1015,7 @@ static int
 tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *x;
 	struct tc_action_ops *a_o;
 	struct tc_action a;
@@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
@@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		goto rtattr_failure;
 
 	if (ret > 0) {
-		x->rta_len = skb->tail - (u8 *) x;
+		x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 		ret = skb->len;
 	} else
-		skb_trim(skb, (u8*)x - skb->data);
+		nlmsg_trim(skb, x);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).pid && ret)
 		nlh->nlmsg_flags |= NLM_F_MULTI;
 	module_put(a_o->owner);
@@ -1070,20 +1070,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 rtattr_failure:
 nlmsg_failure:
 	module_put(a_o->owner);
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return skb->len;
 }
 
 static int __init tc_action_init(void)
 {
-	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
-
-	if (link_p) {
-		link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
-	}
+	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
 
 	return 0;
 }
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 87d0faf32867..7517f3791541 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_gact.h>
@@ -155,7 +156,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_gact opt;
 	struct tcf_gact *gact = a->priv;
 	struct tcf_t t;
@@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 47f0b1324239..00b05f422d45 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/kmod.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_ipt.h>
@@ -245,7 +246,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 
 static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_ipt *ipt = a->priv;
 	struct ipt_entry_target *t;
 	struct tcf_t tm;
@@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	kfree(t);
 	return -1;
 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 68f26cb278f9..de21c92faaa2 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_mirred.h>
@@ -198,7 +199,7 @@ bad_mirred:
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
 	skb2->dev = dev;
-	skb2->input_dev = skb->dev;
+	skb2->iif = skb->dev->ifindex;
 	dev_queue_xmit(skb2);
 	spin_unlock(&m->tcf_lock);
 	return m->tcf_action;
@@ -206,7 +207,7 @@ bad_mirred:
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_mirred *m = a->priv;
 	struct tc_mirred opt;
 	struct tcf_t t;
@@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce4..45b3cda86a21 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_pedit.h>
@@ -136,7 +137,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 
-	pptr = skb->nh.raw;
+	pptr = skb_network_header(skb);
 
 	spin_lock(&p->tcf_lock);
 
@@ -195,7 +196,7 @@ done:
 static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 			  int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_pedit *p = a->priv;
 	struct tc_pedit *opt;
 	struct tcf_t t;
@@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	kfree(opt);
 	return -1;
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 10a5a5c36f76..616f465f407e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <net/sock.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 
 #define L2T(p,L)   ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
 #define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
@@ -80,7 +81,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				continue;
 			a->priv = p;
 			a->order = index;
-			r = (struct rtattr*) skb->tail;
+			r = (struct rtattr *)skb_tail_pointer(skb);
 			RTA_PUT(skb, a->order, 0, NULL);
 			if (type == RTM_DELACTION)
 				err = tcf_action_dump_1(skb, a, 0, 1);
@@ -88,10 +89,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				skb_trim(skb, (u8*)r - skb->data);
+				nlmsg_trim(skb, r);
 				goto done;
 			}
-			r->rta_len = skb->tail - (u8*)r;
+			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 		}
 	}
@@ -102,7 +103,7 @@ done:
 	return n_i;
 
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	goto done;
 }
 #endif
@@ -240,7 +241,7 @@ override:
 	if (ret != ACT_P_CREATED)
 		return ret;
 
-	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcfp_t_c = psched_get_time();
 	police->tcf_index = parm->index ? parm->index :
 		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -295,10 +296,9 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 			return police->tcfp_result;
 		}
 
-		PSCHED_GET_TIME(now);
-
-		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
-					 police->tcfp_burst);
+		now = psched_get_time();
+		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 static int
 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_police *police = a->priv;
 	struct tc_police opt;
 
@@ -355,7 +355,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -494,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 	}
 	if (police->tcfp_P_tab)
 		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
-	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcfp_t_c = psched_get_time();
 	police->tcf_index = parm->index ? parm->index :
 		tcf_police_new_index();
 	police->tcf_action = parm->action;
@@ -542,9 +542,9 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 			return police->tcfp_result;
 		}
 
-		PSCHED_GET_TIME(now);
-		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
-					 police->tcfp_burst);
+		now = psched_get_time();
+		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police);
 
 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_police opt;
 
 	opt.index = police->tcf_index;
@@ -598,7 +598,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c7971182af07..36e1edad5990 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -16,6 +16,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #define TCA_ACT_SIMP 22
@@ -155,7 +156,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
 static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 				int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_defact *d = a->priv;
 	struct tc_defact opt;
 	struct tcf_t t;
@@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c6ffdb77d2d..ebf94edf0478 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,9 +29,10 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -323,7 +324,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
@@ -340,12 +341,12 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
 		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
 			goto rtattr_failure;
 	}
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -399,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 		return skb->len;
 
-	read_lock(&qdisc_tree_lock);
 	if (!tcm->tcm_parent)
 		q = dev->qdisc_sleeping;
 	else
@@ -456,7 +456,6 @@ errout:
 	if (cl)
 		cops->put(q, cl);
 out:
-	read_unlock(&qdisc_tree_lock);
 	dev_put(dev);
 	return skb->len;
 }
@@ -563,30 +562,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
-		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+		struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
 
 		if (exts->action->type != TCA_OLD_COMPAT) {
 			RTA_PUT(skb, map->action, 0, NULL);
 			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
 				goto rtattr_failure;
-			p_rta->rta_len = skb->tail - (u8*)p_rta;
+			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		} else if (map->police) {
 			RTA_PUT(skb, map->police, 0, NULL);
 			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
 				goto rtattr_failure;
-			p_rta->rta_len = skb->tail - (u8*)p_rta;
+			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		}
 	}
 #elif defined CONFIG_NET_CLS_POLICE
 	if (map->police && exts->police) {
-		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+		struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
 
 		RTA_PUT(skb, map->police, 0, NULL);
 
 		if (tcf_police_dump(skb, exts->police) < 0)
 			goto rtattr_failure;
 
-		p_rta->rta_len = skb->tail - (u8*)p_rta;
+		p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 	}
 #endif
 	return 0;
@@ -614,18 +613,11 @@ rtattr_failure: __attribute__ ((unused))
 
 static int __init tc_filter_init(void)
 {
-	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+						 tc_dump_tfilter);
 
-	/* Setup rtnetlink links. It is made here to avoid
-	   exporting large number of public symbols.
-	 */
-
-	if (link_p) {
-		link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
-	}
 	return 0;
 }
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index fad08e521c24..c885412d79d5 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
@@ -81,6 +82,13 @@ static void basic_put(struct tcf_proto *tp, unsigned long f)
 
 static int basic_init(struct tcf_proto *tp)
 {
+	struct basic_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (head == NULL)
+		return -ENOBUFS;
+	INIT_LIST_HEAD(&head->flist);
+	tp->root = head;
 	return 0;
 }
 
@@ -102,6 +110,7 @@ static void basic_destroy(struct tcf_proto *tp)
 		list_del(&f->link);
 		basic_delete_filter(tp, f);
 	}
+	kfree(head);
 }
 
 static int basic_delete(struct tcf_proto *tp, unsigned long arg)
@@ -176,15 +185,6 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	err = -ENOBUFS;
-	if (head == NULL) {
-		head = kzalloc(sizeof(*head), GFP_KERNEL);
-		if (head == NULL)
-			goto errout;
-
-		INIT_LIST_HEAD(&head->flist);
-		tp->root = head;
-	}
-
 	f = kzalloc(sizeof(*f), GFP_KERNEL);
 	if (f == NULL)
 		goto errout;
@@ -246,7 +246,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
 	struct basic_filter *f = (struct basic_filter *) fh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (f == NULL)
@@ -264,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = (skb->tail - b);
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5dbb9d451f73..bbec4a0d4dcb 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -38,6 +38,7 @@
 #include <linux/notifier.h>
 #include <linux/netfilter.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -348,7 +349,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (f == NULL)
@@ -374,7 +375,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
@@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index abc47cc48ad0..cc941d0ee3a5 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -28,6 +28,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -88,9 +89,9 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
 static inline
 void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
 {
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	memset(head->fastmap, 0, sizeof(head->fastmap));
-	spin_unlock_bh(&dev->queue_lock);
+	qdisc_unlock_tree(dev);
 }
 
 static inline void
@@ -562,7 +563,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
 	struct route4_filter *f = (struct route4_filter*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	u32 id;
 
@@ -591,7 +592,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
 		goto rtattr_failure;
@@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 1d4a1fb17608..0a683c07c648 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -31,6 +31,7 @@
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7853621a04cc..22f9ede70e8f 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,9 +143,9 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	u8 tunnelid = 0;
 	u8 *xprt;
 #if RSVP_DST_LEN == 4
-	struct ipv6hdr *nhptr = skb->nh.ipv6h;
+	struct ipv6hdr *nhptr = ipv6_hdr(skb);
 #else
-	struct iphdr *nhptr = skb->nh.iph;
+	struct iphdr *nhptr = ip_hdr(skb);
 #endif
 
 restart:
@@ -160,7 +160,7 @@ restart:
 	dst = &nhptr->daddr;
 	protocol = nhptr->protocol;
 	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-	if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
 		return -1;
 #endif
 
@@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct rsvp_filter *f = (struct rsvp_filter*)fh;
 	struct rsvp_session *s;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_rsvp_pinfo pinfo;
 
@@ -623,14 +623,14 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto rtattr_failure;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index a2979d89798f..93b6abed57db 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -34,6 +34,7 @@
 #include <net/sock.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
+#include <net/netlink.h>
 
 #define RSVP_DST_LEN	4
 #define RSVP_ID		"rsvp6"
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 040e2d2d281a..47ac0c556429 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 #include <net/pkt_cls.h>
 #include <net/route.h>
 
@@ -245,9 +246,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
 	}
 
 	if (tb[TCA_TCINDEX_SHIFT-1]) {
-		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16))
+		if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int))
 			goto errout;
-		cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
+		cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
 	}
 
 	err = -EBUSY;
@@ -448,7 +449,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +464,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
 		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
 		    &p->fall_through);
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 	} else {
 		if (p->perfect) {
 			t->tcm_handle = r-p->perfect;
@@ -486,7 +487,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 
 		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;
@@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d25..c7a347bd6d70 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -50,6 +50,7 @@
 #include <linux/notifier.h>
 #include <linux/rtnetlink.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -119,7 +120,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 	} stack[TC_U32_MAXDEPTH];
 
 	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
-	u8 *ptr = skb->nh.raw;
+	u8 *ptr = skb_network_header(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
 	int off2 = 0;
@@ -213,7 +214,7 @@ check_terminal:
 			off2 = 0;
 		}
 
-		if (ptr < skb->tail)
+		if (ptr < skb_tail_pointer(skb))
 			goto next_ht;
 	}
 
@@ -435,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
 			BUG_TRAP(ht->refcnt == 0);
 
 			kfree(ht);
-		};
+		}
 
 		kfree(tp_c);
 	}
@@ -718,7 +719,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tc_u_knode *n = (struct tc_u_knode*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (n == NULL)
@@ -765,14 +766,14 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 #endif
 	}
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	if (TC_U32_KEY(n->handle))
 		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
 			goto rtattr_failure;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c67969..0a2a7fe08de3 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
 			struct tcf_pkt_info *info)
 {
 	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
-	unsigned char *ptr = skb->nh.raw;
+	const unsigned char *ptr = skb_network_header(skb);
 
 	if (info) {
 		if (info->ptr)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 959c306c5714..63146d339d81 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
 int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 {
 	int i;
-	struct rtattr * top_start = (struct rtattr*) skb->tail;
-	struct rtattr * list_start;
+	u8 *tail;
+	struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
+	struct rtattr *list_start;
 
 	RTA_PUT(skb, tlv, 0, NULL);
 	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
 
-	list_start = (struct rtattr *) skb->tail;
+	list_start = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
 
+	tail = skb_tail_pointer(skb);
 	for (i = 0; i < tree->hdr.nmatches; i++) {
-		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct rtattr *match_start = (struct rtattr *)tail;
 		struct tcf_ematch *em = tcf_em_get_match(tree, i);
 		struct tcf_ematch_hdr em_hdr = {
 			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 		} else if (em->datalen > 0)
 			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
 
-		match_start->rta_len = skb->tail - (u8*) match_start;
+		tail = skb_tail_pointer(skb);
+		match_start->rta_len = tail - (u8 *)match_start;
 	}
 
-	list_start->rta_len = skb->tail - (u8 *) list_start;
-	top_start->rta_len = skb->tail - (u8 *) top_start;
+	list_start->rta_len = tail - (u8 *)list_start;
+	top_start->rta_len = tail - (u8 *)top_start;
 
 	return 0;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecc988af4a9a..8699e7006d80 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,14 +27,15 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
+#include <linux/hrtimer.h>
 
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -190,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
    (root qdisc, all its children, children of children etc.)
  */
 
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	struct Qdisc *q;
 
@@ -201,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
 	return NULL;
 }
 
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
-	struct Qdisc *q;
-
-	read_lock(&qdisc_tree_lock);
-	q = __qdisc_lookup(dev, handle);
-	read_unlock(&qdisc_tree_lock);
-	return q;
-}
-
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
@@ -291,6 +282,48 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 	}
 }
 
+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+						 timer);
+	struct net_device *dev = wd->qdisc->dev;
+
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	smp_wmb();
+	if (spin_trylock(&dev->queue_lock)) {
+		qdisc_run(dev);
+		spin_unlock(&dev->queue_lock);
+	} else
+		netif_schedule(dev);
+
+	return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	wd->timer.function = qdisc_watchdog;
+	wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+	ktime_t time;
+
+	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	time = ktime_set(0, 0);
+	time = ktime_add_ns(time, PSCHED_US2NS(expires));
+	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+	hrtimer_cancel(&wd->timer);
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
 /* Allocate an unique handle from space managed by kernel */
 
@@ -362,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (n == 0)
 		return;
 	while ((parentid = sch->parent)) {
-		sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
 			cl = cops->get(sch, parentid);
@@ -467,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
+		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	} else if (handle == 0) {
-		handle = qdisc_alloc_handle(dev);
-		err = -ENOMEM;
-		if (handle == 0)
-			goto err_out3;
+	} else {
+		sch->stats_lock = &dev->queue_lock;
+		if (handle == 0) {
+			handle = qdisc_alloc_handle(dev);
+			err = -ENOMEM;
+			if (handle == 0)
+				goto err_out3;
+		}
 	}
 
 	sch->handle = handle;
@@ -621,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return err;
 		if (q) {
 			qdisc_notify(skb, n, clid, q, NULL);
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
@@ -756,17 +793,17 @@ graft:
 		err = qdisc_graft(dev, p, clid, q, &old_q);
 		if (err) {
 			if (q) {
-				spin_lock_bh(&dev->queue_lock);
+				qdisc_lock_tree(dev);
 				qdisc_destroy(q);
-				spin_unlock_bh(&dev->queue_lock);
+				qdisc_unlock_tree(dev);
 			}
 			return err;
 		}
 		qdisc_notify(skb, n, clid, old_q, q);
 		if (old_q) {
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(old_q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	}
 	return 0;
@@ -777,7 +814,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
@@ -811,12 +848,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -862,7 +899,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (idx > s_idx)
 			s_q_idx = 0;
-		read_lock(&qdisc_tree_lock);
 		q_idx = 0;
 		list_for_each_entry(q, &dev->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
@@ -870,13 +906,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			}
 			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
-				read_unlock(&qdisc_tree_lock);
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 				goto done;
-			}
 			q_idx++;
 		}
-		read_unlock(&qdisc_tree_lock);
 	}
 
 done:
@@ -1015,7 +1048,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
 	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 
@@ -1040,12 +1073,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1099,7 +1132,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	read_lock(&qdisc_tree_lock);
 	list_for_each_entry(q, &dev->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
@@ -1121,7 +1153,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 		t++;
 	}
-	read_unlock(&qdisc_tree_lock);
 
 	cb->args[0] = t;
 
@@ -1146,7 +1177,7 @@ reclassify:
 
 	for ( ; tp; tp = tp->next) {
 		if ((tp->protocol == protocol ||
-			tp->protocol == __constant_htons(ETH_P_ALL)) &&
+			tp->protocol == htons(ETH_P_ALL)) &&
 			(err = tp->classify(skb, tp, res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 			if ( TC_ACT_RECLASSIFY == err) {
@@ -1175,15 +1206,31 @@ reclassify:
 	return -1;
 }
 
-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
+void tcf_destroy(struct tcf_proto *tp)
+{
+	tp->ops->destroy(tp);
+	module_put(tp->ops->owner);
+	kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = fl) != NULL) {
+		fl = tp->next;
+		tcf_destroy(tp);
+	}
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
 
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
-		      psched_tick_per_us, psched_us_per_tick,
-		      1000000, HZ);
+		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+		   1000000,
+		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
 
 	return 0;
 }
@@ -1202,101 +1249,19 @@ static const struct file_operations psched_fops = {
 };
 #endif
 
-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
-	if (sizeof(cycles_t) == sizeof(u32)) {
-		psched_time_t dummy_stamp;
-		PSCHED_GET_TIME(dummy_stamp);
-		psched_timer.expires = jiffies + 1*HZ;
-		add_timer(&psched_timer);
-	}
-}
-
-int __init psched_calibrate_clock(void)
-{
-	psched_time_t stamp, stamp1;
-	struct timeval tv, tv1;
-	psched_tdiff_t delay;
-	long rdelay;
-	unsigned long stop;
-
-	psched_tick(0);
-	stop = jiffies + HZ/10;
-	PSCHED_GET_TIME(stamp);
-	do_gettimeofday(&tv);
-	while (time_before(jiffies, stop)) {
-		barrier();
-		cpu_relax();
-	}
-	PSCHED_GET_TIME(stamp1);
-	do_gettimeofday(&tv1);
-
-	delay = PSCHED_TDIFF(stamp1, stamp);
-	rdelay = tv1.tv_usec - tv.tv_usec;
-	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
-	if (rdelay > delay)
-		return -1;
-	delay /= rdelay;
-	psched_tick_per_us = delay;
-	while ((delay>>=1) != 0)
-		psched_clock_scale++;
-	psched_us_per_tick = 1<<psched_clock_scale;
-	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
-	return 0;
-}
-#endif
-
 static int __init pktsched_init(void)
 {
-	struct rtnetlink_link *link_p;
-
-#ifdef CONFIG_NET_SCH_CLK_CPU
-	if (psched_calibrate_clock() < 0)
-		return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
-	psched_tick_per_us = HZ<<PSCHED_JSCALE;
-	psched_us_per_tick = 1000000;
-#endif
-
-	link_p = rtnetlink_links[PF_UNSPEC];
-
-	/* Setup rtnetlink links. It is made here to avoid
-	   exporting large number of public symbols.
-	 */
-
-	if (link_p) {
-		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
-		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
-		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
-	}
-
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
 	proc_net_fops_create("psched", 0, &psched_fops);
 
+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
 	return 0;
 }
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index afb3bbd571f2..be7d299acd73 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/file.h> /* for fput */
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -157,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
 	return atm_tc_get(sch,classid);
 }
 
-
-static void destroy_filters(struct atm_flow_data *flow)
-{
-	struct tcf_proto *filter;
-
-	while ((filter = flow->filter_list)) {
-		DPRINTK("destroy_filters: destroying filter %p\n",filter);
-		flow->filter_list = filter->next;
-		tcf_destroy(filter);
-	}
-}
-
-
 /*
  * atm_tc_put handles all destructions, including the ones that are explicitly
  * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
@@ -194,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	*prev = flow->next;
 	DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
 	qdisc_destroy(flow->q);
-	destroy_filters(flow);
+	tcf_destroy_chain(flow->filter_list);
 	if (flow->sock) {
 		DPRINTK("atm_tc_put: f_count %d\n",
 		    file_count(flow->sock->file));
@@ -503,7 +491,7 @@ static void sch_atm_dequeue(unsigned long data)
 			}
 			D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
 			/* remove any LL header somebody else has attached */
-			skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 			if (skb_headroom(skb) < flow->hdr_len) {
 				struct sk_buff *new;
 
@@ -513,7 +501,7 @@ static void sch_atm_dequeue(unsigned long data)
 				skb = new;
 			}
 			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
-			    skb->nh.iph,skb->data);
+				 skb_network_header(skb), skb->data);
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
 			    flow->hdr_len);
@@ -610,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
 	/* races ? */
 	while ((flow = p->flows)) {
-		destroy_filters(flow);
+		tcf_destroy_chain(flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
 			    flow->ref);
@@ -631,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct atm_qdisc_data *p = PRIV(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
@@ -661,11 +649,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 
 		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
 	}
-	rta->rta_len = skb->tail-b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb,b-skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 76c92e710a33..a294542cb8e4 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -112,7 +113,7 @@ struct cbq_class
 
 	/* Overlimit strategy parameters */
 	void			(*overlimit)(struct cbq_class *cl);
-	long			penalty;
+	psched_tdiff_t		penalty;
 
 	/* General scheduler (WRR) parameters */
 	long			allot;
@@ -143,7 +144,7 @@ struct cbq_class
 	psched_time_t		undertime;
 	long			avgidle;
 	long			deficit;	/* Saved deficit for WRR */
-	unsigned long		penalized;
+	psched_time_t		penalized;
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
@@ -180,12 +181,12 @@ struct cbq_sched_data
 	psched_time_t		now_rt;		/* Cached real time */
 	unsigned		pmask;
 
-	struct timer_list	delay_timer;
-	struct timer_list	wd_timer;	/* Watchdog timer,
+	struct hrtimer		delay_timer;
+	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 						   started when CBQ has
 						   backlog, but cannot
 						   transmit just now */
-	long			wd_expires;
+	psched_tdiff_t		wd_expires;
 	int			toplevel;
 	u32			hgenerator;
 };
@@ -384,12 +385,12 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 		psched_time_t now;
 		psched_tdiff_t incr;
 
-		PSCHED_GET_TIME(now);
-		incr = PSCHED_TDIFF(now, q->now_rt);
-		PSCHED_TADD2(q->now, incr, now);
+		now = psched_get_time();
+		incr = now - q->now_rt;
+		now = q->now + incr;
 
 		do {
-			if (PSCHED_TLESS(cl->undertime, now)) {
+			if (cl->undertime < now) {
 				q->toplevel = cl->level;
 				return;
 			}
@@ -473,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void cbq_ovl_classic(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;
 
 	if (!cl->delayed) {
 		delay += cl->offtime;
@@ -491,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 			cl->avgidle = cl->minidle;
 		if (delay <= 0)
 			delay = 1;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;
 
 		cl->xstats.overactions++;
 		cl->delayed = 1;
@@ -508,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		psched_tdiff_t base_delay = q->wd_expires;
 
 		for (b = cl->borrow; b; b = b->borrow) {
-			delay = PSCHED_TDIFF(b->undertime, q->now);
+			delay = b->undertime - q->now;
 			if (delay < base_delay) {
 				if (delay <= 0)
 					delay = 1;
@@ -546,27 +547,32 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
 static void cbq_ovl_delay(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;
 
 	if (!cl->delayed) {
-		unsigned long sched = jiffies;
+		psched_time_t sched = q->now;
+		ktime_t expires;
 
 		delay += cl->offtime;
 		if (cl->avgidle < 0)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 		if (cl->avgidle < cl->minidle)
 			cl->avgidle = cl->minidle;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;
 
 		if (delay > 0) {
-			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+			sched += delay + cl->penalty;
 			cl->penalized = sched;
 			cl->cpriority = TC_CBQ_MAXPRIO;
 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
-			if (del_timer(&q->delay_timer) &&
-			    (long)(q->delay_timer.expires - sched) > 0)
-				q->delay_timer.expires = sched;
-			add_timer(&q->delay_timer);
+
+			expires = ktime_set(0, 0);
+			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+			if (hrtimer_try_to_cancel(&q->delay_timer) &&
+			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
+						  expires)) > 0)
+				q->delay_timer.expires = expires;
+			hrtimer_restart(&q->delay_timer);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;
@@ -583,7 +589,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 
-	cl->penalized = jiffies + cl->penalty;
+	cl->penalized = q->now + cl->penalty;
 
 	if (cl->cpriority != cl->priority2) {
 		cl->cpriority = cl->priority2;
@@ -604,27 +610,19 @@ static void cbq_ovl_drop(struct cbq_class *cl)
 	cbq_ovl_classic(cl);
 }
 
-static void cbq_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc*)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
-static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+				       psched_time_t now)
 {
 	struct cbq_class *cl;
 	struct cbq_class *cl_prev = q->active[prio];
-	unsigned long now = jiffies;
-	unsigned long sched = now;
+	psched_time_t sched = now;
 
 	if (cl_prev == NULL)
-		return now;
+		return 0;
 
 	do {
 		cl = cl_prev->next_alive;
-		if ((long)(now - cl->penalized) > 0) {
+		if (now - cl->penalized > 0) {
 			cl_prev->next_alive = cl->next_alive;
 			cl->next_alive = NULL;
 			cl->cpriority = cl->priority;
@@ -640,30 +638,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 			}
 
 			cl = cl_prev->next_alive;
-		} else if ((long)(sched - cl->penalized) > 0)
+		} else if (sched - cl->penalized > 0)
 			sched = cl->penalized;
 	} while ((cl_prev = cl) != q->active[prio]);
 
-	return (long)(sched - now);
+	return sched - now;
 }
 
-static void cbq_undelay(unsigned long arg)
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	long delay = 0;
+	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+						delay_timer);
+	struct Qdisc *sch = q->watchdog.qdisc;
+	psched_time_t now;
+	psched_tdiff_t delay = 0;
 	unsigned pmask;
 
+	now = psched_get_time();
+
 	pmask = q->pmask;
 	q->pmask = 0;
 
 	while (pmask) {
 		int prio = ffz(~pmask);
-		long tmp;
+		psched_tdiff_t tmp;
 
 		pmask &= ~(1<<prio);
 
-		tmp = cbq_undelay_prio(q, prio);
+		tmp = cbq_undelay_prio(q, prio, now);
 		if (tmp > 0) {
 			q->pmask |= 1<<prio;
 			if (tmp < delay || delay == 0)
@@ -672,12 +674,16 @@ static void cbq_undelay(unsigned long arg)
 	}
 
 	if (delay) {
-		q->delay_timer.expires = jiffies + delay;
-		add_timer(&q->delay_timer);
+		ktime_t time;
+
+		time = ktime_set(0, 0);
+		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
 	netif_schedule(sch->dev);
+	return HRTIMER_NORESTART;
 }
 
 
@@ -732,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 	if (cl && q->toplevel >= borrowed->level) {
 		if (cl->q->q.qlen > 1) {
 			do {
-				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+				if (borrowed->undertime == PSCHED_PASTPERFECT) {
 					q->toplevel = borrowed->level;
 					return;
 				}
@@ -770,7 +776,7 @@ cbq_update(struct cbq_sched_data *q)
 			 idle = (now - last) - last_pktlen/rate
 		 */
 
-		idle = PSCHED_TDIFF(q->now, cl->last);
+		idle = q->now - cl->last;
 		if ((unsigned long)idle > 128*1024*1024) {
 			avgidle = cl->maxidle;
 		} else {
@@ -814,13 +820,11 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(&q->link, len);
 			idle += L2T(cl, len);
 
-			PSCHED_AUDIT_TDIFF(idle);
-
-			PSCHED_TADD2(q->now, idle, cl->undertime);
+			cl->undertime = q->now + idle;
 		} else {
 			/* Underlimit */
 
-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			if (avgidle > cl->maxidle)
 				cl->avgidle = cl->maxidle;
 			else
@@ -841,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
 	if (cl->tparent == NULL)
 		return cl;
 
-	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
-	    !PSCHED_TLESS(q->now, cl->undertime)) {
+	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 		cl->delayed = 0;
 		return cl;
 	}
@@ -865,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl)
 		}
 		if (cl->level > q->toplevel)
 			return NULL;
-	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
-		 PSCHED_TLESS(q->now, cl->undertime));
+	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 
 	cl->delayed = 0;
 	return cl;
@@ -1001,8 +1003,8 @@ cbq_dequeue(struct Qdisc *sch)
 	psched_time_t now;
 	psched_tdiff_t incr;
 
-	PSCHED_GET_TIME(now);
-	incr = PSCHED_TDIFF(now, q->now_rt);
+	now = psched_get_time();
+	incr = now - q->now_rt;
 
 	if (q->tx_class) {
 		psched_tdiff_t incr2;
@@ -1014,12 +1016,12 @@ cbq_dequeue(struct Qdisc *sch)
 		   cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
-		PSCHED_TADD(q->now, incr2);
+		q->now += incr2;
 		cbq_update(q);
 		if ((incr -= incr2) < 0)
 			incr = 0;
 	}
-	PSCHED_TADD(q->now, incr);
+	q->now += incr;
 	q->now_rt = now;
 
 	for (;;) {
@@ -1051,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch)
 		*/
 
 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
-		    PSCHED_IS_PASTPERFECT(q->link.undertime))
+		    q->link.undertime == PSCHED_PASTPERFECT)
 			break;
 
 		q->toplevel = TC_CBQ_MAXLEVEL;
-		PSCHED_SET_PASTPERFECT(q->link.undertime);
+		q->link.undertime = PSCHED_PASTPERFECT;
 	}
 
 	/* No packets in scheduler or nobody wants to give them to us :-(
@@ -1063,13 +1065,9 @@ cbq_dequeue(struct Qdisc *sch)
 
 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
-		if (q->wd_expires) {
-			long delay = PSCHED_US2JIFFIE(q->wd_expires);
-			if (delay <= 0)
-				delay = 1;
-			mod_timer(&q->wd_timer, jiffies + delay);
-			sch->flags |= TCQ_F_THROTTLED;
-		}
+		if (q->wd_expires)
+			qdisc_watchdog_schedule(&q->watchdog,
+						now + q->wd_expires);
 	}
 	return NULL;
 }
@@ -1276,10 +1274,10 @@ cbq_reset(struct Qdisc* sch)
 	q->pmask = 0;
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
-	del_timer(&q->wd_timer);
-	del_timer(&q->delay_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
+	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;
 
 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
@@ -1290,7 +1288,7 @@ cbq_reset(struct Qdisc* sch)
 			qdisc_reset(cl->q);
 
 			cl->next_alive = NULL;
-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			cl->avgidle = cl->maxidle;
 			cl->deficit = cl->quantum;
 			cl->cpriority = cl->priority;
@@ -1379,7 +1377,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 	default:
 		return -EINVAL;
 	}
-	cl->penalty = (ovl->penalty*HZ)/1000;
+	cl->penalty = ovl->penalty;
 	return 0;
 }
 
@@ -1446,14 +1444,11 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->link.minidle = -0x7FFFFFFF;
 	q->link.stats_lock = &sch->dev->queue_lock;
 
-	init_timer(&q->wd_timer);
-	q->wd_timer.data = (unsigned long)sch;
-	q->wd_timer.function = cbq_watchdog;
-	init_timer(&q->delay_timer);
-	q->delay_timer.data = (unsigned long)sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
+	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;
 
 	cbq_link_class(&q->link);
@@ -1467,19 +1462,19 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 
 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;
 
 	opt.flags = 0;
@@ -1498,13 +1493,13 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
 	opt.flags = 0;
@@ -1516,30 +1511,30 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;
 
 	opt.strategy = cl->ovl_strategy;
 	opt.priority2 = cl->priority2+1;
 	opt.pad = 0;
-	opt.penalty = (cl->penalty*1000)/HZ;
+	opt.penalty = cl->penalty;
 	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;
 
 	if (cl->split || cl->defmap) {
@@ -1551,14 +1546,14 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
 #ifdef CONFIG_NET_CLS_POLICE
 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;
 
 	if (cl->police) {
@@ -1570,7 +1565,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 #endif
@@ -1592,18 +1587,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	rta = (struct rtattr*)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, &q->link) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1621,7 +1616,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct cbq_class *cl = (struct cbq_class*)arg;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (cl->tparent)
@@ -1635,11 +1630,11 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, cl) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1654,8 +1649,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	cl->xstats.avgidle = cl->avgidle;
 	cl->xstats.undertime = 0;
 
-	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
-		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+	if (cl->undertime != PSCHED_PASTPERFECT)
+		cl->xstats.undertime = cl->undertime - q->now;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
@@ -1722,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
 	return 0;
 }
 
-static void cbq_destroy_filters(struct cbq_class *cl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = cl->filter_list) != NULL) {
-		cl->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 
 	BUG_TRAP(!cl->filters);
 
-	cbq_destroy_filters(cl);
+	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 #ifdef CONFIG_NET_ESTIMATOR
@@ -1765,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch)
 	 */
 	for (h = 0; h < 16; h++)
 		for (cl = q->classes[h]; cl; cl = cl->next)
-			cbq_destroy_filters(cl);
+			tcf_destroy_chain(cl->filter_list);
 
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 96324cf4e6a9..3c6fd181263f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -216,17 +216,17 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		/* FIXME: Safe with non-linear skbs? --RR */
 		switch (skb->protocol) {
 			case __constant_htons(ETH_P_IP):
-				skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+				skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			case __constant_htons(ETH_P_IPV6):
-				skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+				skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			default:
 				skb->tc_index = 0;
 				break;
-		};
+		}
 	}
 
 	if (TC_H_MAJ(skb->priority) == sch->handle)
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 				if (p->default_index != NO_DEFAULT_INDEX)
 					skb->tc_index = p->default_index;
 				break;
-		};
+		}
 	}
 
 	err = p->q->enqueue(skb,p->q);
@@ -292,11 +292,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 
 	switch (skb->protocol) {
 		case __constant_htons(ETH_P_IP):
-			ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+			ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		case __constant_htons(ETH_P_IPV6):
-			ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+			ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		default:
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 				       "unsupported protocol %d\n",
 				       ntohs(skb->protocol));
 			break;
-	};
+	}
 
 	return skb;
 }
@@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch)
 static void dsmark_destroy(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = PRIV(sch);
-	struct tcf_proto *tp;
 
 	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
 
-	while (p->filter_list) {
-		tp = p->filter_list;
-		p->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-
+	tcf_destroy_chain(p->filter_list);
 	qdisc_destroy(p->q);
 	kfree(p->mask);
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb3439d7c6..3385ee592541 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,27 @@
 
 /* Main transmission queue. */
 
-/* Main qdisc structure lock.
-
-   However, modifications
-   to data, participating in scheduling must be additionally
-   protected with dev->queue_lock spinlock.
-
-   The idea is the following:
-   - enqueue, dequeue are serialized via top level device
-     spinlock dev->queue_lock.
-   - tree walking is protected by read_lock(qdisc_tree_lock)
-     and this lock is used only in process context.
-   - updates to tree are made only under rtnl semaphore,
-     hence this lock may be made without local bh disabling.
-
-   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ *   spinlock dev->queue_lock.
+ * - ingress filtering is serialized via top level device
+ *   spinlock dev->ingress_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
  */
-DEFINE_RWLOCK(qdisc_tree_lock);
 
 void qdisc_lock_tree(struct net_device *dev)
 {
-	write_lock(&qdisc_tree_lock);
 	spin_lock_bh(&dev->queue_lock);
+	spin_lock(&dev->ingress_lock);
 }
 
 void qdisc_unlock_tree(struct net_device *dev)
 {
+	spin_unlock(&dev->ingress_lock);
 	spin_unlock_bh(&dev->queue_lock);
-	write_unlock(&qdisc_tree_lock);
 }
 
 /*
@@ -442,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 	sch->dequeue = ops->dequeue;
 	sch->dev = dev;
 	dev_hold(dev);
-	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
 
 	return sch;
@@ -458,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
 	sch = qdisc_alloc(dev, ops);
 	if (IS_ERR(sch))
 		goto errout;
+	sch->stats_lock = &dev->queue_lock;
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
@@ -528,15 +521,11 @@ void dev_activate(struct net_device *dev)
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
 				return;
 			}
-			write_lock(&qdisc_tree_lock);
 			list_add_tail(&qdisc->list, &dev->qdisc_list);
-			write_unlock(&qdisc_tree_lock);
 		} else {
 			qdisc =  &noqueue_qdisc;
 		}
-		write_lock(&qdisc_tree_lock);
 		dev->qdisc_sleeping = qdisc;
-		write_unlock(&qdisc_tree_lock);
 	}
 
 	if (!netif_carrier_ok(dev))
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 396deb71480f..9d124c4ee3a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -59,13 +59,13 @@
 #include <linux/skbuff.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <asm/system.h>
@@ -192,23 +192,9 @@ struct hfsc_sched
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
 	struct sk_buff_head requeue;		/* requeued packet */
-	struct timer_list wd_timer;		/* watchdog timer */
+	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	struct timeval tv;						\
-	do_gettimeofday(&tv);						\
-	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;		\
-} while (0)
-#endif
-
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
 
 
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	dx: psched_us
  *
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- *  CPU: resolution is between 0.5us and 1us.
- *  GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
  *
  * sm and ism are scaled in order to keep effective digits.
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
  * digits in decimal using the following table.
  *
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
  *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  ------------+-------------------------------------------------------
- *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-e    62500e-3
- *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
- *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
+ *  bytes/1.024us 12.8e-3    128e-3     1280e-3    12800e-3   128000e-3
  *
- *  0.5us/byte    160        16         1.6        0.16       0.016
- *  us/byte       80         8          0.8        0.08       0.008
- *  1.27us/byte   63         6.3        0.63       0.063      0.0063
+ *  1.024us/byte  78.125     7.8125     0.78125    0.078125   0.0078125
  */
 #define	SM_SHIFT	20
 #define	ISM_SHIFT	18
@@ -460,8 +435,8 @@ m2sm(u32 m)
 	u64 sm;
 
 	sm = ((u64)m << SM_SHIFT);
-	sm += PSCHED_JIFFIE2US(HZ) - 1;
-	do_div(sm, PSCHED_JIFFIE2US(HZ));
+	sm += PSCHED_TICKS_PER_SEC - 1;
+	do_div(sm, PSCHED_TICKS_PER_SEC);
 	return sm;
 }
 
@@ -474,7 +449,7 @@ m2ism(u32 m)
 	if (m == 0)
 		ism = HT_INFINITY;
 	else {
-		ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+		ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
 		ism += m - 1;
 		do_div(ism, m);
 	}
@@ -487,7 +462,7 @@ d2dx(u32 d)
 {
 	u64 dx;
 
-	dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+	dx = ((u64)d * PSCHED_TICKS_PER_SEC);
 	dx += USEC_PER_SEC - 1;
 	do_div(dx, USEC_PER_SEC);
 	return dx;
@@ -499,7 +474,7 @@ sm2m(u64 sm)
 {
 	u64 m;
 
-	m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+	m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
 	return (u32)m;
 }
 
@@ -510,7 +485,7 @@ dx2d(u64 dx)
 	u64 d;
 
 	d = dx * USEC_PER_SEC;
-	do_div(d, PSCHED_JIFFIE2US(HZ));
+	do_div(d, PSCHED_TICKS_PER_SEC);
 	return (u32)d;
 }
 
@@ -654,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
 static void
 init_ed(struct hfsc_class *cl, unsigned int next_len)
 {
-	u64 cur_time;
-
-	PSCHED_GET_TIME(cur_time);
+	u64 cur_time = psched_get_time();
 
 	/* update the deadline curve */
 	rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
@@ -779,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
 			if (cl->cl_flags & HFSC_USC) {
 				/* class has upper limit curve */
 				if (cur_time == 0)
-					PSCHED_GET_TIME(cur_time);
+					cur_time = psched_get_time();
 
 				/* update the ulimit curve */
 				rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
@@ -1063,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
 				return -EINVAL;
 		}
-		PSCHED_GET_TIME(cur_time);
+		cur_time = psched_get_time();
 
 		sch_tree_lock(sch);
 		if (rsc != NULL)
@@ -1149,22 +1122,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 }
 
 static void
-hfsc_destroy_filters(struct tcf_proto **fl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = *fl) != NULL) {
-		*fl = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
-static void
 hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	hfsc_destroy_filters(&cl->filter_list);
+	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->qdisc);
 #ifdef CONFIG_NET_ESTIMATOR
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1184,10 +1146,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
 
 	sch_tree_lock(sch);
 
-	list_del(&cl->hlist);
 	list_del(&cl->siblings);
 	hfsc_adjust_levels(cl->cl_parent);
+
 	hfsc_purge_queue(sch, cl);
+	list_del(&cl->hlist);
+
 	if (--cl->refcnt == 0)
 		hfsc_destroy_class(sch, cl);
 
@@ -1387,7 +1351,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 		struct tcmsg *tcm)
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta = (struct rtattr *)b;
 
 	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
@@ -1398,11 +1362,11 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (hfsc_dump_curves(skb, cl) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
  rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1457,21 +1421,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 }
 
 static void
-hfsc_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
-static void
-hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
+hfsc_schedule_watchdog(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl;
 	u64 next_time = 0;
-	long delay;
 
 	if ((cl = eltree_get_minel(q)) != NULL)
 		next_time = cl->cl_e;
@@ -1480,11 +1434,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
 			next_time = q->root.cl_cfmin;
 	}
 	WARN_ON(next_time == 0);
-	delay = next_time - cur_time;
-	delay = PSCHED_US2JIFFIE(delay);
-
-	sch->flags |= TCQ_F_THROTTLED;
-	mod_timer(&q->wd_timer, jiffies + delay);
+	qdisc_watchdog_schedule(&q->watchdog, next_time);
 }
 
 static int
@@ -1521,9 +1471,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
 
 	list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
 
-	init_timer(&q->wd_timer);
-	q->wd_timer.function = hfsc_watchdog;
-	q->wd_timer.data = (unsigned long)sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
 
 	return 0;
 }
@@ -1593,8 +1541,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 	__skb_queue_purge(&q->requeue);
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
-	del_timer(&q->wd_timer);
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_watchdog_cancel(&q->watchdog);
 	sch->q.qlen = 0;
 }
 
@@ -1610,14 +1557,14 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 			hfsc_destroy_class(sch, cl);
 	}
 	__skb_queue_purge(&q->requeue);
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 static int
 hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
 
 	qopt.defcls = q->defcls;
@@ -1625,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
  rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1679,7 +1626,7 @@ hfsc_dequeue(struct Qdisc *sch)
 	if ((skb = __skb_dequeue(&q->requeue)))
 		goto out;
 
-	PSCHED_GET_TIME(cur_time);
+	cur_time = psched_get_time();
 
 	/*
 	 * if there are eligible classes, use real-time criteria.
@@ -1696,7 +1643,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		cl = vttree_get_minvt(&q->root, cur_time);
 		if (cl == NULL) {
 			sch->qstats.overlimits++;
-			hfsc_schedule_watchdog(sch, cur_time);
+			hfsc_schedule_watchdog(sch);
 			return NULL;
 		}
 	}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 97cbb9aec946..99bcec8dd04c 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/rbtree.h>
@@ -128,7 +129,7 @@ struct htb_class {
 	} un;
 	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
 	struct rb_node pq_node;	/* node for event queue */
-	unsigned long pq_key;	/* the same type as jiffies global */
+	psched_time_t pq_key;
 
 	int prio_activity;	/* for which prios are we active */
 	enum htb_cmode cmode;	/* current mode of the class */
@@ -179,10 +180,7 @@ struct htb_sched {
 	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 
 	/* time of nearest event per level (row) */
-	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
-	/* cached value of jiffies in dequeue */
-	unsigned long jiffies;
+	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 
 	/* whether we hit non-work conserving class during this dequeue; we use */
 	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
@@ -195,7 +193,7 @@ struct htb_sched {
 
 	int rate2quantum;	/* quant = rate / rate2quantum */
 	psched_time_t now;	/* cached dequeue time */
-	struct timer_list timer;	/* send delay timer */
+	struct qdisc_watchdog watchdog;
 #ifdef HTB_RATECM
 	struct timer_list rttim;	/* rate computer timer */
 	int recmp_bucket;	/* which hash bucket to recompute next */
@@ -342,19 +340,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 
-	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
-	if (cl->pq_key == q->jiffies)
+	cl->pq_key = q->now + delay;
+	if (cl->pq_key == q->now)
 		cl->pq_key++;
 
 	/* update the nearest event cache */
-	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+	if (q->near_ev_cache[cl->level] > cl->pq_key)
 		q->near_ev_cache[cl->level] = cl->pq_key;
 
 	while (*p) {
 		struct htb_class *c;
 		parent = *p;
 		c = rb_entry(parent, struct htb_class, pq_node);
-		if (time_after_eq(cl->pq_key, c->pq_key))
+		if (cl->pq_key >= c->pq_key)
 			p = &parent->rb_right;
 		else
 			p = &parent->rb_left;
@@ -679,14 +677,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-static void htb_timer(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	wmb();
-	netif_schedule(sch->dev);
-}
-
 #ifdef HTB_RATECM
 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
 static void htb_rate_timer(unsigned long arg)
@@ -739,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 	cl->T = toks
 
 	while (cl) {
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 		if (cl->level >= level) {
 			if (cl->level == level)
 				cl->xstats.lends++;
@@ -778,11 +768,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 /**
  * htb_do_events - make mode changes to classes at the level
  *
- * Scans event queue for pending events and applies them. Returns jiffies to
+ * Scans event queue for pending events and applies them. Returns time of
  * next pending event (0 for no event in pq).
- * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ * Note: Applied are events whose have cl->pq_key <= q->now.
  */
-static long htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level)
 {
 	int i;
 
@@ -795,18 +785,18 @@ static long htb_do_events(struct htb_sched *q, int level)
 			return 0;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
-		if (time_after(cl->pq_key, q->jiffies)) {
-			return cl->pq_key - q->jiffies;
-		}
+		if (cl->pq_key > q->now)
+			return cl->pq_key;
+
 		htb_safe_rb_erase(p, q->wait_pq + level);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
 			htb_add_to_wait_tree(q, cl, diff);
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
-	return HZ / 10;
+	return q->now + PSCHED_TICKS_PER_SEC / 10;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -958,30 +948,12 @@ next:
 	return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch, long delay)
-{
-	struct htb_sched *q = qdisc_priv(sch);
-	if (delay <= 0)
-		delay = 1;
-	if (unlikely(delay > 5 * HZ)) {
-		if (net_ratelimit())
-			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-		delay = 5 * HZ;
-	}
-	/* why don't use jiffies here ? because expires can be in past */
-	mod_timer(&q->timer, q->jiffies + delay);
-	sch->flags |= TCQ_F_THROTTLED;
-	sch->qstats.overlimits++;
-}
-
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb = NULL;
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
-	long min_delay;
-
-	q->jiffies = jiffies;
+	psched_time_t next_event;
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 	skb = __skb_dequeue(&q->direct_queue);
@@ -993,23 +965,25 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 
 	if (!sch->q.qlen)
 		goto fin;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 
-	min_delay = LONG_MAX;
+	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 	q->nwc_hit = 0;
 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 		/* common case optimization - skip event handler quickly */
 		int m;
-		long delay;
-		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-			delay = htb_do_events(q, level);
-			q->near_ev_cache[level] =
-			    q->jiffies + (delay ? delay : HZ);
+		psched_time_t event;
+
+		if (q->now >= q->near_ev_cache[level]) {
+			event = htb_do_events(q, level);
+			q->near_ev_cache[level] = event ? event :
+							  PSCHED_TICKS_PER_SEC;
 		} else
-			delay = q->near_ev_cache[level] - q->jiffies;
+			event = q->near_ev_cache[level];
+
+		if (event && next_event > event)
+			next_event = event;
 
-		if (delay && min_delay > delay)
-			min_delay = delay;
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
 			int prio = ffz(m);
@@ -1022,7 +996,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 			}
 		}
 	}
-	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
+	sch->qstats.overlimits++;
+	qdisc_watchdog_schedule(&q->watchdog, next_event);
 fin:
 	return skb;
 }
@@ -1075,8 +1050,7 @@ static void htb_reset(struct Qdisc *sch)
 
 		}
 	}
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
 	memset(q->row, 0, sizeof(q->row));
@@ -1113,14 +1087,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
-	init_timer(&q->timer);
+	qdisc_watchdog_init(&q->watchdog, sch);
 	skb_queue_head_init(&q->direct_queue);
 
 	q->direct_qlen = sch->dev->tx_queue_len;
 	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
-	q->timer.function = htb_timer;
-	q->timer.data = (unsigned long)sch;
 
 #ifdef HTB_RATECM
 	init_timer(&q->rttim);
@@ -1139,7 +1111,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -1152,12 +1124,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	skb_trim(skb, skb->tail - skb->data);
+	nlmsg_trim(skb, skb_tail_pointer(skb));
 	return -1;
 }
 
@@ -1165,7 +1137,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
@@ -1188,12 +1160,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.prio = cl->un.leaf.prio;
 	opt.level = cl->level;
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1264,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 	return (unsigned long)cl;
 }
 
-static void htb_destroy_filters(struct tcf_proto **fl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = *fl) != NULL) {
-		*fl = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static inline int htb_parent_last_child(struct htb_class *cl)
 {
 	if (!cl->parent)
@@ -1302,7 +1264,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
 	parent->un.leaf.prio = parent->prio;
 	parent->tokens = parent->buffer;
 	parent->ctokens = parent->cbuffer;
-	PSCHED_GET_TIME(parent->t_c);
+	parent->t_c = psched_get_time();
 	parent->cmode = HTB_CAN_SEND;
 }
 
@@ -1317,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
 
-	htb_destroy_filters(&cl->filter_list);
+	tcf_destroy_chain(cl->filter_list);
 
 	while (!list_empty(&cl->children))
 		htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1341,7 +1303,7 @@ static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 #ifdef HTB_RATECM
 	del_timer_sync(&q->rttim);
 #endif
@@ -1349,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch)
 	   and surprisingly it worked in 2.4. But it must precede it
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
-	htb_destroy_filters(&q->filter_list);
+	tcf_destroy_chain(q->filter_list);
 
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
@@ -1380,15 +1342,15 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 	sch_tree_lock(sch);
 
-	/* delete from hash and active; remainder in destroy_class */
-	hlist_del_init(&cl->hlist);
-
 	if (!cl->level) {
 		qlen = cl->un.leaf.q->q.qlen;
 		qdisc_reset(cl->un.leaf.q);
 		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
 	}
 
+	/* delete from hash and active; remainder in destroy_class */
+	hlist_del_init(&cl->hlist);
+
 	if (cl->prio_activity)
 		htb_deactivate(q, cl);
 
@@ -1498,8 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
-		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
-		PSCHED_GET_TIME(cl->t_c);
+		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
+		cl->t_c = psched_get_time();
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cfe070ee6ee3..f8b9f1cdf738 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/smp.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
@@ -169,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			skb->tc_index = TC_H_MIN(res.classid);
 			result = TC_ACT_OK;
 			break;
-	};
+	}
 /* backward compat */
 #else
 #ifdef	CONFIG_NET_CLS_POLICE
@@ -186,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		sch->bstats.bytes += skb->len;
 		result = NF_ACCEPT;
 		break;
-	};
+	}
 
 #else
 	D2PRINTK("Overriding result to ACCEPT\n");
@@ -247,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb,
 		skb->dev ? (*pskb)->dev->name : "(no dev)",
 		skb->len);
 
-/*
-revisit later: Use a private since lock dev->queue_lock is also
-used on the egress (might slow things for an iota)
-*/
-
 	if (dev->qdisc_ingress) {
-		spin_lock(&dev->queue_lock);
+		spin_lock(&dev->ingress_lock);
 		if ((q = dev->qdisc_ingress) != NULL)
 			fwres = q->enqueue(skb, q);
-		spin_unlock(&dev->queue_lock);
+		spin_unlock(&dev->ingress_lock);
 	}
 
 	return fwres;
@@ -345,14 +341,9 @@ static void ingress_reset(struct Qdisc *sch)
 static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
-	struct tcf_proto *tp;
 
 	DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
-	while (p->filter_list) {
-		tp = p->filter_list;
-		p->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
+	tcf_destroy_chain(p->filter_list);
 #if 0
 /* for future use */
 	qdisc_destroy(p->q);
@@ -362,16 +353,16 @@ static void ingress_destroy(struct Qdisc *sch)
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	rta = (struct rtattr *) b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1ccbfb55b0b8..5d9d8bc9cc3a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #define VERSION "1.2"
@@ -54,21 +55,22 @@
 
 struct netem_sched_data {
 	struct Qdisc	*qdisc;
-	struct timer_list timer;
+	struct qdisc_watchdog watchdog;
+
+	psched_tdiff_t latency;
+	psched_tdiff_t jitter;
 
-	u32 latency;
 	u32 loss;
 	u32 limit;
 	u32 counter;
 	u32 gap;
-	u32 jitter;
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
 
 	struct crndstate {
-		unsigned long last;
-		unsigned long rho;
+		u32 last;
+		u32 rho;
 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
 	struct disttable {
@@ -95,12 +97,12 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
  * Next number depends on last value.
  * rho is scaled to avoid floating point.
  */
-static unsigned long get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state)
 {
 	u64 value, rho;
 	unsigned long answer;
 
-	if (state->rho == 0)	/* no correllation */
+	if (state->rho == 0)	/* no correlation */
 		return net_random();
 
 	value = net_random();
@@ -114,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state)
  * std deviation sigma.  Uses table lookup to approximate the desired
  * distribution, and a uniformly-distributed pseudo-random source.
  */
-static long tabledist(unsigned long mu, long sigma,
-		      struct crndstate *state, const struct disttable *dist)
+static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
+				struct crndstate *state,
+				const struct disttable *dist)
 {
-	long t, x;
-	unsigned long rnd;
+	psched_tdiff_t x;
+	long t;
+	u32 rnd;
 
 	if (sigma == 0)
 		return mu;
@@ -213,8 +217,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		delay = tabledist(q->latency, q->jitter,
 				  &q->delay_cor, q->delay_dist);
 
-		PSCHED_GET_TIME(now);
-		PSCHED_TADD2(now, delay, cb->time_to_send);
+		now = psched_get_time();
+		cb->time_to_send = now + delay;
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
@@ -222,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		 * Do re-ordering by putting one out of N packets at the front
 		 * of the queue.
 		 */
-		PSCHED_GET_TIME(cb->time_to_send);
+		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 		ret = q->qdisc->ops->requeue(skb, q->qdisc);
 	}
@@ -269,55 +273,43 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
+	smp_mb();
+	if (sch->flags & TCQ_F_THROTTLED)
+		return NULL;
+
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb
 			= (const struct netem_skb_cb *)skb->cb;
-		psched_time_t now;
+		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
-		PSCHED_GET_TIME(now);
-
-		if (PSCHED_TLESS(cb->time_to_send, now)) {
+		if (cb->time_to_send <= now) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
 			return skb;
-		} else {
-			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
-
-			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
-				qdisc_tree_decrease_qlen(q->qdisc, 1);
-				sch->qstats.drops++;
-				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
-				       q->qdisc->ops->id);
-			}
+		}
 
-			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
-			sch->flags |= TCQ_F_THROTTLED;
+		if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
+			qdisc_tree_decrease_qlen(q->qdisc, 1);
+			sch->qstats.drops++;
+			printk(KERN_ERR "netem: %s could not requeue\n",
+			       q->qdisc->ops->id);
 		}
+
+		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
 	return NULL;
 }
 
-static void netem_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-
-	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
 static void netem_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 /* Pass size change message down to embedded FIFO */
@@ -438,10 +430,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 	q->loss = qopt->loss;
 	q->duplicate = qopt->duplicate;
 
-	/* for compatiablity with earlier versions.
-	 * if gap is set, need to assume 100% probablity
+	/* for compatibility with earlier versions.
+	 * if gap is set, need to assume 100% probability
 	 */
-	q->reorder = ~0;
+	if (q->gap)
+		q->reorder = ~0;
 
 	/* Handle nested options after initial queue options.
 	 * Should have put all options in nested format but too late now.
@@ -487,22 +480,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
  */
 struct fifo_sched_data {
 	u32 limit;
+	psched_time_t oldest;
 };
 
 static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
-	const struct netem_skb_cb *ncb
-		= (const struct netem_skb_cb *)nskb->cb;
+	psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
+		/* Optimize for add at tail */
+		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
+			q->oldest = tnext;
+			return qdisc_enqueue_tail(nskb, sch);
+		}
+
 		skb_queue_reverse_walk(list, skb) {
 			const struct netem_skb_cb *cb
 				= (const struct netem_skb_cb *)skb->cb;
 
-			if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
+			if (tnext >= cb->time_to_send)
 				break;
 		}
 
@@ -515,7 +514,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		return NET_XMIT_SUCCESS;
 	}
 
-	return qdisc_drop(nskb, sch);
+	return qdisc_reshape_fail(nskb, sch);
 }
 
 static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -531,6 +530,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
 	} else
 		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
 
+	q->oldest = PSCHED_PASTPERFECT;
 	return 0;
 }
 
@@ -567,9 +567,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	init_timer(&q->timer);
-	q->timer.function = netem_watchdog;
-	q->timer.data = (unsigned long) sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
 
 	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
 				     TC_H_MAKE(sch->handle, 1));
@@ -590,7 +588,7 @@ static void netem_destroy(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 	qdisc_destroy(q->qdisc);
 	kfree(q->delay_dist);
 }
@@ -598,7 +596,7 @@ static void netem_destroy(struct Qdisc *sch)
 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	const struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta = (struct rtattr *) b;
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
@@ -626,12 +624,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de889f23f22a..269a6e17c6c4 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -32,6 +32,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -61,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
-		};
+		}
 
 		if (!q->filter_list ) {
 #else
@@ -188,13 +189,8 @@ prio_destroy(struct Qdisc* sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
-	struct tcf_proto *tp;
-
-	while ((tp = q->filter_list) != NULL) {
-		q->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
 
+	tcf_destroy_chain(q->filter_list);
 	for (prio=0; prio<q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
@@ -271,7 +267,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt)
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
@@ -280,7 +276,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66f32051a99b..96dfdf78d32c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -30,6 +30,7 @@
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/ipv6.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
@@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
 	{
-		struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 		h = iph->daddr;
 		h2 = iph->saddr^iph->protocol;
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -152,7 +153,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	}
 	case __constant_htons(ETH_P_IPV6):
 	{
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 		h = iph->daddr.s6_addr32[3];
 		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
 		if (iph->nexthdr == IPPROTO_TCP ||
@@ -461,7 +462,7 @@ static void sfq_destroy(struct Qdisc *sch)
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt opt;
 
 	opt.quantum = q->quantum;
@@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 85da8daa61d2..53862953baaf 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -32,6 +32,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -127,8 +128,8 @@ struct tbf_sched_data
 	long	tokens;			/* Current number of B tokens */
 	long	ptokens;		/* Current number of P tokens */
 	psched_time_t	t_c;		/* Time check-point */
-	struct timer_list wd_timer;	/* Watchdog timer */
 	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
+	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
 #define L2T(q,L)   ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
@@ -185,14 +186,6 @@ static unsigned int tbf_drop(struct Qdisc* sch)
 	return len;
 }
 
-static void tbf_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc*)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
 static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -202,13 +195,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 
 	if (skb) {
 		psched_time_t now;
-		long toks, delay;
+		long toks;
 		long ptoks = 0;
 		unsigned int len = skb->len;
 
-		PSCHED_GET_TIME(now);
-
-		toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+		now = psched_get_time();
+		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
 
 		if (q->P_tab) {
 			ptoks = toks + q->ptokens;
@@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			return skb;
 		}
 
-		delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
-
-		if (delay == 0)
-			delay = 1;
-
-		mod_timer(&q->wd_timer, jiffies+delay);
+		qdisc_watchdog_schedule(&q->watchdog,
+					now + max_t(long, -toks, -ptoks));
 
 		/* Maybe we have a shorter packet in the queue,
 		   which can be sent now. It sounds cool,
@@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			sch->qstats.drops++;
 		}
 
-		sch->flags |= TCQ_F_THROTTLED;
 		sch->qstats.overlimits++;
 	}
 	return NULL;
@@ -266,11 +253,10 @@ static void tbf_reset(struct Qdisc* sch)
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	PSCHED_GET_TIME(q->t_c);
+	q->t_c = psched_get_time();
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
@@ -377,11 +363,8 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	PSCHED_GET_TIME(q->t_c);
-	init_timer(&q->wd_timer);
-	q->wd_timer.function = tbf_watchdog;
-	q->wd_timer.data = (unsigned long)sch;
-
+	q->t_c = psched_get_time();
+	qdisc_watchdog_init(&q->watchdog, sch);
 	q->qdisc = &noop_qdisc;
 
 	return tbf_change(sch, opt);
@@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 
 	if (q->P_tab)
 		qdisc_put_rtab(q->P_tab);
@@ -404,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch)
 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_tbf_qopt opt;
 
@@ -420,12 +403,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.mtu = q->mtu;
 	opt.buffer = q->buffer;
 	RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 587123c61af9..d24914db7861 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -323,7 +323,7 @@ restart:
 			nores = 1;
 			break;
 		}
-		__skb_pull(skb, skb->nh.raw - skb->data);
+		__skb_pull(skb, skb_network_offset(skb));
 	} while ((q = NEXT_SLAVE(q)) != start);
 
 	if (nores && skb_res == NULL) {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 78d2ddb5ca18..db73ef97485a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -143,7 +143,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	/* Initialize the maximum mumber of new data packets that can be sent
 	 * in a burst.
 	 */
-	asoc->max_burst = sctp_max_burst;
+	asoc->max_burst = sp->max_burst;
 
 	/* initialize association timers */
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
@@ -714,8 +714,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 	/* Record the transition on the transport.  */
 	switch (command) {
 	case SCTP_TRANSPORT_UP:
+		/* If we are moving from UNCONFIRMED state due
+		 * to heartbeat success, report the SCTP_ADDR_CONFIRMED
+		 * state to the user, otherwise report SCTP_ADDR_AVAILABLE.
+		 */
+		if (SCTP_UNCONFIRMED == transport->state &&
+		    SCTP_HEARTBEAT_SUCCESS == error)
+			spc_state = SCTP_ADDR_CONFIRMED;
+		else
+			spc_state = SCTP_ADDR_AVAILABLE;
 		transport->state = SCTP_ACTIVE;
-		spc_state = SCTP_ADDR_AVAILABLE;
 		break;
 
 	case SCTP_TRANSPORT_DOWN:
@@ -725,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 
 	default:
 		return;
-	};
+	}
 
 	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
 	 * user.
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c9..e8c0f7435d7f 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
 		return "FWD_TSN";
 
 	default:
-		return "unknown chunk";
-	};
+		break;
+	}
+
 	return "unknown chunk";
 }
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71db66873695..885109fb3dda 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -79,14 +79,10 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
 /* Calculate the SCTP checksum of an SCTP packet.  */
 static inline int sctp_rcv_checksum(struct sk_buff *skb)
 {
-	struct sctphdr *sh;
-	__u32 cmp, val;
 	struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-	sh = (struct sctphdr *) skb->h.raw;
-	cmp = ntohl(sh->checksum);
-
-	val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+	struct sctphdr *sh = sctp_hdr(skb);
+	__u32 cmp = ntohl(sh->checksum);
+	__u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
 
 	for (; list; list = list->next)
 		val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
@@ -138,14 +134,13 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb_linearize(skb))
 		goto discard_it;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 
 	/* Pull up the IP and SCTP headers. */
-	__skb_pull(skb, skb->h.raw - skb->data);
+	__skb_pull(skb, skb_transport_offset(skb));
 	if (skb->len < sizeof(struct sctphdr))
 		goto discard_it;
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
-	    (sctp_rcv_checksum(skb) < 0))
+	if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
 		goto discard_it;
 
 	skb_pull(skb, sizeof(struct sctphdr));
@@ -154,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb->len < sizeof(struct sctp_chunkhdr))
 		goto discard_it;
 
-	family = ipver2af(skb->nh.iph->version);
+	family = ipver2af(ip_hdr(skb)->version);
 	af = sctp_get_af_specific(family);
 	if (unlikely(!af))
 		goto discard_it;
@@ -510,30 +505,30 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
 void sctp_v4_err(struct sk_buff *skb, __u32 info)
 {
 	struct iphdr *iph = (struct iphdr *)skb->data;
-	struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int ihlen = iph->ihl * 4;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	struct sctp_association *asoc = NULL;
 	struct sctp_transport *transport;
 	struct inet_sock *inet;
-	char *saveip, *savesctp;
+	sk_buff_data_t saveip, savesctp;
 	int err;
 
-	if (skb->len < ((iph->ihl << 2) + 8)) {
+	if (skb->len < ihlen + 8) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
 	}
 
 	/* Fix up skb to look at the embedded net header. */
-	saveip = skb->nh.raw;
-	savesctp  = skb->h.raw;
-	skb->nh.iph = iph;
-	skb->h.raw = (char *)sh;
-	sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
-	/* Put back, the original pointers. */
-	skb->nh.raw = saveip;
-	skb->h.raw = savesctp;
+	saveip = skb->network_header;
+	savesctp = skb->transport_header;
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, ihlen);
+	sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+	/* Put back, the original values. */
+	skb->network_header = saveip;
+	skb->transport_header = savesctp;
 	if (!sk) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
@@ -616,7 +611,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 			break;
 
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb->tail)
+		if (ch_end > skb_tail_pointer(skb))
 			break;
 
 		/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -648,7 +643,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 		}
 
 		ch = (sctp_chunkhdr_t *) ch_end;
-	} while (ch_end < skb->tail);
+	} while (ch_end < skb_tail_pointer(skb));
 
 	return 0;
 
@@ -905,7 +900,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 	struct sctp_association *asoc;
 	union sctp_addr addr;
 	union sctp_addr *paddr = &addr;
-	struct sctphdr *sh = (struct sctphdr *) skb->h.raw;
+	struct sctphdr *sh = sctp_hdr(skb);
 	sctp_chunkhdr_t *ch;
 	union sctp_params params;
 	sctp_init_chunk_t *init;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30629e17781..88aa22407549 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 	 * the skb->tail.
 	 */
 	if (unlikely(skb_is_nonlinear(chunk->skb))) {
-		if (chunk->chunk_end > chunk->skb->tail)
-			chunk->chunk_end = chunk->skb->tail;
+		if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
+			chunk->chunk_end = skb_tail_pointer(chunk->skb);
 	}
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-	if (chunk->chunk_end < chunk->skb->tail) {
+	if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
 		/* This is not a singleton */
 		chunk->singleton = 0;
-	} else if (chunk->chunk_end > chunk->skb->tail) {
+	} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
 		/* RFC 2960, Section 6.10  Bundling
 		 *
 		 * Partial chunks MUST NOT be placed in an SCTP packet.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0b9c49b3a100..ca527a27dd05 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,26 +122,24 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			     int type, int code, int offset, __be32 info)
 {
 	struct inet6_dev *idev;
-	struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
-	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
 	struct sock *sk;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 	struct ipv6_pinfo *np;
-	char *saveip, *savesctp;
+	sk_buff_data_t saveip, savesctp;
 	int err;
 
 	idev = in6_dev_get(skb->dev);
 
 	/* Fix up skb to look at the embedded net header. */
-	saveip = skb->nh.raw;
-	savesctp  = skb->h.raw;
-	skb->nh.ipv6h = iph;
-	skb->h.raw = (char *)sh;
-	sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
+	saveip	 = skb->network_header;
+	savesctp = skb->transport_header;
+	skb_reset_network_header(skb);
+	skb_set_transport_header(skb, offset);
+	sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
-	skb->nh.raw = saveip;
-	skb->h.raw = savesctp;
+	skb->network_header   = saveip;
+	skb->transport_header = savesctp;
 	if (!sk) {
 		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
 		goto out;
@@ -391,13 +389,13 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 	addr->v6.sin6_flowinfo = 0; /* FIXME */
 	addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &skb->nh.ipv6h->saddr;
+		from = &ipv6_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &skb->nh.ipv6h->daddr;
+		from = &ipv6_hdr(skb)->daddr;
 	}
 	ipv6_addr_copy(&addr->v6.sin6_addr, from);
 }
@@ -606,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
 	default:
 		retval = SCTP_SCOPE_GLOBAL;
 		break;
-	};
+	}
 
 	return retval;
 }
@@ -699,7 +697,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v6_is_ce(const struct sk_buff *skb)
 {
-	return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20);
+	return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
 }
 
 /* Dump the v6 addr to the seq file. */
@@ -766,19 +764,19 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 	if (msgname) {
 		sctp_inet6_msgname(msgname, addr_len);
 		sin6 = (struct sockaddr_in6 *)msgname;
-		sh = (struct sctphdr *)skb->h.raw;
+		sh = sctp_hdr(skb);
 		sin6->sin6_port = sh->source;
 
 		/* Map ipv4 address into v4-mapped-on-v6 address. */
 		if (sctp_sk(skb->sk)->v4mapped &&
-		    skb->nh.iph->version == 4) {
+		    ip_hdr(skb)->version == 4) {
 			sctp_v4_map_v6((union sctp_addr *)sin6);
-			sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr;
+			sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
 			return;
 		}
 
 		/* Otherwise, just copy the v6 address. */
-		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
 			struct sctp_ulpevent *ev = sctp_skb2event(skb);
 			sin6->sin6_scope_id = ev->iif;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced54..d85543def754 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 	case SCTP_XMIT_OK:
 	case SCTP_XMIT_NAGLE_DELAY:
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627e..992f361084b7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 				SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
 			q->empty = 0;
 			break;
-		};
+		}
 	} else {
 		list_add_tail(&chunk->list, &q->control_chunk_list);
 		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			/* Retrieve a new chunk to bundle. */
 			lchunk = sctp_list_dequeue(lqueue);
 			break;
-		};
+		}
 
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		default:
 			/* We built a chunk with an illegal type! */
 			BUG();
-		};
+		}
 	}
 
 	/* Is it OK to send data chunks?  */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
 				dbg_prt_state = 0;
 				dbg_ack_tsn = tsn;
-			};
+			}
 
 			dbg_last_ack_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
 				dbg_prt_state = 1;
 				dbg_kept_tsn = tsn;
-			};
+			}
 
 			dbg_last_kept_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 		} else {
 			SCTP_DEBUG_PRINTK("\n");
 		}
-	};
+	}
 #endif /* SCTP_DEBUG */
 	if (transport) {
 		if (bytes_acked) {
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e17a823ca90f..c361deb6cea9 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -235,13 +235,13 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 	port = &addr->v4.sin_port;
 	addr->v4.sin_family = AF_INET;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &skb->nh.iph->saddr;
+		from = &ip_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &skb->nh.iph->daddr;
+		from = &ip_hdr(skb)->daddr;
 	}
 	memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
 }
@@ -530,7 +530,7 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v4_is_ce(const struct sk_buff *skb)
 {
-	return INET_ECN_is_ce(skb->nh.iph->tos);
+	return INET_ECN_is_ce(ip_hdr(skb)->tos);
 }
 
 /* Create and initialize a new sk for the socket returned by accept(). */
@@ -731,15 +731,13 @@ static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
 /* Initialize and copy out a msgname from an inbound skb. */
 static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
 {
-	struct sctphdr *sh;
-	struct sockaddr_in *sin;
-
 	if (msgname) {
+		struct sctphdr *sh = sctp_hdr(skb);
+		struct sockaddr_in *sin = (struct sockaddr_in *)msgname;
+
 		sctp_inet_msgname(msgname, len);
-		sin = (struct sockaddr_in *)msgname;
-		sh = (struct sctphdr *)skb->h.raw;
 		sin->sin_port = sh->source;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 	}
 }
 
@@ -1044,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_cookie_preserve_enable 	= 1;
 
 	/* Max.Burst		    - 4 */
-	sctp_max_burst 			= SCTP_MAX_BURST;
+	sctp_max_burst 			= SCTP_DEFAULT_MAX_BURST;
 
 	/* Association.Max.Retrans  - 10 attempts
 	 * Path.Max.Retrans         - 5  attempts (per destination address)
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7fb29d5a0c7..be783a3761c4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -86,7 +86,7 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
 	struct sctp_af *af;
 	int iif = 0;
 
-	af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version));
+	af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
 	if (af)
 		iif = af->skb_iif(chunk->skb);
 
@@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 
 	/* Adjust the chunk length field.  */
 	chunk->chunk_hdr->length = htons(chunklen + padlen + len);
-	chunk->chunk_end = chunk->skb->tail;
+	chunk->chunk_end = skb_tail_pointer(chunk->skb);
 
 	return target;
 }
@@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
 	/* Adjust the chunk length field.  */
 	chunk->chunk_hdr->length =
 		htons(ntohs(chunk->chunk_hdr->length) + len);
-	chunk->chunk_end = chunk->skb->tail;
+	chunk->chunk_end = skb_tail_pointer(chunk->skb);
 
 out:
 	return err;
@@ -1233,7 +1233,7 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 	asoc->temp = 1;
 	skb = chunk->skb;
 	/* Create an entry for the source address of the packet.  */
-	af = sctp_get_af_specific(ipver2af(skb->nh.iph->version));
+	af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
 	if (unlikely(!af))
 		goto fail;
 	af->from_skb(&asoc->c.peer_addr, skb, 1);
@@ -2077,7 +2077,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 
 			default: /* Just ignore anything else.  */
 				break;
-			};
+			}
 		}
 		break;
 
@@ -2118,7 +2118,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 		SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
 				  ntohs(param.p->type), asoc);
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 135567493119..b37a7adeb150 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -464,7 +464,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 	struct sctp_ulpevent *event;
 
 	event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
-						(__u16)error, 0, 0,
+						(__u16)error, 0, 0, NULL,
 						GFP_ATOMIC);
 
 	if (event)
@@ -492,8 +492,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 	/* Cancel any partial delivery in progress. */
 	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
 
-	event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
-						(__u16)error, 0, 0,
+	if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
+		event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+						(__u16)error, 0, 0, chunk,
+						GFP_ATOMIC);
+	else
+		event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+						(__u16)error, 0, 0, NULL,
 						GFP_ATOMIC);
 	if (event)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
@@ -1004,7 +1009,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
 		       status, state, event_type, subtype.chunk);
 		BUG();
 		break;
-	};
+	}
 
 bail:
 	return error;
@@ -1484,7 +1489,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			printk(KERN_WARNING "Impossible command: %u, %p\n",
 			       cmd->verb, cmd->obj.ptr);
 			break;
-		};
+		}
+
 		if (error)
 			break;
 	}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e9097cf614ba..9e28a5d51200 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -186,7 +186,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	 * notification is passed to the upper layer.
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
-					     0, 0, 0, GFP_ATOMIC);
+					     0, 0, 0, NULL, GFP_ATOMIC);
 	if (ev)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 				SCTP_ULPEVENT(ev));
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 
@@ -661,7 +661,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -790,7 +790,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
 					     0, asoc->c.sinit_num_ostreams,
 					     asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 
 	if (!ev)
 		goto nomem;
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
 		new_asoc->c.my_ttag   = asoc->c.my_vtag;
 		new_asoc->c.peer_ttag = asoc->c.peer_vtag;
 		break;
-	};
+	}
 
 	/* Other parameters for the endpoint SHOULD be copied from the
 	 * existing parameters of the association (e.g. number of
@@ -1625,7 +1625,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -1691,7 +1691,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -1786,7 +1786,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 					     SCTP_COMM_UP, 0,
 					     asoc->c.sinit_num_ostreams,
 					     asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+                                             NULL, GFP_ATOMIC);
 		if (!ev)
 			goto nomem;
 
@@ -1904,7 +1904,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 	/* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1936,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 	default: /* Discard packet for all others. */
 		retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		break;
-	};
+	}
 
 	/* Delete the tempory new association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -3035,7 +3035,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	 * notification is passed to the upper layer.
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
-					     0, 0, 0, GFP_ATOMIC);
+					     0, 0, 0, NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem;
 
@@ -3115,7 +3115,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 			break;
 
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb->tail)
+		if (ch_end > skb_tail_pointer(skb))
 			break;
 
 		if (SCTP_CID_SHUTDOWN_ACK == ch->type)
@@ -3130,7 +3130,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
 		ch = (sctp_chunkhdr_t *) ch_end;
-	} while (ch_end < skb->tail);
+	} while (ch_end < skb_tail_pointer(skb));
 
 	if (ootb_shut_ack)
 		sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
@@ -4816,7 +4816,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 	default:
 		BUG();
 		break;
-	};
+	}
 
 	if (!reply)
 		goto nomem;
@@ -5286,7 +5286,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		chunk->ecn_ce_done = 1;
 
 		af = sctp_get_af_specific(
-			ipver2af(chunk->skb->nh.iph->version));
+			ipver2af(ip_hdr(chunk->skb)->version));
 
 		if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
 			/* Do real work as sideffect. */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f4..523071c7902f 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	default:
 		/* Yikes!  We got an illegal event type.  */
 		return &bug;
-	};
+	}
 }
 
 #define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 536298c2eda2..2fc0a92caa78 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -627,6 +627,12 @@ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
 			retval = -EINVAL;
 			goto err_bindx_rem;
 		}
+
+		if (!af->addr_valid(sa_addr, sp, NULL)) {
+			retval = -EADDRNOTAVAIL;
+			goto err_bindx_rem;
+		}
+
 		if (sa_addr->v4.sin_port != htons(bp->port)) {
 			retval = -EINVAL;
 			goto err_bindx_rem;
@@ -935,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
 	default:
 		err = -EINVAL;
 		break;
-	};
+	}
 
 out:
 	kfree(kaddrs);
@@ -2033,6 +2039,10 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     SPP_HB_DEMAND - Request a user initiated heartbeat
  *                     to be made immediately.
  *
+ *                     SPP_HB_TIME_IS_ZERO - Specifies that the time for
+ *                     heartbeat delay is to be set to the value of 0
+ *                     milliseconds.
+ *
  *                     SPP_PMTUD_ENABLE - This field will enable PMTU
  *                     discovery upon the specified address. Note that
  *                     if the address feild is empty then all addresses
@@ -2075,13 +2085,30 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			return error;
 	}
 
-	if (params->spp_hbinterval) {
-		if (trans) {
-			trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
-		} else if (asoc) {
-			asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
-		} else {
-			sp->hbinterval = params->spp_hbinterval;
+	/* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of
+	 * this field is ignored.  Note also that a value of zero indicates
+	 * the current setting should be left unchanged.
+	 */
+	if (params->spp_flags & SPP_HB_ENABLE) {
+
+		/* Re-zero the interval if the SPP_HB_TIME_IS_ZERO is
+		 * set.  This lets us use 0 value when this flag
+		 * is set.
+		 */
+		if (params->spp_flags & SPP_HB_TIME_IS_ZERO)
+			params->spp_hbinterval = 0;
+
+		if (params->spp_hbinterval ||
+		    (params->spp_flags & SPP_HB_TIME_IS_ZERO)) {
+			if (trans) {
+				trans->hbinterval =
+				    msecs_to_jiffies(params->spp_hbinterval);
+			} else if (asoc) {
+				asoc->hbinterval =
+				    msecs_to_jiffies(params->spp_hbinterval);
+			} else {
+				sp->hbinterval = params->spp_hbinterval;
+			}
 		}
 	}
 
@@ -2098,7 +2125,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_pathmtu) {
+	/* When Path MTU discovery is disabled the value specified here will
+	 * be the "fixed" path mtu (i.e. the value of the spp_flags field must
+	 * include the flag SPP_PMTUD_DISABLE for this field to have any
+	 * effect).
+	 */
+	if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
 		if (trans) {
 			trans->pathmtu = params->spp_pathmtu;
 			sctp_assoc_sync_pmtu(asoc);
@@ -2129,7 +2161,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_sackdelay) {
+	/* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the
+	 * value of this field is ignored.  Note also that a value of zero
+	 * indicates the current setting should be left unchanged.
+	 */
+	if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) {
 		if (trans) {
 			trans->sackdelay =
 				msecs_to_jiffies(params->spp_sackdelay);
@@ -2157,7 +2193,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_pathmaxrxt) {
+	/* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value
+	 * of this field is ignored.  Note also that a value of zero
+	 * indicates the current setting should be left unchanged.
+	 */
+	if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) {
 		if (trans) {
 			trans->pathmaxrxt = params->spp_pathmaxrxt;
 		} else if (asoc) {
@@ -2249,7 +2289,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	return 0;
 }
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
@@ -2786,6 +2826,102 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
 	return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This option will at a minimum specify if the implementation is doing
+ * fragmented interleave.  Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations.  Some implementations
+ * may allow you to turn this value on or off.  If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket).  When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to.
+ *
+ * This option takes a boolean value.  A non-zero value indicates that
+ * fragmented interleave is on.  A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default.  Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+					       char __user *optval,
+					       int optlen)
+{
+	int __user *uarg = (int __user *)optval;
+	int requested;
+
+	if (sizeof(int) != optlen)
+		return -EINVAL;
+	if (get_user(requested, uarg))
+		return -EFAULT;
+	/* Collapse any non-zero request to 1 so the stored flag is 0 or 1. */
+	sctp_sk(sk)->frag_interleave = !!requested;
+	return 0;
+}
+
+/*
+ * 7.1.25.  Set or Get the sctp partial delivery point
+ *       (SCTP_PARTIAL_DELIVERY_POINT)
+ * This option will set or get the SCTP partial delivery point.  This
+ * point is the size of a message where the partial delivery API will be
+ * invoked to help free up rwnd space for the peer.  Setting this to a
+ * lower value will cause partial deliveries to happen more often.  The
+ * call's argument is an integer that sets or gets the partial delivery
+ * point.
+ */
+static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
+						  char __user *optval,
+						  int optlen)
+{
+	int __user *uarg = (int __user *)optval;
+	u32 point;
+
+	if (sizeof(u32) != optlen)
+		return -EINVAL;
+	if (get_user(point, uarg))
+		return -EFAULT;
+	/* Stored as given; no range check is applied to the new point. */
+	sctp_sk(sk)->pd_point = point;
+	return 0;
+}
+
+/*
+ * 7.1.28.  Set or Get the maximum burst (SCTP_MAX_BURST)
+ *
+ * This option will allow a user to change the maximum burst of packets
+ * that can be emitted by this association.  Note that the default value
+ * is 4, and some implementations may restrict this setting so that it
+ * can only be lowered.
+ *
+ * NOTE: This text doesn't seem right.  Do this on a socket basis with
+ * future associations inheriting the socket value.
+ */
+static int sctp_setsockopt_maxburst(struct sock *sk,
+				    char __user *optval,
+				    int optlen)
+{
+	int __user *uarg = (int __user *)optval;
+	int burst;
+
+	if (sizeof(int) != optlen)
+		return -EINVAL;
+	if (get_user(burst, uarg))
+		return -EFAULT;
+
+	/* Only non-negative burst sizes are accepted. */
+	if (burst < 0)
+		return -EINVAL;
+
+	sctp_sk(sk)->max_burst = burst;
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2865,6 +3001,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_DELAYED_ACK_TIME:
 		retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
 		break;
+	case SCTP_PARTIAL_DELIVERY_POINT:
+		retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
+		break;
 
 	case SCTP_INITMSG:
 		retval = sctp_setsockopt_initmsg(sk, optval, optlen);
@@ -2900,11 +3039,16 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_CONTEXT:
 		retval = sctp_setsockopt_context(sk, optval, optlen);
 		break;
-
+	case SCTP_FRAGMENT_INTERLEAVE:
+		retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+		break;
+	case SCTP_MAX_BURST:
+		retval = sctp_setsockopt_maxburst(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 
@@ -3060,6 +3204,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->default_timetolive = 0;
 
 	sp->default_rcv_context = 0;
+	sp->max_burst = sctp_max_burst;
 
 	/* Initialize default setup parameters. These parameters
 	 * can be modified with the SCTP_INITMSG socket option or
@@ -3128,8 +3273,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->pf = sctp_get_pf_specific(sk->sk_family);
 
 	/* Control variables for partial data delivery. */
-	sp->pd_mode           = 0;
+	atomic_set(&sp->pd_mode, 0);
 	skb_queue_head_init(&sp->pd_lobby);
+	sp->frag_interleave = 0;
 
 	/* Create a per socket endpoint structure.  Even if we
 	 * change the data structure relationships, this may still
@@ -3636,7 +3782,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	return 0;
 }
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
@@ -3841,7 +3987,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
 		memcpy(&temp, &from->ipaddr, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
-		if(space_left < addrlen)
+		if (space_left < addrlen)
 			return -ENOMEM;
 		if (copy_to_user(to, &temp, addrlen))
 			return -EFAULT;
@@ -3930,8 +4076,9 @@ done:
 /* Helper function that copies local addresses to user and returns the number
  * of addresses copied.
  */
-static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_addrs,
-					void __user *to)
+static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
+					int max_addrs, void *to,
+					int *bytes_copied)
 {
 	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
@@ -3948,10 +4095,10 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
 								&temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if (copy_to_user(to, &temp, addrlen))
-			return -EFAULT;
+		memcpy(to, &temp, addrlen);
 
 		to += addrlen;
+		*bytes_copied += addrlen;
 		cnt ++;
 		if (cnt >= max_addrs) break;
 	}
@@ -3959,8 +4106,8 @@ static int sctp_copy_laddrs_to_user_old(struct sock *sk, __u16 port, int max_add
 	return cnt;
 }
 
-static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
-				    void __user **to, size_t space_left)
+static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
+			    size_t space_left, int *bytes_copied)
 {
 	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
@@ -3977,14 +4124,14 @@ static int sctp_copy_laddrs_to_user(struct sock *sk, __u16 port,
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
 								&temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if(space_left<addrlen)
+		if (space_left < addrlen)
 			return -ENOMEM;
-		if (copy_to_user(*to, &temp, addrlen))
-			return -EFAULT;
+		memcpy(to, &temp, addrlen);
 
-		*to += addrlen;
+		to += addrlen;
 		cnt ++;
 		space_left -= addrlen;
+		*bytes_copied += addrlen;	/* bump the caller's total, not the pointer */
 	}
 
 	return cnt;
@@ -4008,6 +4155,8 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	int addrlen;
 	rwlock_t *addr_lock;
 	int err = 0;
+	void *addrs;
+	int bytes_copied = 0;
 
 	if (len != sizeof(struct sctp_getaddrs_old))
 		return -EINVAL;
@@ -4035,6 +4184,15 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 
 	to = getaddrs.addrs;
 
+	/* Allocate a local packed array for all the addresses; they are
+	 * gathered here first and written to the user in one shot.
+	 * kcalloc() rejects an addr_num whose byte size would overflow.
+	 */
+	addrs = kcalloc(getaddrs.addr_num, sizeof(union sctp_addr),
+			GFP_KERNEL);
+	if (!addrs)
+		return -ENOMEM;
+
 	sctp_read_lock(addr_lock);
 
 	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
@@ -4044,13 +4202,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
 		if (sctp_is_any(&addr->a)) {
-			cnt = sctp_copy_laddrs_to_user_old(sk, bp->port,
-							   getaddrs.addr_num,
-							   to);
-			if (cnt < 0) {
-				err = cnt;
-				goto unlock;
-			}
+			cnt = sctp_copy_laddrs_old(sk, bp->port,
+						   getaddrs.addr_num,
+						   addrs, &bytes_copied);
 			goto copy_getaddrs;
 		}
 	}
@@ -4060,22 +4214,29 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if (copy_to_user(to, &temp, addrlen)) {
-			err = -EFAULT;
-			goto unlock;
-		}
-		to += addrlen;
+		/* Append to the kernel buffer; 'to' stays at the user array start. */
+		memcpy(addrs + bytes_copied, &temp, addrlen);
+		bytes_copied += addrlen;
 		cnt ++;
 		if (cnt >= getaddrs.addr_num) break;
 	}
 
 copy_getaddrs:
+	sctp_read_unlock(addr_lock);
+
+	/* copy the entire address list into the user provided space */
+	if (copy_to_user(to, addrs, bytes_copied)) {
+		err = -EFAULT;
+		goto error;
+	}
+
+	/* copy the leading structure back to user */
 	getaddrs.addr_num = cnt;
 	if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old)))
 		err = -EFAULT;
 
-unlock:
-	sctp_read_unlock(addr_lock);
+error:
+	kfree(addrs);
 	return err;
 }
 
@@ -4095,7 +4256,8 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	rwlock_t *addr_lock;
 	int err = 0;
 	size_t space_left;
-	int bytes_copied;
+	int bytes_copied = 0;
+	void *addrs;
 
 	if (len <= sizeof(struct sctp_getaddrs))
 		return -EINVAL;
@@ -4123,6 +4285,9 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	to = optval + offsetof(struct sctp_getaddrs,addrs);
 	space_left = len - sizeof(struct sctp_getaddrs) -
 			 offsetof(struct sctp_getaddrs,addrs);
+	addrs = kmalloc(space_left, GFP_KERNEL);
+	if (!addrs)
+		return -ENOMEM;
 
 	sctp_read_lock(addr_lock);
 
@@ -4133,11 +4298,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
 		if (sctp_is_any(&addr->a)) {
-			cnt = sctp_copy_laddrs_to_user(sk, bp->port,
-						       &to, space_left);
+			cnt = sctp_copy_laddrs(sk, bp->port, addrs,
+						space_left, &bytes_copied);
 			if (cnt < 0) {
 				err = cnt;
-				goto unlock;
+				goto error_lock;
 			}
 			goto copy_getaddrs;
 		}
@@ -4148,26 +4313,36 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if(space_left < addrlen)
-			return -ENOMEM; /*fixme: right error?*/
-		if (copy_to_user(to, &temp, addrlen)) {
-			err = -EFAULT;
-			goto unlock;
+		if (space_left < addrlen) {
+			err =  -ENOMEM; /*fixme: right error?*/
+			goto error_lock;
 		}
-		to += addrlen;
+		memcpy(addrs + bytes_copied, &temp, addrlen);
+		bytes_copied += addrlen;
 		cnt ++;
 		space_left -= addrlen;
 	}
 
 copy_getaddrs:
-	if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num))
-		return -EFAULT;
-	bytes_copied = ((char __user *)to) - optval;
-	if (put_user(bytes_copied, optlen))
-		return -EFAULT;
+	sctp_read_unlock(addr_lock);
+
+	if (copy_to_user(to, addrs, bytes_copied)) {
+		err = -EFAULT;
+		goto error;
+	}
+	if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num)) {
+		err = -EFAULT;
+		goto error;
+	}
+	if (put_user(bytes_copied, optlen))
+		err = -EFAULT;
+	goto error;
 
-unlock:
-	sctp_read_unlock(addr_lock);
+error_lock:
+	/* Failures inside the address walk arrive here with addr_lock held. */
+	sctp_read_unlock(addr_lock);
+error:
+	kfree(addrs);
 	return err;
 }
 
@@ -4530,6 +4700,77 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
 	return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+					       char __user *optval, int __user *optlen)
+{
+	int interleave;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	/* Report exactly one int back, whatever larger size was offered. */
+	len = sizeof(int);
+	interleave = sctp_sk(sk)->frag_interleave;
+
+	if (put_user(len, optlen) ||
+	    copy_to_user(optval, &interleave, len))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * 7.1.25.  Set or Get the sctp partial delivery point
+ * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point())
+ */
+static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
+						  char __user *optval,
+						  int __user *optlen)
+{
+	u32 val;
+
+	if (len < sizeof(u32))
+		return -EINVAL;
+
+	len = sizeof(u32);
+	val = sctp_sk(sk)->pd_point;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	/* Both copies to user space succeeded, so report success. */
+	return 0;
+}
+
+/*
+ * 7.1.28.  Set or Get the maximum burst (SCTP_MAX_BURST)
+ * (chapter and verse is quoted at sctp_setsockopt_maxburst())
+ */
+static int sctp_getsockopt_maxburst(struct sock *sk, int len,
+				    char __user *optval,
+				    int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = sctp_sk(sk)->max_burst;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	/* Both copies to user space succeeded, so report success. */
+	return 0;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -4642,10 +4883,21 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_CONTEXT:
 		retval = sctp_getsockopt_context(sk, len, optval, optlen);
 		break;
+	case SCTP_FRAGMENT_INTERLEAVE:
+		retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+							     optlen);
+		break;
+	case SCTP_PARTIAL_DELIVERY_POINT:
+		retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
+								optlen);
+		break;
+	case SCTP_MAX_BURST:
+		retval = sctp_getsockopt_maxburst(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 	return retval;
@@ -4970,7 +5222,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		break;
 	default:
 		break;
-	};
+	}
+
 	if (err)
 		goto cleanup;
 
@@ -5233,7 +5486,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
 
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 	return 0;
 }
@@ -5638,6 +5891,36 @@ void sctp_wait_for_close(struct sock *sk, long timeout)
 	finish_wait(sk->sk_sleep, &wait);
 }
 
+/* Run sctp_sock_rfree() on @skb's frag_list members, then on @skb itself. */
+static void sctp_sock_rfree_frag(struct sk_buff *skb)
+{
+	/* The frag_list walk is skipped entirely when data_len is zero. */
+	if (skb->data_len) {
+		struct sk_buff *piece = skb_shinfo(skb)->frag_list;
+
+		while (piece) {
+			sctp_sock_rfree_frag(piece);
+			piece = piece->next;
+		}
+	}
+	sctp_sock_rfree(skb);
+}
+
+/* Run sctp_skb_set_owner_r() on @skb's frag_list members, then on @skb. */
+static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
+{
+	/* The frag_list walk is skipped entirely when data_len is zero. */
+	if (skb->data_len) {
+		struct sk_buff *piece = skb_shinfo(skb)->frag_list;
+
+		while (piece) {
+			sctp_skb_set_owner_r_frag(piece, sk);
+			piece = piece->next;
+		}
+	}
+	sctp_skb_set_owner_r(skb, sk);
+}
+
 /* Populate the fields of the newsk from the oldsk and migrate the assoc
  * and its messages to the newsk.
  */
@@ -5692,10 +5975,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
 		event = sctp_skb2event(skb);
 		if (event->asoc == assoc) {
-			sctp_sock_rfree(skb);
+			sctp_sock_rfree_frag(skb);
 			__skb_unlink(skb, &oldsk->sk_receive_queue);
 			__skb_queue_tail(&newsk->sk_receive_queue, skb);
-			sctp_skb_set_owner_r(skb, newsk);
+			sctp_skb_set_owner_r_frag(skb, newsk);
 		}
 	}
 
@@ -5706,9 +5989,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
 	 */
 	skb_queue_head_init(&newsp->pd_lobby);
-	sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;
+	atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
 
-	if (sctp_sk(oldsk)->pd_mode) {
+	if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
 		struct sk_buff_head *queue;
 
 		/* Decide which queue to move pd_lobby skbs to. */
@@ -5723,10 +6006,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 		sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
 			event = sctp_skb2event(skb);
 			if (event->asoc == assoc) {
-				sctp_sock_rfree(skb);
+				sctp_sock_rfree_frag(skb);
 				__skb_unlink(skb, &oldsp->pd_lobby);
 				__skb_queue_tail(queue, skb);
-				sctp_skb_set_owner_r(skb, newsk);
+				sctp_skb_set_owner_r_frag(skb, newsk);
 			}
 		}
 
@@ -5734,8 +6017,18 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 		 * delivery to finish.
 		 */
 		if (assoc->ulpq.pd_mode)
-			sctp_clear_pd(oldsk);
+			sctp_clear_pd(oldsk, NULL);
+
+	}
+
+	sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) {
+		sctp_sock_rfree_frag(skb);
+		sctp_skb_set_owner_r_frag(skb, newsk);
+	}
 
+	sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) {
+		sctp_sock_rfree_frag(skb);
+		sctp_skb_set_owner_r_frag(skb, newsk);
 	}
 
 	/* Set the type of socket to indicate that it is peeled off from the
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4d8c2ab864fc..961df275d5b9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 			transport->cwnd = max(transport->cwnd/2,
 						 4*transport->asoc->pathmtu);
 		break;
-	};
+	}
 
 	transport->partial_bytes_acked = 0;
 	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2e11bc8d5d35..661ea2dd78ba 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -131,19 +131,54 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
 struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 	const struct sctp_association *asoc,
 	__u16 flags, __u16 state, __u16 error, __u16 outbound,
-	__u16 inbound, gfp_t gfp)
+	__u16 inbound, struct sctp_chunk *chunk, gfp_t gfp)
 {
 	struct sctp_ulpevent *event;
 	struct sctp_assoc_change *sac;
 	struct sk_buff *skb;
 
-	event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
+	/* If the lower layer passed in the chunk, it will be
+	 * an ABORT, so we need to include it in the sac_info.
+	 */
+	if (chunk) {
+		/* sctp_inq_pop() has already pulled off the chunk
+		 * header.  We need to put it back temporarily
+		 */
+		skb_push(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+		/* Copy the chunk data to a new skb and reserve enough
+		 * head room to use as notification.
+		 */
+		skb = skb_copy_expand(chunk->skb,
+				      sizeof(struct sctp_assoc_change), 0, gfp);
+
+		if (!skb)
+			goto fail;
+
+		/* strip the chunk header again now that we have a copy */
+		skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+		/* Embed the event fields inside the copied skb.  */
+		event = sctp_skb2event(skb);
+		sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+		/* Include the notification structure */
+		sac = (struct sctp_assoc_change *)
+			skb_push(skb, sizeof(struct sctp_assoc_change));
+
+		/* Trim the buffer to the right length.  */
+		skb_trim(skb, sizeof(struct sctp_assoc_change) +
+			 ntohs(chunk->chunk_hdr->length));
+	} else {
+		event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
 				  MSG_NOTIFICATION, gfp);
-	if (!event)
-		goto fail;
-	skb = sctp_event2skb(event);
-	sac = (struct sctp_assoc_change *)
-		skb_put(skb, sizeof(struct sctp_assoc_change));
+		if (!event)
+			goto fail;
+
+		skb = sctp_event2skb(event);
+		sac = (struct sctp_assoc_change *) skb_put(skb,
+					sizeof(struct sctp_assoc_change));
+	}
 
 	/* Socket Extensions for SCTP
 	 * 5.3.1.1 SCTP_ASSOC_CHANGE
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index bfb197e37da3..34eb977a204d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,26 +138,59 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 /* Clear the partial delivery mode for this socket.   Note: This
  * assumes that no association is currently in partial delivery mode.
  */
-int sctp_clear_pd(struct sock *sk)
+int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
 {
 	struct sctp_sock *sp = sctp_sk(sk);
 
-	sp->pd_mode = 0;
-	if (!skb_queue_empty(&sp->pd_lobby)) {
-		struct list_head *list;
-		sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
-		list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
-		INIT_LIST_HEAD(list);
-		return 1;
+	if (atomic_dec_and_test(&sp->pd_mode)) {
+		/* This means there are no other associations in PD, so
+		 * we can go ahead and clear out the lobby in one shot
+		 */
+		if (!skb_queue_empty(&sp->pd_lobby)) {
+			struct list_head *list;
+			sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
+			list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
+			INIT_LIST_HEAD(list);
+			return 1;
+		}
+	} else {
+		/* There are other associations in PD, so we only need to
+		 * pull stuff out of the lobby that belongs to the
+		 * association that is exiting PD (all of its notifications
+		 * are posted here).
+		 */
+		if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
+			struct sk_buff *skb, *tmp;
+			struct sctp_ulpevent *event;
+
+			sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
+				event = sctp_skb2event(skb);
+				if (event->asoc == asoc) {
+					__skb_unlink(skb, &sp->pd_lobby);
+					__skb_queue_tail(&sk->sk_receive_queue,
+							 skb);
+				}
+			}
+		}
 	}
+
 	return 0;
 }
 
+/* Set the pd_mode on the socket and ulpq */
+static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
+{
+	struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk);
+
+	atomic_inc(&sp->pd_mode);
+	ulpq->pd_mode = 1;
+}
+
 /* Clear the pd_mode and restart any pending messages waiting for delivery. */
 static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
 {
 	ulpq->pd_mode = 0;
-	return sctp_clear_pd(ulpq->asoc->base.sk);
+	return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
 }
 
 /* If the SKB of 'event' is on a list, it is the first such member
@@ -187,18 +220,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	 * the association the cause of the partial delivery.
 	 */
 
-	if (!sctp_sk(sk)->pd_mode) {
+	if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
 		queue = &sk->sk_receive_queue;
-	} else if (ulpq->pd_mode) {
-		if (event->msg_flags & MSG_NOTIFICATION)
-			queue = &sctp_sk(sk)->pd_lobby;
-		else {
-			clear_pd = event->msg_flags & MSG_EOR;
-			queue = &sk->sk_receive_queue;
+	} else {
+		if (ulpq->pd_mode) {
+			/* If the association is in partial delivery, we
+			 * need to finish delivering the partially processed
+			 * packet before passing any other data.  This is
+			 * because we don't truly support stream interleaving.
+			 */
+			if ((event->msg_flags & MSG_NOTIFICATION) ||
+			    (SCTP_DATA_NOT_FRAG ==
+				    (event->msg_flags & SCTP_DATA_FRAG_MASK)))
+				queue = &sctp_sk(sk)->pd_lobby;
+			else {
+				clear_pd = event->msg_flags & MSG_EOR;
+				queue = &sk->sk_receive_queue;
+			}
+		} else {
+			/*
+			 * If fragment interleave is enabled, we
+			 * can queue this to the receive queue instead
+			 * of the lobby.
+			 */
+			if (sctp_sk(sk)->frag_interleave)
+				queue = &sk->sk_receive_queue;
+			else
+				queue = &sctp_sk(sk)->pd_lobby;
 		}
-	} else
-		queue = &sctp_sk(sk)->pd_lobby;
-
+	}
 
 	/* If we are harvesting multiple skbs they will be
 	 * collected on a list.
@@ -341,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
 			break;
 		pos->next = pnext;
 		pos = pnext;
-	};
+	}
 
 	event = sctp_skb2event(f_frag);
 	SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -360,6 +410,11 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 	struct sk_buff *first_frag = NULL;
 	__u32 ctsn, next_tsn;
 	struct sctp_ulpevent *retval = NULL;
+	struct sk_buff *pd_first = NULL;
+	struct sk_buff *pd_last = NULL;
+	size_t pd_len = 0;
+	struct sctp_association *asoc;
+	u32 pd_point;
 
 	/* Initialized to 0 just to avoid compiler warning message.  Will
 	 * never be used with this value. It is referenced only after it
@@ -375,6 +430,10 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 	 * we expect to find the remaining middle fragments and the last
 	 * fragment in order. If not, first_frag is reset to NULL and we
 	 * start the next pass when we find another first fragment.
+	 *
+	 * There is a potential to do partial delivery if the user sets the
+	 * SCTP_PARTIAL_DELIVERY_POINT option. Let's count some things here
+	 * to see if we can do PD.
 	 */
 	skb_queue_walk(&ulpq->reasm, pos) {
 		cevent = sctp_skb2event(pos);
@@ -382,14 +441,32 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 
 		switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
 		case SCTP_DATA_FIRST_FRAG:
+			/* If this "FIRST_FRAG" is the first
+			 * element in the queue, then count it towards
+			 * possible PD.
+			 */
+			if (pos == ulpq->reasm.next) {
+			    pd_first = pos;
+			    pd_last = pos;
+			    pd_len = pos->len;
+			} else {
+			    pd_first = NULL;
+			    pd_last = NULL;
+			    pd_len = 0;
+			}
+
 			first_frag = pos;
 			next_tsn = ctsn + 1;
 			break;
 
 		case SCTP_DATA_MIDDLE_FRAG:
-			if ((first_frag) && (ctsn == next_tsn))
+			if ((first_frag) && (ctsn == next_tsn)) {
 				next_tsn++;
-			else
+				if (pd_first) {
+				    pd_last = pos;
+				    pd_len += pos->len;
+				}
+			} else
 				first_frag = NULL;
 			break;
 
@@ -399,8 +476,29 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 			else
 				first_frag = NULL;
 			break;
-		};
+		}
+	}
 
+	asoc = ulpq->asoc;
+	if (pd_first) {
+		/* Make sure we can enter partial delivery.
+		 * We can trigger partial delivery only if fragment
+		 * interleave is set, or the socket is not already
+		 * in partial delivery.
+		 */
+		if (!sctp_sk(asoc->base.sk)->frag_interleave &&
+		    atomic_read(&sctp_sk(asoc->base.sk)->pd_mode))
+			goto done;
+
+		cevent = sctp_skb2event(pd_first);
+		pd_point = sctp_sk(asoc->base.sk)->pd_point;
+		if (pd_point && pd_point <= pd_len) {
+			retval = sctp_make_reassembled_event(&ulpq->reasm,
+							     pd_first,
+							     pd_last);
+			if (retval)
+				sctp_ulpq_set_pd(ulpq);
+		}
 	}
 done:
 	return retval;
@@ -458,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
 			goto done;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
@@ -550,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
 			break;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
@@ -819,19 +917,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 {
 	struct sctp_ulpevent *event;
 	struct sctp_association *asoc;
+	struct sctp_sock *sp;
 
 	asoc = ulpq->asoc;
+	sp = sctp_sk(asoc->base.sk);
 
-	/* Are we already in partial delivery mode?  */
-	if (!sctp_sk(asoc->base.sk)->pd_mode) {
+	/* If the association is already in Partial Delivery mode
+	 * we have nothing to do.
+	 */
+	if (ulpq->pd_mode)
+		return;
 
+	/* If the user enabled fragment interleave socket option,
+	 * multiple associations can enter partial delivery.
+	 * Otherwise, we can only enter partial delivery if the
+	 * socket is not in partial delivery mode.
+	 */
+	if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
 		/* Is partial delivery possible?  */
 		event = sctp_ulpq_retrieve_first(ulpq);
 		/* Send event to the ULP.   */
 		if (event) {
 			sctp_ulpq_tail_event(ulpq, event);
-			sctp_sk(asoc->base.sk)->pd_mode = 1;
-			ulpq->pd_mode = 1;
+			sctp_ulpq_set_pd(ulpq);
 			return;
 		}
 	}
diff --git a/net/socket.c b/net/socket.c
index 9566e57ac7f5..1ad62c08377b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -585,6 +585,37 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	return result;
 }
 
+/*
+ * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
+ */
+void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	ktime_t kt = skb->tstamp;
+
+	if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+		struct timeval tv;
+		/* Race occurred between timestamp enabling and packet
+		   receiving.  Fill in the current time for now. */
+		if (kt.tv64 == 0)
+			kt = ktime_get_real();
+		skb->tstamp = kt;
+		tv = ktime_to_timeval(kt);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+	} else {
+		struct timespec ts;
+		/* Race occurred between timestamp enabling and packet
+		   receiving.  Fill in the current time for now. */
+		if (kt.tv64 == 0)
+			kt = ktime_get_real();
+		skb->tstamp = kt;
+		ts = ktime_to_timespec(kt);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+	}
+}
+
+EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
@@ -1292,7 +1323,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
 	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if(sock) {
+	if (sock) {
 		err = move_addr_to_kernel(umyaddr, addrlen, address);
 		if (err >= 0) {
 			err = security_socket_bind(sock,
@@ -1381,7 +1412,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
 
 	err = sock_attach_fd(newsock, newfile);
 	if (err < 0)
-		goto out_fd;
+		goto out_fd_simple;
 
 	err = security_socket_accept(sock, newsock);
 	if (err)
@@ -1414,6 +1445,11 @@ out_put:
 	fput_light(sock->file, fput_needed);
 out:
 	return err;
+out_fd_simple:
+	sock_release(newsock);
+	put_filp(newfile);
+	put_unused_fd(newfd);
+	goto out_put;
 out_fd:
 	fput(newfile);
 	put_unused_fd(newfd);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f02f24ae9468..543b085ae2c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1237,20 +1237,12 @@ static int content_open(struct inode *inode, struct file *file)
 
 	return res;
 }
-static int content_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *m = (struct seq_file *)file->private_data;
-	struct handle *han = m->private;
-	kfree(han);
-	m->private = NULL;
-	return seq_release(inode, file);
-}
 
 static const struct file_operations content_file_operations = {
 	.open		= content_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= content_release,
+	.release	= seq_release_private,
 };
 
 static ssize_t read_flush(struct file *file, char __user *buf,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6d7221fe990a..396cdbe249d1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1046,6 +1046,8 @@ call_status(struct rpc_task *task)
 		rpc_delay(task, 3*HZ);
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
+		if (task->tk_client->cl_discrtry)
+			xprt_disconnect(task->tk_xprt);
 		break;
 	case -ECONNREFUSED:
 	case -ENOTCONN:
@@ -1169,6 +1171,8 @@ call_decode(struct rpc_task *task)
 out_retry:
 	req->rq_received = req->rq_private_buf.len = 0;
 	task->tk_status = 0;
+	if (task->tk_client->cl_discrtry)
+		xprt_disconnect(task->tk_xprt);
 }
 
 /*
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04d..1d377d1ab7f4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 	desc.offset = sizeof(struct udphdr);
 	desc.count = skb->len - desc.offset;
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		goto no_checksum;
 
 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 9bae4090254c..2bd23ea2aa8b 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -383,7 +383,10 @@ void svcauth_unix_purge(void)
 static inline struct ip_map *
 ip_map_cached_get(struct svc_rqst *rqstp)
 {
-	struct ip_map *ipm = rqstp->rq_sock->sk_info_authunix;
+	struct ip_map *ipm;
+	struct svc_sock *svsk = rqstp->rq_sock;
+	spin_lock_bh(&svsk->sk_defer_lock);
+	ipm = svsk->sk_info_authunix;
 	if (ipm != NULL) {
 		if (!cache_valid(&ipm->h)) {
 			/*
@@ -391,12 +394,14 @@ ip_map_cached_get(struct svc_rqst *rqstp)
 			 * remembered, e.g. by a second mount from the
 			 * same IP address.
 			 */
-			rqstp->rq_sock->sk_info_authunix = NULL;
+			svsk->sk_info_authunix = NULL;
+			spin_unlock_bh(&svsk->sk_defer_lock);
 			cache_put(&ipm->h, &ip_map_cache);
 			return NULL;
 		}
 		cache_get(&ipm->h);
 	}
+	spin_unlock_bh(&svsk->sk_defer_lock);
 	return ipm;
 }
 
@@ -405,9 +410,15 @@ ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
 {
 	struct svc_sock *svsk = rqstp->rq_sock;
 
-	if (svsk->sk_sock->type == SOCK_STREAM && svsk->sk_info_authunix == NULL)
-		svsk->sk_info_authunix = ipm;	/* newly cached, keep the reference */
-	else
+	spin_lock_bh(&svsk->sk_defer_lock);
+	if (svsk->sk_sock->type == SOCK_STREAM &&
+	    svsk->sk_info_authunix == NULL) {
+		/* newly cached, keep the reference */
+		svsk->sk_info_authunix = ipm;
+		ipm = NULL;
+	}
+	spin_unlock_bh(&svsk->sk_defer_lock);
+	if (ipm)
 		cache_put(&ipm->h, &ip_map_cache);
 }
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f6e1eb1ea720..22f61aee4824 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -452,6 +452,8 @@ union svc_pktinfo_u {
 	struct in_pktinfo pkti;
 	struct in6_pktinfo pkti6;
 };
+#define SVC_PKTINFO_SPACE \
+	CMSG_SPACE(sizeof(union svc_pktinfo_u))
 
 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 {
@@ -491,8 +493,11 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
 	struct svc_sock	*svsk = rqstp->rq_sock;
 	struct socket	*sock = svsk->sk_sock;
 	int		slen;
-	char 		buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
-	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
+	union {
+		struct cmsghdr	hdr;
+		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
+	} buffer;
+	struct cmsghdr *cmh = &buffer.hdr;
 	int		len = 0;
 	int		result;
 	int		size;
@@ -745,8 +750,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	struct svc_sock	*svsk = rqstp->rq_sock;
 	struct svc_serv	*serv = svsk->sk_server;
 	struct sk_buff	*skb;
-	char		buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
-	struct cmsghdr *cmh = (struct cmsghdr *)buffer;
+	union {
+		struct cmsghdr	hdr;
+		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
+	} buffer;
+	struct cmsghdr *cmh = &buffer.hdr;
 	int		err, len;
 	struct msghdr msg = {
 		.msg_name = svc_addr(rqstp),
@@ -779,8 +787,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 	}
 
 	clear_bit(SK_DATA, &svsk->sk_flags);
-	while ((err == kernel_recvmsg(svsk->sk_sock, &msg, NULL,
-				      0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+	while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+				     0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
 	       (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
 		if (err == -EAGAIN) {
 			svc_sock_received(svsk);
@@ -790,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		dprintk("svc: recvfrom returned error %d\n", -err);
 	}
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
-	if (skb->tstamp.off_sec == 0) {
-		struct timeval tv;
-
-		tv.tv_sec = xtime.tv_sec;
-		tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
-		skb_set_timestamp(skb, &tv);
+	if (skb->tstamp.tv64 == 0) {
+		skb->tstamp = ktime_get_real();
 		/* Don't enable netstamp, sunrpc doesn't
 		   need that much accuracy */
 	}
-	skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+	svsk->sk_sk->sk_stamp = skb->tstamp;
 	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
 
 	/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ee6ffa01dfb1..456a14510308 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -735,16 +735,6 @@ void xprt_transmit(struct rpc_task *task)
 			xprt_reset_majortimeo(req);
 			/* Turn off autodisconnect */
 			del_singleshot_timer_sync(&xprt->timer);
-		} else {
-			/* If all request bytes have been sent,
-			 * then we must be retransmitting this one */
-			if (!req->rq_bytes_sent) {
-				if (task->tk_client->cl_discrtry) {
-					xprt_disconnect(xprt);
-					task->tk_status = -ENOTCONN;
-					return;
-				}
-			}
 		}
 	} else if (!req->rq_bytes_sent)
 		return;
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 14789a82de53..c71337a22d33 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
 int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
 			void *tlv_data, int tlv_data_size)
 {
-	struct tlv_desc *tlv = (struct tlv_desc *)buf->tail;
+	struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
 	int new_tlv_space = TLV_SPACE(tlv_data_size);
 
 	if (skb_tailroom(buf) < new_tlv_space) {
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9be4839e32c5..67bb29b44d1b 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -73,7 +73,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
 
 	clone = skb_clone(buf, GFP_ATOMIC);
 	if (clone) {
-		clone->nh.raw = clone->data;
+		skb_reset_network_header(clone);
 		dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
 		clone->dev = dev;
 		dev->hard_header(clone, dev, ETH_P_TIPC,
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 
 	if (likely(eb_ptr->bearer)) {
 	       if (likely(!dev->promiscuity) ||
-		   !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
-		   !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+		   !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+		   !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
 			size = msg_size((struct tipc_msg *)buf->data);
 			skb_trim(buf, size);
 			if (likely(buf->len == size)) {
@@ -140,7 +140,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 		return -EDQUOT;
 	if (!eb_ptr->dev) {
 		eb_ptr->dev = dev;
-		eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+		eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
 		eb_ptr->tipc_packet_type.dev = dev;
 		eb_ptr->tipc_packet_type.func = recv_msg;
 		eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 71c2f2fd405c..2124f32ef29f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1001,7 +1001,7 @@ static int link_bundle_buf(struct link *l_ptr,
 		return 0;
 
 	skb_put(bundler, pad + size);
-	memcpy(bundler->data + to_pos, buf->data, size);
+	skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
 	msg_set_size(bundler_msg, to_pos + size);
 	msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
 	dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
@@ -1109,8 +1109,8 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 			if (bundler) {
 				msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
 					 TIPC_OK, INT_H_SIZE, l_ptr->addr);
-				memcpy(bundler->data, (unchar *)&bundler_hdr,
-				       INT_H_SIZE);
+				skb_copy_to_linear_data(bundler, &bundler_hdr,
+							INT_H_SIZE);
 				skb_trim(bundler, INT_H_SIZE);
 				link_bundle_buf(l_ptr, bundler, buf);
 				buf = bundler;
@@ -1383,9 +1383,9 @@ again:
 	if (!buf)
 		return -ENOMEM;
 	buf->next = NULL;
-	memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+	skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 	hsz = msg_hdr_sz(hdr);
-	memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz);
+	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
 	msg_dbg(buf_msg(buf), ">BUILD>");
 
 	/* Chop up message: */
@@ -1416,8 +1416,8 @@ error:
 				return -EFAULT;
 			}
 		} else
-			memcpy(buf->data + fragm_crs, sect_crs, sz);
-
+			skb_copy_to_linear_data_offset(buf, fragm_crs,
+						       sect_crs, sz);
 		sect_crs += sz;
 		sect_rest -= sz;
 		fragm_crs += sz;
@@ -1442,7 +1442,7 @@ error:
 
 			buf->next = NULL;
 			prev->next = buf;
-			memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+			skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 			fragm_crs = INT_H_SIZE;
 			fragm_rest = fragm_sz;
 			msg_dbg(buf_msg(buf),"  >BUILD>");
@@ -2130,7 +2130,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		buf = l_ptr->proto_msg_queue;
 		if (!buf)
 			return;
-		memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+		skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 		return;
 	}
 	msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
@@ -2143,7 +2143,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 	if (!buf)
 		return;
 
-	memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+	skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 	msg_set_size(buf_msg(buf), msg_size);
 
 	if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -2319,8 +2319,8 @@ void tipc_link_tunnel(struct link *l_ptr,
 		     "unable to send tunnel msg\n");
 		return;
 	}
-	memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
-	memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
+	skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
+	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
 	dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
 	msg_dbg(buf_msg(buf), ">SEND>");
 	tipc_link_send_buf(tunnel, buf);
@@ -2361,7 +2361,7 @@ void tipc_link_changeover(struct link *l_ptr)
 
 		buf = buf_acquire(INT_H_SIZE);
 		if (buf) {
-			memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+			skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
 			msg_set_size(&tunnel_hdr, INT_H_SIZE);
 			dbg("%c->%c:", l_ptr->b_ptr->net_plane,
 			    tunnel->b_ptr->net_plane);
@@ -2426,8 +2426,9 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
 			     "unable to send duplicate msg\n");
 			return;
 		}
-		memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
-		memcpy(outbuf->data + INT_H_SIZE, iter->data, length);
+		skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
+		skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
+					       length);
 		dbg("%c->%c:", l_ptr->b_ptr->net_plane,
 		    tunnel->b_ptr->net_plane);
 		msg_dbg(buf_msg(outbuf), ">SEND>");
@@ -2457,7 +2458,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
 
 	eb = buf_acquire(size);
 	if (eb)
-		memcpy(eb->data, (unchar *)msg, size);
+		skb_copy_to_linear_data(eb, msg, size);
 	return eb;
 }
 
@@ -2569,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
 		if (obuf == NULL) {
 			warn("Link unable to unbundle message(s)\n");
 			break;
-		};
+		}
 		pos += align(msg_size(buf_msg(obuf)));
 		msg_dbg(buf_msg(obuf), "     /");
 		tipc_net_route_msg(obuf);
@@ -2631,9 +2632,9 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 			goto exit;
 		}
 		msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
-		memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE);
-		memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz);
-
+		skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
+		skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
+					       fragm_sz);
 		/*  Send queued messages first, if any: */
 
 		l_ptr->stats.sent_fragments++;
@@ -2733,8 +2734,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		if (pbuf != NULL) {
 			pbuf->next = *pending;
 			*pending = pbuf;
-			memcpy(pbuf->data, (unchar *)imsg, msg_data_sz(fragm));
-
+			skb_copy_to_linear_data(pbuf, imsg,
+						msg_data_sz(fragm));
 			/*  Prepare buffer for subsequent fragments. */
 
 			set_long_msg_seqno(pbuf, long_msg_seq_no);
@@ -2750,7 +2751,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		u32 fsz = get_fragm_size(pbuf);
 		u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
 		u32 exp_frags = get_expected_frags(pbuf) - 1;
-		memcpy(pbuf->data + crs, msg_data(fragm), dsz);
+		skb_copy_to_linear_data_offset(pbuf, crs,
+					       msg_data(fragm), dsz);
 		buf_discard(fbuf);
 
 		/* Is message complete? */
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 62d549063604..35d5ba1d4f42 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,8 +1,8 @@
 /*
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -71,8 +71,11 @@ static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
 static inline void msg_set_bits(struct tipc_msg *m, u32 w,
 				u32 pos, u32 mask, u32 val)
 {
-	u32 word = msg_word(m,w) & ~(mask << pos);
-	msg_set_word(m, w, (word |= (val << pos)));
+	val = (val & mask) << pos;
+	val = htonl(val);
+	mask = htonl(mask << pos);
+	m->hdr[w] &= ~mask;
+	m->hdr[w] |= val;
 }
 
 /*
@@ -786,15 +789,16 @@ static inline int msg_build(struct tipc_msg *hdr,
 	*buf = buf_acquire(sz);
 	if (!(*buf))
 		return -ENOMEM;
-	memcpy((*buf)->data, (unchar *)hdr, hsz);
+	skb_copy_to_linear_data(*buf, hdr, hsz);
 	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
 		if (likely(usrmem))
 			res = !copy_from_user((*buf)->data + pos,
 					      msg_sect[cnt].iov_base,
 					      msg_sect[cnt].iov_len);
 		else
-			memcpy((*buf)->data + pos, msg_sect[cnt].iov_base,
-			       msg_sect[cnt].iov_len);
+			skb_copy_to_linear_data_offset(*buf, pos,
+						       msg_sect[cnt].iov_base,
+						       msg_sect[cnt].iov_len);
 		pos += msg_sect[cnt].iov_len;
 	}
 	if (likely(res))
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b8e1edc2badc..4cdafa2d1d4d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 
 	if (rep_buf) {
 		skb_push(rep_buf, hdr_space);
-		rep_nlh = (struct nlmsghdr *)rep_buf->data;
+		rep_nlh = nlmsg_hdr(rep_buf);
 		memcpy(rep_nlh, req_nlh, hdr_space);
 		rep_nlh->nlmsg_len = rep_buf->len;
 		genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5f8217d4b452..bcd5da00737b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -464,7 +464,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	msg_set_size(rmsg, data_sz + hdr_sz);
 	msg_set_nametype(rmsg, msg_nametype(msg));
 	msg_set_nameinst(rmsg, msg_nameinst(msg));
-	memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz);
+	skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
 
 	/* send self-abort message when rejecting on a connected port */
 	if (msg_connected(msg)) {
@@ -1419,7 +1419,7 @@ int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
 		return -ENOMEM;
 
 	skb_push(buf, hsz);
-	memcpy(buf->data, (unchar *)msg, hsz);
+	skb_copy_to_linear_data(buf, msg, hsz);
 	destnode = msg_destnode(msg);
 	p_ptr->publ.congested = 1;
 	if (!tipc_port_congested(p_ptr)) {
@@ -1555,7 +1555,7 @@ int tipc_forward_buf2name(u32 ref,
 	if (skb_cow(buf, LONG_H_SIZE))
 		return -ENOMEM;
 	skb_push(buf, LONG_H_SIZE);
-	memcpy(buf->data, (unchar *)msg, LONG_H_SIZE);
+	skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
 	msg_dbg(buf_msg(buf),"PREP:");
 	if (likely(destport || destnode)) {
 		p_ptr->sent++;
@@ -1679,7 +1679,7 @@ int tipc_forward_buf2port(u32 ref,
 		return -ENOMEM;
 
 	skb_push(buf, DIR_MSG_H_SIZE);
-	memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE);
+	skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
 	msg_dbg(msg, "buf2port: ");
 	p_ptr->sent++;
 	if (dest->node == tipc_own_addr)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b71739fbe2c6..45832fb75ea4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1020,7 +1020,7 @@ restart:
 
 	if (!err) {
 		buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
-		sz = buf->tail - buf_crs;
+		sz = skb_tail_pointer(buf) - buf_crs;
 
 		needed = (buf_len - sz_copied);
 		sz_to_copy = (sz <= needed) ? sz : needed;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 606971645b33..aec8cf165e1a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1319,7 +1319,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		unix_attach_fds(siocb->scm, skb);
 	unix_get_secdata(siocb->scm, skb);
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
 	if (err)
 		goto out_free;
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 5d2d93dc0837..7a19e0ede289 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -277,8 +277,8 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
 		skb_push(skb, 7);
 		skb->data[0] = 0;
 		skb->data[1] = NLPID_SNAP;
-		memcpy(&skb->data[2], wanrouter_oui_ether,
-		       sizeof(wanrouter_oui_ether));
+		skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
+					       sizeof(wanrouter_oui_ether));
 		*((unsigned short*)&skb->data[5]) = htons(type);
 		break;
 
@@ -339,7 +339,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb->protocol = ethertype;
 	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
 	skb_pull(skb, cnt);
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	return ethertype;
 }
 
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
new file mode 100644
index 000000000000..a228d56a91b8
--- /dev/null
+++ b/net/wireless/Kconfig
@@ -0,0 +1,16 @@
+config CFG80211
+        tristate "Improved wireless configuration API"
+
+config WIRELESS_EXT
+	bool "Wireless extensions"
+	default n
+	---help---
+	  This option enables the legacy wireless extensions
+	  (wireless network interface configuration via ioctls.)
+
+	  Wireless extensions will be replaced by cfg80211 and
+	  will be required only by legacy drivers that implement
+	  wireless extension handlers.
+
+	  Say N (if you can) unless you know you need wireless
+	  extensions for external modules.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
new file mode 100644
index 000000000000..3a96ae60271c
--- /dev/null
+++ b/net/wireless/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_WIRELESS_EXT) += wext.o
+obj-$(CONFIG_CFG80211) += cfg80211.o
+
+cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
new file mode 100644
index 000000000000..7eabd55417a5
--- /dev/null
+++ b/net/wireless/core.c
@@ -0,0 +1,224 @@
+/*
+ * This is the linux wireless configuration interface.
+ *
+ * Copyright 2006, 2007		Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/if.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/nl80211.h>
+#include <linux/debugfs.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <net/genetlink.h>
+#include <net/cfg80211.h>
+#include <net/wireless.h>
+#include "core.h"
+#include "sysfs.h"
+
+/* name for sysfs, %d is appended */
+#define PHY_NAME "phy"
+
+MODULE_AUTHOR("Johannes Berg");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("wireless configuration support");
+
+/* RCU might be appropriate here since we usually
+ * only read the list, and that can happen quite
+ * often because we need to do it for each command */
+LIST_HEAD(cfg80211_drv_list);
+DEFINE_MUTEX(cfg80211_drv_mutex);
+static int wiphy_counter;
+
+/* for debugfs */
+static struct dentry *ieee80211_debugfs_dir;
+
+/* exported functions */
+
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
+{
+	struct cfg80211_registered_device *drv;
+	int alloc_size;
+
+	alloc_size = sizeof(*drv) + sizeof_priv;
+
+	drv = kzalloc(alloc_size, GFP_KERNEL);
+	if (!drv)
+		return NULL;
+
+	drv->ops = ops;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	drv->idx = wiphy_counter;
+
+	/* now increase counter for the next device unless
+	 * it has wrapped previously */
+	if (wiphy_counter >= 0)
+		wiphy_counter++;
+
+	mutex_unlock(&cfg80211_drv_mutex);
+
+	if (unlikely(drv->idx < 0)) {
+		/* ugh, wrapped! */
+		kfree(drv);
+		return NULL;
+	}
+
+	/* give it a proper name */
+	snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
+		 PHY_NAME "%d", drv->idx);
+
+	mutex_init(&drv->mtx);
+	mutex_init(&drv->devlist_mtx);
+	INIT_LIST_HEAD(&drv->netdev_list);
+
+	device_initialize(&drv->wiphy.dev);
+	drv->wiphy.dev.class = &ieee80211_class;
+	drv->wiphy.dev.platform_data = drv;
+
+	return &drv->wiphy;
+}
+EXPORT_SYMBOL(wiphy_new);
+
+int wiphy_register(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+	int res;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	res = device_add(&drv->wiphy.dev);
+	if (res)
+		goto out_unlock;
+
+	list_add(&drv->list, &cfg80211_drv_list);
+
+	/* add to debugfs */
+	drv->wiphy.debugfsdir =
+		debugfs_create_dir(wiphy_name(&drv->wiphy),
+				   ieee80211_debugfs_dir);
+
+	res = 0;
+out_unlock:
+	mutex_unlock(&cfg80211_drv_mutex);
+	return res;
+}
+EXPORT_SYMBOL(wiphy_register);
+
+void wiphy_unregister(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+	/* protect the device list */
+	mutex_lock(&cfg80211_drv_mutex);
+
+	BUG_ON(!list_empty(&drv->netdev_list));
+
+	/*
+	 * Try to grab drv->mtx. If a command is still in progress,
+	 * hopefully the driver will refuse it since it's tearing
+	 * down the device already. We wait for this command to complete
+	 * before unlinking the item from the list.
+	 * Note: as codified by the BUG_ON above we cannot get here if
+	 * a virtual interface is still associated. Hence, we can only
+	 * get to lock contention here if userspace issues a command
+	 * that identified the hardware by wiphy index.
+	 */
+	mutex_lock(&drv->mtx);
+	/* unlock again before freeing */
+	mutex_unlock(&drv->mtx);
+
+	list_del(&drv->list);
+	device_del(&drv->wiphy.dev);
+	debugfs_remove(drv->wiphy.debugfsdir);
+
+	mutex_unlock(&cfg80211_drv_mutex);
+}
+EXPORT_SYMBOL(wiphy_unregister);
+
+void cfg80211_dev_free(struct cfg80211_registered_device *drv)
+{
+	mutex_destroy(&drv->mtx);
+	mutex_destroy(&drv->devlist_mtx);
+	kfree(drv);
+}
+
+void wiphy_free(struct wiphy *wiphy)
+{
+	put_device(&wiphy->dev);
+}
+EXPORT_SYMBOL(wiphy_free);
+
+static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
+					 unsigned long state,
+					 void *ndev)
+{
+	struct net_device *dev = ndev;
+	struct cfg80211_registered_device *rdev;
+
+	if (!dev->ieee80211_ptr)
+		return 0;
+
+	rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+
+	switch (state) {
+	case NETDEV_REGISTER:
+		mutex_lock(&rdev->devlist_mtx);
+		list_add(&dev->ieee80211_ptr->list, &rdev->netdev_list);
+		if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+				      "phy80211")) {
+			printk(KERN_ERR "wireless: failed to add phy80211 "
+				"symlink to netdev!\n");
+		}
+		dev->ieee80211_ptr->netdev = dev;
+		mutex_unlock(&rdev->devlist_mtx);
+		break;
+	case NETDEV_UNREGISTER:
+		mutex_lock(&rdev->devlist_mtx);
+		if (!list_empty(&dev->ieee80211_ptr->list)) {
+			sysfs_remove_link(&dev->dev.kobj, "phy80211");
+			list_del_init(&dev->ieee80211_ptr->list);
+		}
+		mutex_unlock(&rdev->devlist_mtx);
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block cfg80211_netdev_notifier = {
+	.notifier_call = cfg80211_netdev_notifier_call,
+};
+
+/* Module init: register the class, the netdev notifier, then debugfs root. */
+static int __init cfg80211_init(void)
+{
+	int err = wiphy_sysfs_init();
+	if (err)
+		return err;
+
+	err = register_netdevice_notifier(&cfg80211_netdev_notifier);
+	if (err)
+		goto out_fail_notifier;
+
+	ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
+
+	return 0;
+
+out_fail_notifier:
+	wiphy_sysfs_exit();
+	return err;
+}
+module_init(cfg80211_init);
+
+static void cfg80211_exit(void)
+{
+	debugfs_remove(ieee80211_debugfs_dir);
+	unregister_netdevice_notifier(&cfg80211_netdev_notifier);
+	wiphy_sysfs_exit();
+}
+module_exit(cfg80211_exit);
diff --git a/net/wireless/core.h b/net/wireless/core.h
new file mode 100644
index 000000000000..158db1edb92a
--- /dev/null
+++ b/net/wireless/core.h
@@ -0,0 +1,49 @@
+/*
+ * Wireless configuration interface internals.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef __NET_WIRELESS_CORE_H
+#define __NET_WIRELESS_CORE_H
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/wireless.h>
+#include <net/cfg80211.h>
+
+struct cfg80211_registered_device {
+	struct cfg80211_ops *ops;	/* callbacks passed to wiphy_new() */
+	struct list_head list;		/* link in cfg80211_drv_list */
+	/* we hold this mutex during any call so that
+	 * we cannot do multiple calls at once, and also
+	 * to prevent the deregister call from proceeding
+	 * while any call is in progress */
+	struct mutex mtx;
+
+	/* wiphy index, internal only */
+	int idx;
+
+	/* associated netdevs; devlist_mtx is taken around list updates */
+	struct mutex devlist_mtx;
+	struct list_head netdev_list;
+
+	/* must be last because of the way we do wiphy_priv(),
+	 * and it should at least be aligned to NETDEV_ALIGN */
+	struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+static inline
+struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+{
+	BUG_ON(!wiphy);
+	return container_of(wiphy, struct cfg80211_registered_device, wiphy);
+}
+
+extern struct mutex cfg80211_drv_mutex;
+extern struct list_head cfg80211_drv_list;
+
+/* free object */
+extern void cfg80211_dev_free(struct cfg80211_registered_device *drv);
+
+#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
new file mode 100644
index 000000000000..3ebae1442963
--- /dev/null
+++ b/net/wireless/sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * This file provides /sys/class/ieee80211/<wiphy name>/
+ * and some default attributes.
+ *
+ * Copyright 2005-2006	Jiri Benc <jbenc@suse.cz>
+ * Copyright 2006	Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <linux/rtnetlink.h>
+#include <net/cfg80211.h>
+#include "sysfs.h"
+#include "core.h"
+
+static inline struct cfg80211_registered_device *dev_to_rdev(
+	struct device *dev)
+{
+	return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
+}
+
+static ssize_t _show_index(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx);
+}
+
+/* sysfs "macaddress": bytes read as unsigned so %.2x never sign-extends */
+static ssize_t _show_permaddr(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	unsigned char *addr = dev_to_rdev(dev)->wiphy.perm_addr;
+
+	return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
+		       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+}
+
+static struct device_attribute ieee80211_dev_attrs[] = {
+	__ATTR(index, S_IRUGO, _show_index, NULL),
+	__ATTR(macaddress, S_IRUGO, _show_permaddr, NULL),
+	{}
+};
+
+static void wiphy_dev_release(struct device *dev)
+{
+	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
+
+	cfg80211_dev_free(rdev);
+}
+
+static int wiphy_uevent(struct device *dev, char **envp,
+			int num_envp, char *buf, int size)
+{
+	/* TODO, we probably need stuff here */
+	return 0;
+}
+
+struct class ieee80211_class = {
+	.name = "ieee80211",
+	.owner = THIS_MODULE,
+	.dev_release = wiphy_dev_release,
+	.dev_attrs = ieee80211_dev_attrs,
+#ifdef CONFIG_HOTPLUG
+	.dev_uevent = wiphy_uevent,
+#endif
+};
+
+int wiphy_sysfs_init(void)
+{
+	return class_register(&ieee80211_class);
+}
+
+void wiphy_sysfs_exit(void)
+{
+	class_unregister(&ieee80211_class);
+}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
new file mode 100644
index 000000000000..65acbebd3711
--- /dev/null
+++ b/net/wireless/sysfs.h
@@ -0,0 +1,9 @@
+#ifndef __WIRELESS_SYSFS_H
+#define __WIRELESS_SYSFS_H
+
+extern int wiphy_sysfs_init(void);
+extern void wiphy_sysfs_exit(void);
+
+extern struct class ieee80211_class;
+
+#endif /* __WIRELESS_SYSFS_H */
diff --git a/net/core/wireless.c b/net/wireless/wext.c
index 9936ab11e6e0..d6aaf65192e9 100644
--- a/net/core/wireless.c
+++ b/net/wireless/wext.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -76,6 +76,9 @@
  *	o Change length in ESSID and NICK to strlen() instead of strlen()+1
  *	o Make standard_ioctl_num and standard_event_num unsigned
  *	o Remove (struct net_device *)->get_wireless_stats()
+ *
+ * v10 - 16.3.07 - Jean II
+ *	o Prevent leaking of kernel space in stream on 64 bits.
  */
 
 /***************************** INCLUDES *****************************/
@@ -94,22 +97,10 @@
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
 #include <net/netlink.h>
+#include <net/wext.h>
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
-/**************************** CONSTANTS ****************************/
-
-/* Debugging stuff */
-#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
-#undef WE_RTNETLINK_DEBUG	/* Debug RtNetlink API */
-#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
-#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
-
-/* Options */
-//CONFIG_NET_WIRELESS_RTNETLINK	/* Wireless requests over RtNetlink */
-#define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
-#define WE_SET_EVENT		/* Generate an event on some set commands */
-
 /************************* GLOBAL VARIABLES *************************/
 /*
  * You should not use global variables, because of re-entrancy.
@@ -346,8 +337,7 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.max_tokens	= sizeof(struct iw_pmksa),
 	},
 };
-static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
-					    sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
 
 /*
  * Meta-data about all the additional standard Wireless Extension events
@@ -397,8 +387,7 @@ static const struct iw_ioctl_description standard_event[] = {
 		.max_tokens	= sizeof(struct iw_pmkid_cand),
 	},
 };
-static const unsigned standard_event_num = (sizeof(standard_event) /
-					    sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
 
 /* Size (in bytes) of the various private data types */
 static const char iw_priv_type_size[] = {
@@ -427,6 +416,21 @@ static const int event_type_size[] = {
 	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
 };
 
+/* Size (in bytes) of various events, as packed */
+static const int event_type_pk_size[] = {
+	IW_EV_LCP_PK_LEN,		/* IW_HEADER_TYPE_NULL */
+	0,
+	IW_EV_CHAR_PK_LEN,		/* IW_HEADER_TYPE_CHAR */
+	0,
+	IW_EV_UINT_PK_LEN,		/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_PK_LEN,		/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_PK_LEN,		/* IW_HEADER_TYPE_ADDR */
+	0,
+	IW_EV_POINT_PK_LEN,		/* Without variable payload */
+	IW_EV_PARAM_PK_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_PK_LEN,		/* IW_HEADER_TYPE_QUAL */
+};
+
 /************************ COMMON SUBROUTINES ************************/
 /*
  * Stuff that may be used in various place or doesn't fit in one
@@ -436,26 +440,24 @@ static const int event_type_size[] = {
 /* ---------------------------------------------------------------- */
 /*
  * Return the driver handler associated with a specific Wireless Extension.
- * Called from various place, so make sure it remains efficient.
  */
-static inline iw_handler get_handler(struct net_device *dev,
-				     unsigned int cmd)
+static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
 {
 	/* Don't "optimise" the following variable, it will crash */
 	unsigned int	index;		/* *MUST* be unsigned */
 
 	/* Check if we have some wireless handlers defined */
-	if(dev->wireless_handlers == NULL)
+	if (dev->wireless_handlers == NULL)
 		return NULL;
 
 	/* Try as a standard command */
 	index = cmd - SIOCIWFIRST;
-	if(index < dev->wireless_handlers->num_standard)
+	if (index < dev->wireless_handlers->num_standard)
 		return dev->wireless_handlers->standard[index];
 
 	/* Try as a private command */
 	index = cmd - SIOCIWFIRSTPRIV;
-	if(index < dev->wireless_handlers->num_private)
+	if (index < dev->wireless_handlers->num_private)
 		return dev->wireless_handlers->private[index];
 
 	/* Not found */
@@ -466,15 +468,15 @@ static inline iw_handler get_handler(struct net_device *dev,
 /*
  * Get statistics out of the driver
  */
-static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+static struct iw_statistics *get_wireless_stats(struct net_device *dev)
 {
 	/* New location */
-	if((dev->wireless_handlers != NULL) &&
+	if ((dev->wireless_handlers != NULL) &&
 	   (dev->wireless_handlers->get_wireless_stats != NULL))
 		return dev->wireless_handlers->get_wireless_stats(dev);
 
 	/* Not found */
-	return (struct iw_statistics *) NULL;
+	return NULL;
 }
 
 /* ---------------------------------------------------------------- */
@@ -496,14 +498,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
  * netif_running(dev) test. I'm open on that one...
  * Hopefully, the driver will remember to do a commit in "open()" ;-)
  */
-static inline int call_commit_handler(struct net_device *	dev)
+static int call_commit_handler(struct net_device *dev)
 {
-	if((netif_running(dev)) &&
-	   (dev->wireless_handlers->standard[0] != NULL)) {
+	if ((netif_running(dev)) &&
+	   (dev->wireless_handlers->standard[0] != NULL))
 		/* Call the commit handler on the driver */
 		return dev->wireless_handlers->standard[0](dev, NULL,
 							   NULL, NULL);
-	} else
+	else
 		return 0;		/* Command completed successfully */
 }
 
@@ -552,14 +554,13 @@ static int iw_handler_get_iwstats(struct net_device *		dev,
 	struct iw_statistics *stats;
 
 	stats = get_wireless_stats(dev);
-	if (stats != (struct iw_statistics *) NULL) {
-
+	if (stats) {
 		/* Copy statistics to extra */
 		memcpy(extra, stats, sizeof(struct iw_statistics));
 		wrqu->data.length = sizeof(struct iw_statistics);
 
 		/* Check if we need to clear the updated flag */
-		if(wrqu->data.flags != 0)
+		if (wrqu->data.flags != 0)
 			stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
 		return 0;
 	} else
@@ -578,12 +579,12 @@ static int iw_handler_get_private(struct net_device *		dev,
 				  char *			extra)
 {
 	/* Check if the driver has something to export */
-	if((dev->wireless_handlers->num_private_args == 0) ||
+	if ((dev->wireless_handlers->num_private_args == 0) ||
 	   (dev->wireless_handlers->private_args == NULL))
 		return -EOPNOTSUPP;
 
 	/* Check if there is enough buffer up there */
-	if(wrqu->data.length < dev->wireless_handlers->num_private_args) {
+	if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
 		/* User space can't know in advance how large the buffer
 		 * needs to be. Give it a hint, so that we can support
 		 * any size buffer we want somewhat efficiently... */
@@ -618,8 +619,8 @@ static int iw_handler_get_private(struct net_device *		dev,
 /*
  * Print one entry (line) of /proc/net/wireless
  */
-static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
-						 struct net_device *dev)
+static void wireless_seq_printf_stats(struct seq_file *seq,
+				      struct net_device *dev)
 {
 	/* Get stats from the driver */
 	struct iw_statistics *stats = get_wireless_stats(dev);
@@ -662,7 +663,7 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations wireless_seq_ops = {
+static const struct seq_operations wireless_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
@@ -682,7 +683,7 @@ static const struct file_operations wireless_seq_fops = {
 	.release = seq_release,
 };
 
-int __init wireless_proc_init(void)
+int __init wext_proc_init(void)
 {
 	/* Create /proc/net/wireless entry */
 	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
@@ -717,32 +718,24 @@ static int ioctl_standard_call(struct net_device *	dev,
 	int					ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_IOCTL_DEBUG */
-
 	/* Prepare the call */
 	info.cmd = cmd;
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+	if (descr->header_type != IW_HEADER_TYPE_POINT) {
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, &(iwr->u), NULL);
 
-#ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
-		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT)))
 			wireless_send_event(dev, cmd, &(iwr->u), NULL);
-#endif	/* WE_SET_EVENT */
 	} else {
 		char *	extra;
 		int	extra_size;
@@ -782,19 +775,19 @@ static int ioctl_standard_call(struct net_device *	dev,
 		iwr->u.data.length -= essid_compat;
 
 		/* Check what user space is giving us */
-		if(IW_IS_SET(cmd)) {
+		if (IW_IS_SET(cmd)) {
 			/* Check NULL pointer */
-			if((iwr->u.data.pointer == NULL) &&
+			if ((iwr->u.data.pointer == NULL) &&
 			   (iwr->u.data.length != 0))
 				return -EFAULT;
 			/* Check if number of token fits within bounds */
-			if(iwr->u.data.length > descr->max_tokens)
+			if (iwr->u.data.length > descr->max_tokens)
 				return -E2BIG;
-			if(iwr->u.data.length < descr->min_tokens)
+			if (iwr->u.data.length < descr->min_tokens)
 				return -EINVAL;
 		} else {
 			/* Check NULL pointer */
-			if(iwr->u.data.pointer == NULL)
+			if (iwr->u.data.pointer == NULL)
 				return -EFAULT;
 			/* Save user space buffer size for checking */
 			user_length = iwr->u.data.length;
@@ -804,7 +797,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 			 * implied by the test at the end. */
 
 			/* Support for very large requests */
-			if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+			if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
 			   (user_length > descr->max_tokens)) {
 				/* Allow userspace to GET more than max so
 				 * we can support any size GET requests.
@@ -817,20 +810,14 @@ static int ioctl_standard_call(struct net_device *	dev,
 			}
 		}
 
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
-
 		/* Create the kernel buffer */
 		/*    kzalloc ensures NULL-termination for essid_compat */
 		extra = kzalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
+		if (extra == NULL)
 			return -ENOMEM;
-		}
 
 		/* If it is a SET, get all the extra data in here */
-		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
 			err = copy_from_user(extra, iwr->u.data.pointer,
 					     iwr->u.data.length *
 					     descr->token_size);
@@ -838,11 +825,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 				kfree(extra);
 				return -EFAULT;
 			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Call the handler */
@@ -853,7 +835,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 		/* If we have something to return to the user */
 		if (!ret && IW_IS_GET(cmd)) {
 			/* Check if there is enough buffer up there */
-			if(user_length < iwr->u.data.length) {
+			if (user_length < iwr->u.data.length) {
 				kfree(extra);
 				return -E2BIG;
 			}
@@ -863,18 +845,12 @@ static int ioctl_standard_call(struct net_device *	dev,
 					   descr->token_size);
 			if (err)
 				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
-#ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
-		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT))) {
-			if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+			if (descr->flags & IW_DESCR_FLAG_RESTRICT)
 				/* If the event is restricted, don't
 				 * export the payload */
 				wireless_send_event(dev, cmd, &(iwr->u), NULL);
@@ -882,14 +858,13 @@ static int ioctl_standard_call(struct net_device *	dev,
 				wireless_send_event(dev, cmd, &(iwr->u),
 						    extra);
 		}
-#endif	/* WE_SET_EVENT */
 
 		/* Cleanup - I told you it wasn't that long ;-) */
 		kfree(extra);
 	}
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	/* Here, we will generate the appropriate event if needed */
@@ -913,10 +888,8 @@ static int ioctl_standard_call(struct net_device *	dev,
  * a iw_handler but process it in your ioctl handler (i.e. use the
  * old driver API).
  */
-static inline int ioctl_private_call(struct net_device *	dev,
-				     struct ifreq *		ifr,
-				     unsigned int		cmd,
-				     iw_handler		handler)
+static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+			      unsigned int cmd, iw_handler handler)
 {
 	struct iwreq *			iwr = (struct iwreq *) ifr;
 	const struct iw_priv_args *	descr = NULL;
@@ -926,28 +899,18 @@ static inline int ioctl_private_call(struct net_device *	dev,
 	int				ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
 			descr = &(dev->wireless_handlers->private_args[i]);
 			break;
 		}
 
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	if(descr) {
-		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
-		       dev->name, descr->name,
-		       descr->set_args, descr->get_args);
-	}
-#endif	/* WE_IOCTL_DEBUG */
-
 	/* Compute the size of the set/get arguments */
-	if(descr != NULL) {
-		if(IW_IS_SET(cmd)) {
+	if (descr != NULL) {
+		if (IW_IS_SET(cmd)) {
 			int	offset = 0;	/* For sub-ioctls */
 			/* Check for sub-ioctl handler */
-			if(descr->name[0] == '\0')
+			if (descr->name[0] == '\0')
 				/* Reserve one int for sub-ioctl index */
 				offset = sizeof(__u32);
 
@@ -955,7 +918,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 			extra_size = get_priv_size(descr->set_args);
 
 			/* Does it fits in iwr ? */
-			if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+			if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
 			   ((extra_size + offset) <= IFNAMSIZ))
 				extra_size = 0;
 		} else {
@@ -963,7 +926,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 			extra_size = get_priv_size(descr->get_args);
 
 			/* Does it fits in iwr ? */
-			if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+			if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
 			   (extra_size <= IFNAMSIZ))
 				extra_size = 0;
 		}
@@ -974,7 +937,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
+	if (extra_size == 0) {
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
 	} else {
@@ -982,46 +945,33 @@ static inline int ioctl_private_call(struct net_device *	dev,
 		int	err;
 
 		/* Check what user space is giving us */
-		if(IW_IS_SET(cmd)) {
+		if (IW_IS_SET(cmd)) {
 			/* Check NULL pointer */
-			if((iwr->u.data.pointer == NULL) &&
+			if ((iwr->u.data.pointer == NULL) &&
 			   (iwr->u.data.length != 0))
 				return -EFAULT;
 
 			/* Does it fits within bounds ? */
-			if(iwr->u.data.length > (descr->set_args &
+			if (iwr->u.data.length > (descr->set_args &
 						 IW_PRIV_SIZE_MASK))
 				return -E2BIG;
-		} else {
-			/* Check NULL pointer */
-			if(iwr->u.data.pointer == NULL)
-				return -EFAULT;
-		}
-
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
+		} else if (iwr->u.data.pointer == NULL)
+			return -EFAULT;
 
 		/* Always allocate for max space. Easier, and won't last
 		 * long... */
 		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
+		if (extra == NULL)
 			return -ENOMEM;
-		}
 
 		/* If it is a SET, get all the extra data in here */
-		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
 			err = copy_from_user(extra, iwr->u.data.pointer,
 					     extra_size);
 			if (err) {
 				kfree(extra);
 				return -EFAULT;
 			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Call the handler */
@@ -1041,10 +991,6 @@ static inline int ioctl_private_call(struct net_device *	dev,
 					   extra_size);
 			if (err)
 				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Cleanup - I told you it wasn't that long ;-) */
@@ -1053,7 +999,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	return ret;
@@ -1061,11 +1007,10 @@ static inline int ioctl_private_call(struct net_device *	dev,
 
 /* ---------------------------------------------------------------- */
 /*
- * Main IOCTl dispatcher. Called from the main networking code
- * (dev_ioctl() in net/core/dev.c).
+ * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 {
 	struct net_device *dev;
 	iw_handler	handler;
@@ -1080,789 +1025,54 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 	/* A bunch of special cases, then the generic case...
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
-	switch(cmd)
-	{
-		case SIOCGIWSTATS:
-			/* Get Wireless Stats */
-			return ioctl_standard_call(dev,
-						   ifr,
-						   cmd,
-						   &iw_handler_get_iwstats);
-
-		case SIOCGIWPRIV:
-			/* Check if we have some wireless handlers defined */
-			if(dev->wireless_handlers != NULL) {
-				/* We export to user space the definition of
-				 * the private handler ourselves */
-				return ioctl_standard_call(dev,
-							   ifr,
-							   cmd,
-							   &iw_handler_get_private);
-			}
-			// ## Fall-through for old API ##
-		default:
-			/* Generic IOCTL */
-			/* Basic check */
-			if (!netif_device_present(dev))
-				return -ENODEV;
-			/* New driver API : try to find the handler */
-			handler = get_handler(dev, cmd);
-			if(handler != NULL) {
-				/* Standard and private are not the same */
-				if(cmd < SIOCIWFIRSTPRIV)
-					return ioctl_standard_call(dev,
-								   ifr,
-								   cmd,
-								   handler);
-				else
-					return ioctl_private_call(dev,
-								  ifr,
-								  cmd,
-								  handler);
-			}
-			/* Old driver API : call driver ioctl handler */
-			if (dev->do_ioctl) {
-				return dev->do_ioctl(dev, ifr, cmd);
-			}
-			return -EOPNOTSUPP;
-	}
-	/* Not reached */
-	return -EINVAL;
-}
-
-/********************** RTNETLINK REQUEST API **********************/
-/*
- * The alternate user space API to configure all those Wireless Extensions
- * is through RtNetlink.
- * This API support only the new driver API (iw_handler).
- *
- * This RtNetlink API use the same query/reply model as the ioctl API.
- * Maximum effort has been done to fit in the RtNetlink model, and
- * we support both RtNetlink Set and RtNelink Get operations.
- * On the other hand, we don't offer Dump operations because of the
- * following reasons :
- *	o Large number of parameters, most optional
- *	o Large size of some parameters (> 100 bytes)
- *	o Each parameters need to be extracted from hardware
- *	o Scan requests can take seconds and disable network activity.
- * Because of this high cost/overhead, we want to return only the
- * parameters the user application is really interested in.
- * We could offer partial Dump using the IW_DESCR_FLAG_DUMP flag.
- *
- * The API uses the standard RtNetlink socket. When the RtNetlink code
- * find a IFLA_WIRELESS field in a RtNetlink SET_LINK request,
- * it calls here.
- */
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension GET handler.
- * We do various checks and call the handler with the proper args.
- */
-static int rtnetlink_standard_get(struct net_device *	dev,
-				  struct iw_event *	request,
-				  int			request_len,
-				  iw_handler		handler,
-				  char **		p_buf,
-				  int *			p_len)
-{
-	const struct iw_ioctl_description *	descr = NULL;
-	unsigned int				cmd;
-	union iwreq_data *			wrqu;
-	int					hdr_len;
-	struct iw_request_info			info;
-	char *					buffer = NULL;
-	int					buffer_size = 0;
-	int					ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
-		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found standard handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Check if wrqu is complete */
-	hdr_len = event_type_size[descr->header_type];
-	if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have extra data in the reply or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
-		/* Create the kernel buffer that we will return.
-		 * It's at an offset to match the TYPE_POINT case... */
-		buffer_size = request_len + IW_EV_POINT_OFF;
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-		/* Copy event data */
-		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
-		/* Use our own copy of wrqu */
-		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_LEN);
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, NULL);
-
-	} else {
-		union iwreq_data	wrqu_point;
-		char *			extra = NULL;
-		int			extra_size = 0;
-
-		/* Get a temp copy of wrqu (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_LEN,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-
-		/* Calculate space needed by arguments. Always allocate
-		 * for max space. Easier, and won't last long... */
-		extra_size = descr->max_tokens * descr->token_size;
-		/* Support for very large requests */
-		if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
-		   (wrqu_point.data.length > descr->max_tokens))
-			extra_size = (wrqu_point.data.length
-				      * descr->token_size);
-		buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
-		       dev->name, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Create the kernel buffer that we will return */
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Put wrqu in the right place (just before extra).
-		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
-		 */
-		memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       ((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
-
-		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
-
-		/* Call the handler */
-		ret = handler(dev, &info, wrqu, extra);
-
-		/* Calculate real returned length */
-		extra_size = (wrqu->data.length * descr->token_size);
-		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_LEN;
-
-		/* Put the iwe header where it should, i.e. scrap the
-		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Check if there is enough buffer up there */
-		if(wrqu_point.data.length < wrqu->data.length)
-			ret = -E2BIG;
-	}
-
-	/* Return the buffer to the caller */
-	if (!ret) {
-		*p_buf = buffer;
-		*p_len = request->len;
-	} else {
-		/* Cleanup */
-		if(buffer)
-			kfree(buffer);
-	}
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension SET handler.
- * We do various checks and call the handler with the proper args.
- */
-static inline int rtnetlink_standard_set(struct net_device *	dev,
-					 struct iw_event *	request,
-					 int			request_len,
-					 iw_handler		handler)
-{
-	const struct iw_ioctl_description *	descr = NULL;
-	unsigned int				cmd;
-	union iwreq_data *			wrqu;
-	union iwreq_data			wrqu_point;
-	int					hdr_len;
-	char *					extra = NULL;
-	int					extra_size = 0;
-	struct iw_request_info			info;
-	int					ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
-		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found standard SET handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = &request->u;
-
-	/* Check if wrqu is complete */
-	hdr_len = event_type_size[descr->header_type];
-	if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have extra data in the request or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, NULL);
-
-	} else {
-		int	extra_len;
-
-		/* Put wrqu in the right place (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-		/* Don't forget about the event code... */
-		wrqu = &wrqu_point;
-
-		/* Check if number of token fits within bounds */
-		if(wrqu_point.data.length > descr->max_tokens)
-			return -E2BIG;
-		if(wrqu_point.data.length < descr->min_tokens)
-			return -EINVAL;
-
-		/* Real length of payload */
-		extra_len = wrqu_point.data.length * descr->token_size;
-
-		/* Check if request is self consistent */
-		if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
-			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
-			       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-			return -EINVAL;
-		}
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra_size = descr->max_tokens * descr->token_size;
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* Copy extra in aligned buffer */
-		memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
-		/* Call the handler */
-		ret = handler(dev, &info, &wrqu_point, extra);
-	}
-
-#ifdef WE_SET_EVENT
-	/* Generate an event to notify listeners of the change */
-	if((descr->flags & IW_DESCR_FLAG_EVENT) &&
-	   ((ret == 0) || (ret == -EIWCOMMIT))) {
-		if(descr->flags & IW_DESCR_FLAG_RESTRICT)
-			/* If the event is restricted, don't
-			 * export the payload */
-			wireless_send_event(dev, cmd, wrqu, NULL);
-		else
-			wireless_send_event(dev, cmd, wrqu, extra);
-	}
-#endif	/* WE_SET_EVENT */
-
-	/* Cleanup - I told you it wasn't that long ;-) */
-	if(extra)
-		kfree(extra);
-
-	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension GET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_get(struct net_device *	dev,
-					struct iw_event *	request,
-					int			request_len,
-					iw_handler		handler,
-					char **			p_buf,
-					int *			p_len)
-{
-	const struct iw_priv_args *	descr = NULL;
-	unsigned int			cmd;
-	union iwreq_data *		wrqu;
-	int				hdr_len;
-	struct iw_request_info		info;
-	int				extra_size = 0;
-	int				i;
-	char *				buffer = NULL;
-	int				buffer_size = 0;
-	int				ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
-			break;
-		}
-	if(descr == NULL)
-		return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
-	       dev->name, descr->name, descr->set_args, descr->get_args);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Compute the max size of the get arguments */
-	extra_size = get_priv_size(descr->get_args);
-
-	/* Does it fits in wrqu ? */
-	if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
-	   (extra_size <= IFNAMSIZ)) {
-		hdr_len = extra_size;
-		extra_size = 0;
-	} else {
-		hdr_len = IW_EV_POINT_LEN;
-	}
-
-	/* Check if wrqu is complete */
-	if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
-
-		/* Create the kernel buffer that we will return.
-		 * It's at an offset to match the TYPE_POINT case... */
-		buffer_size = request_len + IW_EV_POINT_OFF;
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-		/* Copy event data */
-		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
-		/* Use our own copy of wrqu */
-		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_LEN);
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, (char *) wrqu);
-
-	} else {
-		char *	extra;
-
-		/* Buffer for full reply */
-		buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
-		       dev->name, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Create the kernel buffer that we will return */
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Put wrqu in the right place (just before extra).
-		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
-		 */
-		memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_LEN,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
-
-		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
-
-		/* Call the handler */
-		ret = handler(dev, &info, wrqu, extra);
-
-		/* Adjust for the actual length if it's variable,
-		 * avoid leaking kernel bits outside. */
-		if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
-			extra_size = adjust_priv_size(descr->get_args, wrqu);
-		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_LEN;
-
-		/* Put the iwe header where it should, i.e. scrap the
-		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-	}
-
-	/* Return the buffer to the caller */
-	if (!ret) {
-		*p_buf = buffer;
-		*p_len = request->len;
-	} else {
-		/* Cleanup */
-		if(buffer)
-			kfree(buffer);
-	}
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension SET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_set(struct net_device *	dev,
-					struct iw_event *	request,
-					int			request_len,
-					iw_handler		handler)
-{
-	const struct iw_priv_args *	descr = NULL;
-	unsigned int			cmd;
-	union iwreq_data *		wrqu;
-	union iwreq_data		wrqu_point;
-	int				hdr_len;
-	char *				extra = NULL;
-	int				extra_size = 0;
-	int				offset = 0;	/* For sub-ioctls */
-	struct iw_request_info		info;
-	int				i;
-	int				ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
-			break;
-		}
-	if(descr == NULL)
-		return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
-	       dev->name, descr->name, descr->set_args, descr->get_args);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Compute the size of the set arguments */
-	/* Check for sub-ioctl handler */
-	if(descr->name[0] == '\0')
-		/* Reserve one int for sub-ioctl index */
-		offset = sizeof(__u32);
-
-	/* Size of set arguments */
-	extra_size = get_priv_size(descr->set_args);
-
-	/* Does it fits in wrqu ? */
-	if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
-	   (extra_size <= IFNAMSIZ)) {
-		hdr_len = IW_EV_LCP_LEN + extra_size;
-		extra_size = 0;
-	} else {
-		hdr_len = IW_EV_POINT_LEN;
-	}
+	if (cmd == SIOCGIWSTATS)
+		return ioctl_standard_call(dev, ifr, cmd,
+					   &iw_handler_get_iwstats);
 
-	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = &request->u;
-
-	/* Check if wrqu is complete */
-	if(request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, (char *) wrqu);
-
-	} else {
-		int	extra_len;
-
-		/* Put wrqu in the right place (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-
-		/* Does it fits within bounds ? */
-		if(wrqu_point.data.length > (descr->set_args &
-					     IW_PRIV_SIZE_MASK))
-			return -E2BIG;
-
-		/* Real length of payload */
-		extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
-
-		/* Check if request is self consistent */
-		if((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
-			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
-			       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-			return -EINVAL;
-		}
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* Copy extra in aligned buffer */
-		memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
-		/* Call the handler */
-		ret = handler(dev, &info, &wrqu_point, extra);
-
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
-	}
-
-	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_getlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_get(struct net_device *	dev,
-			   char *		data,
-			   int			len,
-			   char **		p_buf,
-			   int *		p_len)
-{
-	struct iw_event *	request = (struct iw_event *) data;
-	iw_handler		handler;
-
-	/* Check length */
-	if(len < IW_EV_LCP_LEN) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
-		       dev->name, len);
-		return -EINVAL;
-	}
-
-	/* ReCheck length (len may have padding) */
-	if(request->len > len) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
-		       dev->name, request->len, len);
-		return -EINVAL;
-	}
-
-	/* Only accept GET requests in here */
-	if(!IW_IS_GET(request->cmd))
-		return -EOPNOTSUPP;
-
-	/* If command is `get the encoding parameters', check if
-	 * the user has the right to do it */
-	if (request->cmd == SIOCGIWENCODE ||
-	    request->cmd == SIOCGIWENCODEEXT) {
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-	}
-
-	/* Special cases */
-	if(request->cmd == SIOCGIWSTATS)
-		/* Get Wireless Stats */
-		return rtnetlink_standard_get(dev,
-					      request,
-					      request->len,
-					      &iw_handler_get_iwstats,
-					      p_buf, p_len);
-	if(request->cmd == SIOCGIWPRIV) {
-		/* Check if we have some wireless handlers defined */
-		if(dev->wireless_handlers == NULL)
-			return -EOPNOTSUPP;
-		/* Get Wireless Stats */
-		return rtnetlink_standard_get(dev,
-					      request,
-					      request->len,
-					      &iw_handler_get_private,
-					      p_buf, p_len);
-	}
+	if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
+		return ioctl_standard_call(dev, ifr, cmd,
+					   &iw_handler_get_private);
 
 	/* Basic check */
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
-	/* Try to find the handler */
-	handler = get_handler(dev, request->cmd);
-	if(handler != NULL) {
+	/* New driver API : try to find the handler */
+	handler = get_handler(dev, cmd);
+	if (handler) {
 		/* Standard and private are not the same */
-		if(request->cmd < SIOCIWFIRSTPRIV)
-			return rtnetlink_standard_get(dev,
-						      request,
-						      request->len,
-						      handler,
-						      p_buf, p_len);
+		if (cmd < SIOCIWFIRSTPRIV)
+			return ioctl_standard_call(dev, ifr, cmd, handler);
 		else
-			return rtnetlink_private_get(dev,
-						     request,
-						     request->len,
-						     handler,
-						     p_buf, p_len);
+			return ioctl_private_call(dev, ifr, cmd, handler);
 	}
-
+	/* Old driver API : call driver ioctl handler */
+	if (dev->do_ioctl)
+		return dev->do_ioctl(dev, ifr, cmd);
 	return -EOPNOTSUPP;
 }
 
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_setlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_set(struct net_device *	dev,
-			   char *		data,
-			   int			len)
+/* entry point from dev ioctl */
+int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+		      void __user *arg)
 {
-	struct iw_event *	request = (struct iw_event *) data;
-	iw_handler		handler;
-
-	/* Check length */
-	if(len < IW_EV_LCP_LEN) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
-		       dev->name, len);
-		return -EINVAL;
-	}
-
-	/* ReCheck length (len may have padding) */
-	if(request->len > len) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
-		       dev->name, request->len, len);
-		return -EINVAL;
-	}
-
-	/* Only accept SET requests in here */
-	if(!IW_IS_SET(request->cmd))
-		return -EOPNOTSUPP;
-
-	/* Basic check */
-	if (!netif_device_present(dev))
-		return -ENODEV;
-
-	/* New driver API : try to find the handler */
-	handler = get_handler(dev, request->cmd);
-	if(handler != NULL) {
-		/* Standard and private are not the same */
-		if(request->cmd < SIOCIWFIRSTPRIV)
-			return rtnetlink_standard_set(dev,
-						      request,
-						      request->len,
-						      handler);
-		else
-			return rtnetlink_private_set(dev,
-						     request,
-						     request->len,
-						     handler);
-	}
+	int ret;
 
-	return -EOPNOTSUPP;
+	/* If command is `set a parameter', or
+	 * `get the encoding parameters', check if
+	 * the user has the right to do it */
+	if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+	    && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	dev_load(ifr->ifr_name);
+	rtnl_lock();
+	ret = wireless_process_ioctl(ifr, cmd);
+	rtnl_unlock();
+	if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+		return -EFAULT;
+	return ret;
 }
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 
 /************************* EVENT PROCESSING *************************/
 /*
@@ -1870,7 +1080,6 @@ int wireless_rtnetlink_set(struct net_device *	dev,
  * Most often, the event will be propagated through rtnetlink
  */
 
-#ifdef WE_EVENT_RTNETLINK
 /* ---------------------------------------------------------------- */
 /*
  * Locking...
@@ -1915,15 +1124,12 @@ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
  * current wireless config. Dumping the wireless config is far too
  * expensive (for each parameter, the driver need to query the hardware).
  */
-static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
-					struct net_device *	dev,
-					int			type,
-					char *			event,
-					int			event_len)
+static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
+				 int type, char *event, int event_len)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char	 *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
 	r = NLMSG_DATA(nlh);
@@ -1937,12 +1143,12 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
 	/* Add the wireless events in the netlink packet */
 	RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1953,9 +1159,7 @@ rtattr_failure:
  * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
  * within a RTM_NEWLINK event.
  */
-static inline void rtmsg_iwinfo(struct net_device *	dev,
-				char *			event,
-				int			event_len)
+static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
 {
 	struct sk_buff *skb;
 	int size = NLMSG_GOODSIZE;
@@ -1974,8 +1178,6 @@ static inline void rtmsg_iwinfo(struct net_device *	dev,
 	tasklet_schedule(&wireless_nlevent_tasklet);
 }
 
-#endif	/* WE_EVENT_RTNETLINK */
-
 /* ---------------------------------------------------------------- */
 /*
  * Main event dispatcher. Called from other parts and drivers.
@@ -1997,17 +1199,17 @@ void wireless_send_event(struct net_device *	dev,
 	unsigned	cmd_index;		/* *MUST* be unsigned */
 
 	/* Get the description of the Event */
-	if(cmd <= SIOCIWLAST) {
+	if (cmd <= SIOCIWLAST) {
 		cmd_index = cmd - SIOCIWFIRST;
-		if(cmd_index < standard_ioctl_num)
+		if (cmd_index < standard_ioctl_num)
 			descr = &(standard_ioctl[cmd_index]);
 	} else {
 		cmd_index = cmd - IWEVFIRST;
-		if(cmd_index < standard_event_num)
+		if (cmd_index < standard_event_num)
 			descr = &(standard_event[cmd_index]);
 	}
 	/* Don't accept unknown events */
-	if(descr == NULL) {
+	if (descr == NULL) {
 		/* Note : we don't return an error to the driver, because
 		 * the driver would not know what to do about it. It can't
 		 * return an error to the user, because the event is not
@@ -2019,63 +1221,50 @@ void wireless_send_event(struct net_device *	dev,
 		       dev->name, cmd);
 		return;
 	}
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_EVENT_DEBUG */
 
 	/* Check extra parameters and set extra_len */
-	if(descr->header_type == IW_HEADER_TYPE_POINT) {
+	if (descr->header_type == IW_HEADER_TYPE_POINT) {
 		/* Check if number of token fits within bounds */
-		if(wrqu->data.length > descr->max_tokens) {
+		if (wrqu->data.length > descr->max_tokens) {
 			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
 			return;
 		}
-		if(wrqu->data.length < descr->min_tokens) {
+		if (wrqu->data.length < descr->min_tokens) {
 			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
 			return;
 		}
 		/* Calculate extra_len - extra is NULL for restricted events */
-		if(extra != NULL)
+		if (extra != NULL)
 			extra_len = wrqu->data.length * descr->token_size;
 		/* Always at an offset in wrqu */
 		wrqu_off = IW_EV_POINT_OFF;
-#ifdef WE_EVENT_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
-#endif	/* WE_EVENT_DEBUG */
 	}
 
 	/* Total length of the event */
 	hdr_len = event_type_size[descr->header_type];
 	event_len = hdr_len + extra_len;
 
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
-#endif	/* WE_EVENT_DEBUG */
-
 	/* Create temporary buffer to hold the event */
 	event = kmalloc(event_len, GFP_ATOMIC);
-	if(event == NULL)
+	if (event == NULL)
 		return;
 
 	/* Fill event */
 	event->len = event_len;
 	event->cmd = cmd;
 	memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
-	if(extra != NULL)
+	if (extra)
 		memcpy(((char *) event) + hdr_len, extra, extra_len);
 
-#ifdef WE_EVENT_RTNETLINK
 	/* Send via the RtNetlink event channel */
 	rtmsg_iwinfo(dev, (char *) event, event_len);
-#endif	/* WE_EVENT_RTNETLINK */
 
 	/* Cleanup */
 	kfree(event);
 
 	return;		/* Always success, I guess ;-) */
 }
+EXPORT_SYMBOL(wireless_send_event);
 
 /********************** ENHANCED IWSPY SUPPORT **********************/
 /*
@@ -2095,11 +1284,11 @@ void wireless_send_event(struct net_device *	dev,
  * Because this is called on the Rx path via wireless_spy_update(),
  * we want it to be efficient...
  */
-static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+static inline struct iw_spy_data *get_spydata(struct net_device *dev)
 {
 	/* This is the new way */
-	if(dev->wireless_data)
-		return(dev->wireless_data->spy_data);
+	if (dev->wireless_data)
+		return dev->wireless_data->spy_data;
 	return NULL;
 }
 
@@ -2116,7 +1305,7 @@ int iw_handler_set_spy(struct net_device *	dev,
 	struct sockaddr *	address = (struct sockaddr *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Disable spy collection while we copy the addresses.
@@ -2133,29 +1322,16 @@ int iw_handler_set_spy(struct net_device *	dev,
 	smp_wmb();
 
 	/* Are there are addresses to copy? */
-	if(wrqu->data.length > 0) {
+	if (wrqu->data.length > 0) {
 		int i;
 
 		/* Copy addresses */
-		for(i = 0; i < wrqu->data.length; i++)
+		for (i = 0; i < wrqu->data.length; i++)
 			memcpy(spydata->spy_address[i], address[i].sa_data,
 			       ETH_ALEN);
 		/* Reset stats */
 		memset(spydata->spy_stat, 0,
 		       sizeof(struct iw_quality) * IW_MAX_SPY);
-
-#ifdef WE_SPY_DEBUG
-		printk(KERN_DEBUG "iw_handler_set_spy() :  wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
-		for (i = 0; i < wrqu->data.length; i++)
-			printk(KERN_DEBUG
-			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
-			       spydata->spy_address[i][0],
-			       spydata->spy_address[i][1],
-			       spydata->spy_address[i][2],
-			       spydata->spy_address[i][3],
-			       spydata->spy_address[i][4],
-			       spydata->spy_address[i][5]);
-#endif	/* WE_SPY_DEBUG */
 	}
 
 	/* Make sure above is updated before re-enabling */
@@ -2166,6 +1342,7 @@ int iw_handler_set_spy(struct net_device *	dev,
 
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_set_spy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -2181,26 +1358,27 @@ int iw_handler_get_spy(struct net_device *	dev,
 	int			i;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	wrqu->data.length = spydata->spy_number;
 
 	/* Copy addresses. */
-	for(i = 0; i < spydata->spy_number; i++) 	{
+	for (i = 0; i < spydata->spy_number; i++) 	{
 		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
 		address[i].sa_family = AF_UNIX;
 	}
 	/* Copy stats to the user buffer (just after). */
-	if(spydata->spy_number > 0)
+	if (spydata->spy_number > 0)
 		memcpy(extra  + (sizeof(struct sockaddr) *spydata->spy_number),
 		       spydata->spy_stat,
 		       sizeof(struct iw_quality) * spydata->spy_number);
 	/* Reset updated flags. */
-	for(i = 0; i < spydata->spy_number; i++)
+	for (i = 0; i < spydata->spy_number; i++)
 		spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_get_spy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -2215,7 +1393,7 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Just do it */
@@ -2225,12 +1403,9 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 	/* Clear flag */
 	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
-#endif	/* WE_SPY_DEBUG */
-
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_set_thrspy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -2245,7 +1420,7 @@ int iw_handler_get_thrspy(struct net_device *	dev,
 	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Just do it */
@@ -2254,6 +1429,7 @@ int iw_handler_get_thrspy(struct net_device *	dev,
 
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_get_thrspy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -2279,16 +1455,6 @@ static void iw_send_thrspy_event(struct net_device *	dev,
 	memcpy(&(threshold.low), &(spydata->spy_thr_low),
 	       2 * sizeof(struct iw_quality));
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
-	       threshold.addr.sa_data[0],
-	       threshold.addr.sa_data[1],
-	       threshold.addr.sa_data[2],
-	       threshold.addr.sa_data[3],
-	       threshold.addr.sa_data[4],
-	       threshold.addr.sa_data[5], threshold.qual.level);
-#endif	/* WE_SPY_DEBUG */
-
 	/* Send event to user space */
 	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
 }
@@ -2309,16 +1475,12 @@ void wireless_spy_update(struct net_device *	dev,
 	int			match = -1;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return;
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "wireless_spy_update() :  wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
-#endif	/* WE_SPY_DEBUG */
-
 	/* Update all records that match */
-	for(i = 0; i < spydata->spy_number; i++)
-		if(!compare_ether_addr(address, spydata->spy_address[i])) {
+	for (i = 0; i < spydata->spy_number; i++)
+		if (!compare_ether_addr(address, spydata->spy_address[i])) {
 			memcpy(&(spydata->spy_stat[i]), wstats,
 			       sizeof(struct iw_quality));
 			match = i;
@@ -2328,15 +1490,15 @@ void wireless_spy_update(struct net_device *	dev,
 	 * To avoid event storms, we have a simple hysteresis : we generate
 	 * event only when we go under the low threshold or above the
 	 * high threshold. */
-	if(match >= 0) {
-		if(spydata->spy_thr_under[match]) {
-			if(wstats->level > spydata->spy_thr_high.level) {
+	if (match >= 0) {
+		if (spydata->spy_thr_under[match]) {
+			if (wstats->level > spydata->spy_thr_high.level) {
 				spydata->spy_thr_under[match] = 0;
 				iw_send_thrspy_event(dev, spydata,
 						     address, wstats);
 			}
 		} else {
-			if(wstats->level < spydata->spy_thr_low.level) {
+			if (wstats->level < spydata->spy_thr_low.level) {
 				spydata->spy_thr_under[match] = 1;
 				iw_send_thrspy_event(dev, spydata,
 						     address, wstats);
@@ -2344,10 +1506,4 @@ void wireless_spy_update(struct net_device *	dev,
 		}
 	}
 }
-
-EXPORT_SYMBOL(iw_handler_get_spy);
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-EXPORT_SYMBOL(iw_handler_set_spy);
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-EXPORT_SYMBOL(wireless_send_event);
 EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e62ba41b05c5..0d6002fc77b2 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,7 +951,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	 *	Incoming Call User Data.
 	 */
 	if (skb->len >= 0) {
-		memcpy(makex25->calluserdata.cuddata, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
 		makex25->calluserdata.cudlength = skb->len;
 	}
 
@@ -1058,9 +1058,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 */
 	SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
 
-	asmptr = skb->h.raw = skb_put(skb, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
 
-	rc = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	rc = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (rc)
 		goto out_kfree_skb;
 
@@ -1210,8 +1211,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		}
 	}
 
-	skb->h.raw = skb->data;
-
+	skb_reset_transport_header(skb);
 	copied = skb->len;
 
 	if (copied > size) {
@@ -1280,6 +1280,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				rc = sock_get_timestamp(sk,
 						(struct timeval __user *)argp);
 			break;
+		case SIOCGSTAMPNS:
+			rc = -EINVAL;
+			if (sk)
+				rc = sock_get_timestampns(sk,
+						(struct timespec __user *)argp);
+			break;
 		case SIOCGIFADDR:
 		case SIOCSIFADDR:
 		case SIOCGIFDSTADDR:
@@ -1521,6 +1527,12 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 			rc = compat_sock_get_timestamp(sk,
 					(struct timeval __user*)argp);
 		break;
+	case SIOCGSTAMPNS:
+		rc = -EINVAL;
+		if (sk)
+			rc = compat_sock_get_timestampns(sk,
+					(struct timespec __user*)argp);
+		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c7221de98a95..848a6b6f90a6 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -48,7 +48,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 	if ((sk = x25_find_socket(lci, nb)) != NULL) {
 		int queued = 1;
 
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		bh_lock_sock(sk);
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
@@ -191,7 +191,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 {
 	unsigned char *dptr;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	switch (nb->dev->type) {
 		case ARPHRD_X25:
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index c5239fcdefa0..1c88762c2794 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -53,17 +53,20 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 
 		skb_queue_tail(&x25->fragment_queue, skb);
 
-		skbn->h.raw = skbn->data;
+		skb_reset_transport_header(skbn);
 
 		skbo = skb_dequeue(&x25->fragment_queue);
-		memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+		skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
+					  skbo->len);
 		kfree_skb(skbo);
 
 		while ((skbo =
 			skb_dequeue(&x25->fragment_queue)) != NULL) {
 			skb_pull(skbo, (x25->neighbour->extended) ?
 					X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
-			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			skb_copy_from_linear_data(skbo,
+						  skb_put(skbn, skbo->len),
+						  skbo->len);
 			kfree_skb(skbo);
 		}
 
@@ -112,8 +115,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 			 *	Copy any Call User Data.
 			 */
 			if (skb->len >= 0) {
-				memcpy(x25->calluserdata.cuddata, skb->data,
-				       skb->len);
+				skb_copy_from_linear_data(skb,
+					      x25->calluserdata.cuddata,
+					      skb->len);
 				x25->calluserdata.cudlength = skb->len;
 			}
 			if (!sock_flag(sk, SOCK_DEAD))
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 6f5737853912..2b96b52114d6 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -61,7 +61,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 
 	if (skb->len - header_len > max_len) {
 		/* Save a copy of the Header */
-		memcpy(header, skb->data, header_len);
+		skb_copy_from_linear_data(skb, header, header_len);
 		skb_pull(skb, header_len);
 
 		frontlen = skb_headroom(skb);
@@ -84,12 +84,12 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 			len = max_len > skb->len ? skb->len : max_len;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 			skb_pull(skb, len);
 
 			/* Duplicate the Header */
 			skb_push(skbn, header_len);
-			memcpy(skbn->data, header, header_len);
+			skb_copy_to_linear_data(skbn, header, header_len);
 
 			if (skb->len > 0) {
 				if (x25->neighbour->extended)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f373a8a7d9c8..6249a9405bb8 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -612,175 +612,6 @@ EXPORT_SYMBOL_GPL(skb_icv_walk);
 
 #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
 
-/* Looking generic it is not used in another places. */
-
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
-	int elt = 0;
-
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		sg[elt].page = virt_to_page(skb->data + offset);
-		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
-		sg[elt].length = copy;
-		elt++;
-		if ((len -= copy) == 0)
-			return elt;
-		offset += copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-			if (copy > len)
-				copy = len;
-			sg[elt].page = frag->page;
-			sg[elt].offset = frag->page_offset+offset-start;
-			sg[elt].length = copy;
-			elt++;
-			if (!(len -= copy))
-				return elt;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
-				if ((len -= copy) == 0)
-					return elt;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	BUG_ON(len);
-	return elt;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
-
-/* Check that skb data bits are writable. If they are not, copy data
- * to newly created private area. If "tailbits" is given, make sure that
- * tailbits bytes beyond current end of skb are writable.
- *
- * Returns amount of elements of scatterlist to load for subsequent
- * transformations and pointer to writable trailer skb.
- */
-
-int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
-{
-	int copyflag;
-	int elt;
-	struct sk_buff *skb1, **skb_p;
-
-	/* If skb is cloned or its head is paged, reallocate
-	 * head pulling out all the pages (pages are considered not writable
-	 * at the moment even if they are anonymous).
-	 */
-	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
-	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
-		return -ENOMEM;
-
-	/* Easy case. Most of packets will go this way. */
-	if (!skb_shinfo(skb)->frag_list) {
-		/* A little of trouble, not enough of space for trailer.
-		 * This should not happen, when stack is tuned to generate
-		 * good frames. OK, on miss we reallocate and reserve even more
-		 * space, 128 bytes is fair. */
-
-		if (skb_tailroom(skb) < tailbits &&
-		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
-			return -ENOMEM;
-
-		/* Voila! */
-		*trailer = skb;
-		return 1;
-	}
-
-	/* Misery. We are in troubles, going to mincer fragments... */
-
-	elt = 1;
-	skb_p = &skb_shinfo(skb)->frag_list;
-	copyflag = 0;
-
-	while ((skb1 = *skb_p) != NULL) {
-		int ntail = 0;
-
-		/* The fragment is partially pulled by someone,
-		 * this can happen on input. Copy it and everything
-		 * after it. */
-
-		if (skb_shared(skb1))
-			copyflag = 1;
-
-		/* If the skb is the last, worry about trailer. */
-
-		if (skb1->next == NULL && tailbits) {
-			if (skb_shinfo(skb1)->nr_frags ||
-			    skb_shinfo(skb1)->frag_list ||
-			    skb_tailroom(skb1) < tailbits)
-				ntail = tailbits + 128;
-		}
-
-		if (copyflag ||
-		    skb_cloned(skb1) ||
-		    ntail ||
-		    skb_shinfo(skb1)->nr_frags ||
-		    skb_shinfo(skb1)->frag_list) {
-			struct sk_buff *skb2;
-
-			/* Fuck, we are miserable poor guys... */
-			if (ntail == 0)
-				skb2 = skb_copy(skb1, GFP_ATOMIC);
-			else
-				skb2 = skb_copy_expand(skb1,
-						       skb_headroom(skb1),
-						       ntail,
-						       GFP_ATOMIC);
-			if (unlikely(skb2 == NULL))
-				return -ENOMEM;
-
-			if (skb1->sk)
-				skb_set_owner_w(skb2, skb1->sk);
-
-			/* Looking around. Are we still alive?
-			 * OK, link new skb, drop old one */
-
-			skb2->next = skb1->next;
-			*skb_p = skb2;
-			kfree_skb(skb1);
-			skb1 = skb2;
-		}
-		elt++;
-		*trailer = skb1;
-		skb_p = &skb1->next;
-	}
-
-	return elt;
-}
-EXPORT_SYMBOL_GPL(skb_cow_data);
-
 void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
 {
 	if (tail != skb) {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae1419..5c4695840c58 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	case IPPROTO_COMP:
 		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
 			return -EINVAL;
-		*spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
+		*spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
 		*seq = 0;
 		return 0;
 	default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	if (!pskb_may_pull(skb, 16))
 		return -EINVAL;
 
-	*spi = *(__be32*)(skb->h.raw + offset);
-	*seq = *(__be32*)(skb->h.raw + offset_seq);
+	*spi = *(__be32*)(skb_transport_header(skb) + offset);
+	*seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_parse_spi);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 785c3e39f062..263e34e45265 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_policy_timer(unsigned long data)
 {
 	struct xfrm_policy *xp = (struct xfrm_policy*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int dir;
@@ -579,8 +579,22 @@ static inline int xfrm_byidx_should_resize(int total)
 	return 0;
 }
 
-static DEFINE_MUTEX(hash_resize_mutex);
+void xfrm_spd_getinfo(struct xfrm_spdinfo *si)
+{
+	read_lock_bh(&xfrm_policy_lock);	/* one lock hold covers every field */
+	si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
+	si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
+	si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
+	si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
+	si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
+	si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
+	si->spdhcnt = xfrm_idx_hmask;		/* by-index hash mask */
+	si->spdhmcnt = xfrm_policy_hashmax;	/* configured hash maximum */
+	read_unlock_bh(&xfrm_policy_lock);
+}
+EXPORT_SYMBOL(xfrm_spd_getinfo);
 
+static DEFINE_MUTEX(hash_resize_mutex);
 static void xfrm_hash_resize(struct work_struct *__unused)
 {
 	int dir, total;
@@ -690,7 +704,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
 	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
-	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.add_time = get_seconds();
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
@@ -1049,7 +1063,7 @@ static inline int policy_to_flow_dir(int dir)
 		return FLOW_DIR_OUT;
 	case XFRM_POLICY_FWD:
 		return FLOW_DIR_FWD;
-	};
+	}
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
@@ -1133,7 +1147,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
-		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->curlft.add_time = get_seconds();
 		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
@@ -1330,6 +1344,40 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
 	return err;
 }
 
+/* Copy size bytes from src into *target, kmalloc()ing *target if it is NULL. */
+static inline int xfrm_dst_alloc_copy(void **target, void *src, int size)
+{
+	if (!*target) {
+		*target = kmalloc(size, GFP_ATOMIC);
+		if (!*target)
+			return -ENOMEM;
+	}
+	memcpy(*target, src, size);
+	return 0;
+}
+
+/* Copy sel into xdst->partner; returns 0 unless CONFIG_XFRM_SUB_POLICY is set. */
+static inline int
+xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&xdst->partner, sel, sizeof(*sel));
+#else
+	return 0;
+#endif
+}
+
+static inline int
+xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY	/* keep a private copy of fl in xdst->origin */
+	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
+#else	/* !CONFIG_XFRM_SUB_POLICY: nothing is recorded */
+	return 0;
+#endif
+}
 
 static int stale_bundle(struct dst_entry *dst);
 
@@ -1386,7 +1434,7 @@ restart:
 		return 0;
 
 	family = dst_orig->ops->family;
-	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = get_seconds();
 	pols[0] = policy;
 	npols ++;
 	xfrm_nr += pols[0]->xfrm_nr;
@@ -1518,6 +1566,18 @@ restart:
 			err = -EHOSTUNREACH;
 			goto error;
 		}
+
+		if (npols > 1)
+			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+		else
+			err = xfrm_dst_update_origin(dst, fl);
+		if (unlikely(err)) {
+			write_unlock_bh(&policy->lock);
+			if (dst)
+				dst_free(dst);
+			goto error;
+		}
+
 		dst->next = policy->bundles;
 		policy->bundles = dst;
 		dst_hold(dst);
@@ -1682,7 +1742,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 1;
 	}
 
-	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pol->curlft.use_time = get_seconds();
 
 	pols[0] = pol;
 	npols ++;
@@ -1694,7 +1754,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		if (pols[1]) {
 			if (IS_ERR(pols[1]))
 				return 0;
-			pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+			pols[1]->curlft.use_time = get_seconds();
 			npols ++;
 		}
 	}
@@ -1933,6 +1993,15 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
 	    (dst->dev && !netif_running(dst->dev)))
 		return 0;
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (fl) {
+		if (first->origin && !flow_cache_uli_match(first->origin, fl))
+			return 0;
+		if (first->partner &&
+		    !xfrm_selector_match(first->partner, fl, family))
+			return 0;
+	}
+#endif
 
 	last = NULL;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5c5f6dcab974..f3a61ebd8d65 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_timer_handler(unsigned long data)
 {
 	struct xfrm_state *x = (struct xfrm_state*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 		init_timer(&x->rtimer);
 		x->rtimer.function = xfrm_replay_timer_handler;
 		x->rtimer.data     = (unsigned long)x;
-		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->curlft.add_time = get_seconds();
 		x->lft.soft_byte_limit = XFRM_INF;
 		x->lft.soft_packet_limit = XFRM_INF;
 		x->lft.hard_byte_limit = XFRM_INF;
@@ -421,6 +421,16 @@ restart:
 }
 EXPORT_SYMBOL(xfrm_state_flush);
 
+void xfrm_sad_getinfo(struct xfrm_sadinfo *si)
+{
+	spin_lock_bh(&xfrm_state_lock);		/* one lock hold covers every field */
+	si->sadcnt = xfrm_state_num;
+	si->sadhcnt = xfrm_state_hmask;		/* state hash mask */
+	si->sadhmcnt = xfrm_state_hashmax;	/* configured hash maximum */
+	spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_sad_getinfo);
+
 static int
 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 		  struct xfrm_tmpl *tmpl,
@@ -458,7 +468,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 					     x->id.daddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -493,7 +503,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 					     x->props.saddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -722,7 +732,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 					     (struct in6_addr *)saddr))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -755,7 +765,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
 				       (struct in6_addr *)daddr);
 			break;
-		};
+		}
 
 		x->km.state = XFRM_STATE_ACQ;
 		x->id.proto = proto;
@@ -1051,7 +1061,7 @@ EXPORT_SYMBOL(xfrm_state_update);
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
 	if (!x->curlft.use_time)
-		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+		x->curlft.use_time = get_seconds();
 
 	if (x->km.state != XFRM_STATE_VALID)
 		return -EINVAL;
@@ -1371,7 +1381,8 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
 		return 0;
 
 	diff = x->replay.seq - seq;
-	if (diff >= x->props.replay_window) {
+	if (diff >= min_t(unsigned int, x->props.replay_window,
+			  sizeof(x->replay.bitmap) * 8)) {
 		x->stats.replay_window++;
 		return -EINVAL;
 	}
@@ -1666,37 +1677,17 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-/*
- * This function is NOT optimal.  For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal.  However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
-	int res = mtu;
-
-	res -= x->props.header_len;
-
-	for (;;) {
-		int m = res;
-
-		if (m < 68)
-			return 68;
-
-		spin_lock_bh(&x->lock);
-		if (x->km.state == XFRM_STATE_VALID &&
-		    x->type && x->type->get_max_size)
-			m = x->type->get_max_size(x, m);
-		else
-			m += x->props.header_len;
-		spin_unlock_bh(&x->lock);
-
-		if (m <= mtu)
-			break;
-		res -= (m - mtu);
-	}
+	int res;
 
+	spin_lock_bh(&x->lock);
+	if (x->km.state == XFRM_STATE_VALID &&
+	    x->type && x->type->get_mtu)
+		res = x->type->get_mtu(x, mtu);
+	else
+		res = mtu;
+	spin_unlock_bh(&x->lock);
 	return res;
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e81e2fb3d429..4210d91624cd 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
 	return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = -EINVAL;
 	switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
 		goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = 0;
 
@@ -272,9 +272,8 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
 }
 
 
-static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp)
+static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
 {
-	struct xfrm_sec_ctx *xfrm_ctx = xp->security;
 	int len = 0;
 
 	if (xfrm_ctx) {
@@ -577,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	struct sk_buff *skb = sp->out_skb;
 	struct xfrm_usersa_info *p;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	if (sp->this_idx < sp->start_idx)
 		goto out;
@@ -622,14 +621,14 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	if (x->lastused)
 		RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 out:
 	sp->this_idx++;
 	return 0;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -673,6 +672,136 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
 	return skb;
 }
 
+/* Build an XFRM_MSG_NEWSPDINFO message in skb: the flags word, then one u32
+ * attribute per XFRM_SPD_* bit set.  -EMSGSIZE if header or attribute won't fit. */
+static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct xfrm_spdinfo si;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+	if (nlh == NULL) /* shouldn't really happen ... */
+		return -EMSGSIZE;
+
+	*(u32 *)nlmsg_data(nlh) = flags;	/* payload is the flags word itself */
+	xfrm_spd_getinfo(&si);
+
+	if (flags & XFRM_SPD_HMASK)
+		NLA_PUT_U32(skb, XFRMA_SPDHMASK, si.spdhcnt);
+	if (flags & XFRM_SPD_HMAX)
+		NLA_PUT_U32(skb, XFRMA_SPDHMAX, si.spdhmcnt);
+	if (flags & XFRM_SPD_ICNT)
+		NLA_PUT_U32(skb, XFRMA_SPDICNT, si.incnt);
+	if (flags & XFRM_SPD_OCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDOCNT, si.outcnt);
+	if (flags & XFRM_SPD_FCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDFCNT, si.fwdcnt);
+	if (flags & XFRM_SPD_ISCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDISCNT, si.inscnt);
+	if (flags & XFRM_SPD_OSCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDOSCNT, si.outscnt);
+	if (flags & XFRM_SPD_FSCNT)
+		NLA_PUT_U32(skb, XFRMA_SPDFSCNT, si.fwdscnt);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/* Unicast back the SPD counters selected by the u32 flags payload of nlh. */
+static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+		struct rtattr **xfrma)
+{
+	struct sk_buff *r_skb;
+	u32 *flags = NLMSG_DATA(nlh);
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 seq = nlh->nlmsg_seq;
+	int len = NLMSG_LENGTH(sizeof(u32));
+
+	if (*flags & XFRM_SPD_HMASK)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_HMAX)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_ICNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_OCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_FCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_ISCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_OSCNT)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SPD_FSCNT)
+		len += RTA_SPACE(sizeof(u32));
+
+	r_skb = alloc_skb(len, GFP_ATOMIC);
+	if (r_skb == NULL)
+		return -ENOMEM;
+
+	if (build_spdinfo(r_skb, spid, seq, *flags) < 0)
+		BUG();	/* len was sized above for exactly these attributes */
+
+	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
+/* Build an XFRM_MSG_NEWSADINFO message in skb: the flags word, then one u32
+ * attribute per XFRM_SAD_* bit set.  -EMSGSIZE if header or attribute won't fit. */
+static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct nlmsghdr *nlh;
+	struct xfrm_sadinfo si;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+	if (!nlh)	/* not expected: the caller sized skb for this message */
+		return -EMSGSIZE;
+
+	*(u32 *)nlmsg_data(nlh) = flags;	/* payload is the flags word itself */
+	xfrm_sad_getinfo(&si);
+
+	if (flags & XFRM_SAD_HMASK)
+		NLA_PUT_U32(skb, XFRMA_SADHMASK, si.sadhcnt);
+	if (flags & XFRM_SAD_HMAX)
+		NLA_PUT_U32(skb, XFRMA_SADHMAX, si.sadhmcnt);
+	if (flags & XFRM_SAD_CNT)
+		NLA_PUT_U32(skb, XFRMA_SADCNT, si.sadcnt);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+/* Unicast back the SAD counters selected by the u32 flags payload of nlh. */
+static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+		struct rtattr **xfrma)
+{
+	struct sk_buff *r_skb;
+	u32 *flags = NLMSG_DATA(nlh);
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 seq = nlh->nlmsg_seq;
+	int len = NLMSG_LENGTH(sizeof(u32));
+
+	if (*flags & XFRM_SAD_HMASK)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SAD_HMAX)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SAD_CNT)
+		len += RTA_SPACE(sizeof(u32));
+
+	r_skb = alloc_skb(len, GFP_ATOMIC);
+	if (r_skb == NULL)
+		return -ENOMEM;
+
+	if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
+		BUG();	/* len was sized above for exactly these attributes */
+
+	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct rtattr **xfrma)
 {
@@ -712,7 +841,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	if (p->min > p->max)
 		return -EINVAL;
@@ -790,7 +919,7 @@ static int verify_policy_dir(u8 dir)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -806,7 +935,7 @@ static int verify_policy_type(u8 type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -822,7 +951,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->action) {
 	case XFRM_POLICY_ALLOW:
@@ -831,7 +960,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->sel.family) {
 	case AF_INET:
@@ -846,7 +975,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return verify_policy_dir(p->dir);
 }
@@ -913,7 +1042,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 #endif
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 
 	return 0;
@@ -1158,7 +1287,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	struct sk_buff *in_skb = sp->in_skb;
 	struct sk_buff *skb = sp->out_skb;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	if (sp->this_idx < sp->start_idx)
 		goto out;
@@ -1177,13 +1306,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	if (copy_to_user_policy_type(xp->type, skb) < 0)
 		goto nlmsg_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 out:
 	sp->this_idx++;
 	return 0;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1331,7 +1460,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	struct xfrm_aevent_id *id;
 	struct nlmsghdr *nlh;
 	struct xfrm_lifetime_cur ltime;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id));
 	id = NLMSG_DATA(nlh);
@@ -1363,12 +1492,12 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 		RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1745,7 +1874,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 	struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	int i;
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id));
@@ -1765,10 +1894,10 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 			goto nlmsg_failure;
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1824,6 +1953,8 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
 	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
+	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
 };
 
 #undef XMSGSIZE
@@ -1851,55 +1982,40 @@ static struct xfrm_link {
 	[XFRM_MSG_NEWAE       - XFRM_MSG_BASE] = { .doit = xfrm_new_ae  },
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = { .doit = xfrm_get_ae  },
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
+	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo   },
+	[XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo   },
 };
 
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct rtattr *xfrma[XFRMA_MAX];
 	struct xfrm_link *link;
 	int type, min_len;
 
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		return 0;
-
 	type = nlh->nlmsg_type;
-
-	/* A control message: ignore them */
-	if (type < XFRM_MSG_BASE)
-		return 0;
-
-	/* Unknown message: reply with EINVAL */
 	if (type > XFRM_MSG_MAX)
-		goto err_einval;
+		return -EINVAL;
 
 	type -= XFRM_MSG_BASE;
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		*errp = -EPERM;
-		return -1;
-	}
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
 	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
 	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
 		if (link->dump == NULL)
-			goto err_einval;
-
-		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
-						link->dump, NULL)) != 0) {
-			return -1;
-		}
+			return -EINVAL;
 
-		netlink_queue_skip(nlh, skb);
-		return -1;
+		return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
 	}
 
 	memset(xfrma, 0, sizeof(xfrma));
 
 	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
-		goto err_einval;
+		return -EINVAL;
 
 	if (nlh->nlmsg_len > min_len) {
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -1909,7 +2025,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 			unsigned short flavor = attr->rta_type;
 			if (flavor) {
 				if (flavor > XFRMA_MAX)
-					goto err_einval;
+					return -EINVAL;
 				xfrma[flavor - 1] = attr;
 			}
 			attr = RTA_NEXT(attr, attrlen);
@@ -1917,14 +2033,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 	}
 
 	if (link->doit == NULL)
-		goto err_einval;
-	*errp = link->doit(skb, nlh, xfrma);
-
-	return *errp;
+		return -EINVAL;
 
-err_einval:
-	*errp = -EINVAL;
-	return -1;
+	return link->doit(skb, nlh, xfrma);
 }
 
 static void xfrm_netlink_rcv(struct sock *sk, int len)
@@ -1943,7 +2054,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 {
 	struct xfrm_user_expire *ue;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE,
 			sizeof(*ue));
@@ -1953,11 +2064,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	copy_to_user_state(x, &ue->state);
 	ue->hard = (c->data.hard != 0) ? 1 : 0;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2000,7 +2111,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
 	struct xfrm_usersa_flush *p;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
 
 	skb = alloc_skb(len, GFP_ATOMIC);
@@ -2046,7 +2157,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
 	struct xfrm_usersa_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = xfrm_sa_len(x);
 	int headlen;
 
@@ -2130,7 +2241,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 {
 	struct xfrm_user_acquire *ua;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	__u32 seq = xfrm_get_acqseq();
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
@@ -2154,11 +2265,11 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 	if (copy_to_user_policy_type(xp->type, skb) < 0)
 		goto nlmsg_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2170,7 +2281,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
-	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(x->security));
 #ifdef CONFIG_XFRM_SUB_POLICY
 	len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
 #endif
@@ -2250,7 +2361,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	struct xfrm_user_polexpire *upe;
 	struct nlmsghdr *nlh;
 	int hard = c->data.hard;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
 	upe = NLMSG_DATA(nlh);
@@ -2265,11 +2376,11 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		goto nlmsg_failure;
 	upe->hard = !!hard;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2280,7 +2391,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
-	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp->security));
 #ifdef CONFIG_XFRM_SUB_POLICY
 	len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
 #endif
@@ -2301,7 +2412,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 	struct xfrm_userpolicy_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	int headlen;
 
@@ -2358,7 +2469,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 {
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = 0;
 #ifdef CONFIG_XFRM_SUB_POLICY
 	len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
@@ -2411,7 +2522,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
 {
 	struct xfrm_user_report *ur;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
 	ur = NLMSG_DATA(nlh);
@@ -2423,12 +2534,12 @@ static int build_report(struct sk_buff *skb, u8 proto,
 	if (addr)
 		RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2467,7 +2578,7 @@ static int __init xfrm_user_init(void)
 	printk(KERN_INFO "Initializing XFRM netlink socket\n");
 
 	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
-				     xfrm_netlink_rcv, THIS_MODULE);
+				     xfrm_netlink_rcv, NULL, THIS_MODULE);
 	if (nlsk == NULL)
 		return -ENOMEM;
 	rcu_assign_pointer(xfrm_nl, nlsk);
author	Wim Van Sebroeck <wim@iguana.be>	2007-05-01 08:53:01 +0200
committer	Wim Van Sebroeck <wim@iguana.be>	2007-05-01 08:53:01 +0200
commit	48a7afe314bfc4d7f50e1608632f503dbba7e013 (patch)
tree	4a80e6b96321a71affd1bacea817de93be08894b /net
parent	[WATCHDOG] Semi-typical watchdog bug re early misc_register() (diff)
parent	libata: honour host controllers that want just one host (diff)
download	linux-48a7afe314bfc4d7f50e1608632f503dbba7e013.tar.xz linux-48a7afe314bfc4d7f50e1608632f503dbba7e013.zip