From 6eac56040787c3ff604fe7d48bbbb7897cd1387c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 28 Aug 2008 01:08:02 -0700
Subject: tcp: Skip empty hash buckets faster in /proc/net/tcp

On most systems most of the TCP established/time-wait hash buckets are empty.
When walking the hash table for /proc/net/tcp their read locks would
always be aquired just to find out they're empty. This patch changes the code
to check first if the buckets have any entries before taking the lock, which
is much cheaper than taking a lock. Since the hash tables are large
this makes a measurable difference on processing /proc/net/tcp,
especially on architectures with slow read_lock (e.g. PPC)

On a 2GB Core2 system time cat /proc/net/tcp > /dev/null (with a mostly
empty hash table) goes from 0.046s to 0.005s.

On systems with slower atomics (like P4 or POWER4) or larger hash tables
(more RAM) the difference is much higher.

This can be noticeable because there are some daemons around who regularly
scan /proc/net/tcp.

Original idea for this patch from Marcus Meissner, but redone by me.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e934824b..37ca3843c40b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1946,6 +1946,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 	return rc;
 }
 
+static inline int empty_bucket(struct tcp_iter_state *st)
+{
+	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+}
+
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state* st = seq->private;
@@ -1958,6 +1964,10 @@ static void *established_get_first(struct seq_file *seq)
 		struct inet_timewait_sock *tw;
 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
+		/* Lockless fast path for the common case of empty buckets */
+		if (empty_bucket(st))
+			continue;
+
 		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
@@ -2008,13 +2018,15 @@ get_tw:
 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
-		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
-			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-		} else {
-			cur = NULL;
-			goto out;
-		}
+		/* Look for next non empty bucket */
+		while (++st->bucket < tcp_hashinfo.ehash_size &&
+				empty_bucket(st))
+			;
+		if (st->bucket >= tcp_hashinfo.ehash_size)
+			return NULL;
+
+		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_next(sk);
 
-- 
cgit v1.2.3


From f5fff5dc8a7a3f395b0525c02ba92c95d42b7390 Mon Sep 17 00:00:00 2001
From: Tom Quetchenbach <virtualphtn@gmail.com>
Date: Sun, 21 Sep 2008 00:21:51 -0700
Subject: tcp: advertise MSS requested by user

I'm trying to use the TCP_MAXSEG option to setsockopt() to set the MSS
for both sides of a bidirectional connection.

man tcp says: "If this option is set before connection establishment, it
also changes the MSS value announced to the other end in the initial
packet."

However, the kernel only uses the MTU/route cache to set the advertised
MSS. That means if I set the MSS to, say, 500 before calling connect(),
I will send at most 500-byte packets, but I will still receive 1500-byte
packets in reply.

This is a bug, either in the kernel or the documentation.

This patch (applies to latest net-2.6) reduces the advertised value to
that requested by the user as long as setsockopt() is called before
connect() or accept(). This seems like the behavior that one would
expect as well as that which is documented.

I've tried to make sure that things that depend on the advertised MSS
are set correctly.

Signed-off-by: Tom Quetchenbach <virtualphtn@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c   |  4 ++++
 net/ipv4/tcp_output.c | 13 ++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3dfbc21e555a..44aef1c1f373 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1364,6 +1364,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tcp_sk(sk)->rx_opt.user_mss &&
+	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
+		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+
 	tcp_initialize_rcv_mss(newsk);
 
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8f9793a37b61..c3d58ee3e16f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2232,6 +2232,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
 	__u8 *md5_hash_location;
+	int mss;
 
 	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
 	if (skb == NULL)
@@ -2242,13 +2243,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb->dst = dst_clone(dst);
 
+	mss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
-			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
 			&req->rcv_wnd,
 			&req->window_clamp,
 			ireq->wscale_ok,
@@ -2258,8 +2263,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	memset(&opts, 0, sizeof(opts));
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_header_size = tcp_synack_options(sk, req,
-					     dst_metric(dst, RTAX_ADVMSS),
+	tcp_header_size = tcp_synack_options(sk, req, mss,
 					     skb, &opts, &md5) +
 			  sizeof(struct tcphdr);
 
@@ -2333,6 +2337,9 @@ static void tcp_connect_init(struct sock *sk)
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
 	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
+		tp->advmss = tp->rx_opt.user_mss;
+
 	tcp_initialize_rcv_mss(sk);
 
 	tcp_select_initial_window(tcp_full_space(sk),
-- 
cgit v1.2.3


From 4dd7972d1204c3851a4092cecd2207e05eb29b09 Mon Sep 17 00:00:00 2001
From: Vitaliy Gusev <vgusev@openvz.org>
Date: Wed, 1 Oct 2008 01:51:39 -0700
Subject: tcp: Fix NULL dereference in tcp_4_send_ack()

Fix NULL dereference in tcp_4_send_ack().

As skb->dev is reset to NULL in tcp_v4_rcv() thus OOPS occurs:

BUG: unable to handle kernel NULL pointer dereference at 00000000000004d0
IP: [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250

Stack:  ffff810005dbb000 ffff810015c8acc0 e77b2c6e5f861600 a01610802e90cb6d
 0a08010100000000 88afffff88afffff 0000000080762be8 0000000115c872e8
 0004122000000000 0000000000000001 ffffffff80762b88 0000000000000020
Call Trace:
 <IRQ>  [<ffffffff80499c33>] tcp_v4_reqsk_send_ack+0x20/0x22
 [<ffffffff8049bce5>] tcp_check_req+0x108/0x14c
 [<ffffffff8047aaf7>] ? rt_intern_hash+0x322/0x33c
 [<ffffffff80499846>] tcp_v4_do_rcv+0x399/0x4ec
 [<ffffffff8045ce4b>] ? skb_checksum+0x4f/0x272
 [<ffffffff80485b74>] ? __inet_lookup_listener+0x14a/0x15c
 [<ffffffff8049babc>] tcp_v4_rcv+0x6a1/0x701
 [<ffffffff8047e739>] ip_local_deliver_finish+0x157/0x24a
 [<ffffffff8047ec9a>] ip_local_deliver+0x72/0x7c
 [<ffffffff8047e5bd>] ip_rcv_finish+0x38d/0x3b2
 [<ffffffff803d3548>] ? scsi_io_completion+0x19d/0x39e
 [<ffffffff8047ebe5>] ip_rcv+0x2a2/0x2e5
 [<ffffffff80462faa>] netif_receive_skb+0x293/0x303
 [<ffffffff80465a9b>] process_backlog+0x80/0xd0
 [<ffffffff802630b4>] ? __rcu_process_callbacks+0x125/0x1b4
 [<ffffffff8046560e>] net_rx_action+0xb9/0x17f
 [<ffffffff80234cc5>] __do_softirq+0xa3/0x164
 [<ffffffff8020c52c>] call_softirq+0x1c/0x28
 <EOI>  [<ffffffff8020de1c>] do_softirq+0x34/0x72
 [<ffffffff80234b8e>] local_bh_enable_ip+0x3f/0x50
 [<ffffffff804d43ca>] _spin_unlock_bh+0x12/0x14
 [<ffffffff804599cd>] release_sock+0xb8/0xc1
 [<ffffffff804a6f9a>] inet_stream_connect+0x146/0x25c
 [<ffffffff80243078>] ? autoremove_wake_function+0x0/0x38
 [<ffffffff8045751f>] sys_connect+0x68/0x8e
 [<ffffffff80291818>] ? fd_install+0x5f/0x68
 [<ffffffff80457784>] ? sock_map_fd+0x55/0x62
 [<ffffffff8020b39b>] system_call_after_swapgs+0x7b/0x80

Code: 41 10 11 d0 83 d0 00 4d 85 ed 89 45 c0 c7 45 c4 08 00 00 00 74 07 41 8b 45 04 89 45 c8 48 8b 43 20 8b 4d b8 48 8d 55 b0 48 89 de <48> 8b 80 d0 04 00 00 48 8b b8 60 01 00 00 e8 20 ae fe ff 65 48
RIP  [<ffffffff80498503>] tcp_v4_send_ack+0x203/0x250
 RSP <ffffffff80762b78>
CR2: 00000000000004d0

Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1b4fee20fc93..011478e46c40 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -618,7 +618,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			];
 	} rep;
 	struct ip_reply_arg arg;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(skb->dst->dev);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
-- 
cgit v1.2.3


From 88ef4a5a78e63420dd1dd770f1bd1dc198926b04 Mon Sep 17 00:00:00 2001
From: KOVACS Krisztian <hidden@sch.bme.hu>
Date: Wed, 1 Oct 2008 07:41:00 -0700
Subject: tcp: Handle TCP SYN+ACK/ACK/RST transparency

The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to
incoming packets. The non-local source address check on output bites
us again, as replies for transparently redirected traffic won't have a
chance to leave the node.

This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the
route lookup for those replies. Transparent replies are enabled if the
listening socket has the transparent socket flag set.

Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h |  8 +++++++-
 include/net/ip.h        |  3 +++
 net/ipv4/tcp_ipv4.c     | 12 +++++++++---
 3 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 139b78b4dfeb..dced3f64f975 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -72,7 +72,8 @@ struct inet_request_sock {
 				sack_ok	   : 1,
 				wscale_ok  : 1,
 				ecn_ok	   : 1,
-				acked	   : 1;
+				acked	   : 1,
+				no_srccheck: 1;
 	struct ip_options	*opt;
 };
 
@@ -204,4 +205,9 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
 	return req;
 }
 
+static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
+{
+	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 250e6ef025a4..90b27f634b76 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -140,12 +140,15 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf)
 
 struct ip_reply_arg {
 	struct kvec iov[1];   
+	int	    flags;
 	__wsum 	    csum;
 	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
 				/* -1 if not needed */ 
 	int	    bound_dev_if;
 }; 
 
+#define IP_REPLY_ARG_NOSRCCHECK 1
+
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
 		   unsigned int len); 
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d13688e3558d..8b24bd833cb4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -591,6 +591,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 				      ip_hdr(skb)->saddr, /* XXX */
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
+	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
 	net = dev_net(skb->dst->dev);
 	ip_send_reply(net->ipv4.tcp_sock, skb,
@@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts, int oif,
-			    struct tcp_md5sig_key *key)
+			    struct tcp_md5sig_key *key,
+			    int reply_flags)
 {
 	struct tcphdr *th = tcp_hdr(skb);
 	struct {
@@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 				    ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
+	arg.flags = reply_flags;
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
@@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
-			tcp_twsk_md5_key(tcptw)
+			tcp_twsk_md5_key(tcptw),
+			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
 			);
 
 	inet_twsk_put(tw);
@@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
 			req->ts_recent,
 			0,
-			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr));
+			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
+			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
 }
 
 /*
@@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = daddr;
 	ireq->rmt_addr = saddr;
+	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
-- 
cgit v1.2.3


From 9a1f27c48065ce713eb47f2fd475b717e63ef239 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 7 Oct 2008 11:41:57 -0700
Subject: inet_hashtables: Add inet_lookup_skb helpers

To be able to use the cached socket reference in the skb during input
processing we add a new set of lookup functions that receive the skb on
their argument list.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: KOVACS Krisztian <hidden@sch.bme.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 11 +++++++++++
 include/net/inet_hashtables.h  | 14 ++++++++++++++
 net/dccp/ipv4.c                |  5 ++---
 net/dccp/ipv6.c                |  6 ++----
 net/ipv4/tcp_ipv4.c            |  3 +--
 net/ipv6/tcp_ipv6.c            |  6 +-----
 6 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index e48989f04c24..995efbb031ab 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -91,6 +91,17 @@ static inline struct sock *__inet6_lookup(struct net *net,
 	return inet6_lookup_listener(net, hashinfo, daddr, hnum, dif);
 }
 
+static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
+					      struct sk_buff *skb,
+					      const __be16 sport,
+					      const __be16 dport)
+{
+	return __inet6_lookup(dev_net(skb->dst->dev), hashinfo,
+			      &ipv6_hdr(skb)->saddr, sport,
+			      &ipv6_hdr(skb)->daddr, ntohs(dport),
+			      inet6_iif(skb));
+}
+
 extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 				 const struct in6_addr *saddr, const __be16 sport,
 				 const struct in6_addr *daddr, const __be16 dport,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index bb619d80f2e2..3522bbcd546d 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -16,6 +16,7 @@
 
 
 #include <linux/interrupt.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -28,6 +29,7 @@
 #include <net/inet_connection_sock.h>
 #include <net/inet_sock.h>
 #include <net/sock.h>
+#include <net/route.h>
 #include <net/tcp_states.h>
 #include <net/netns/hash.h>
 
@@ -371,6 +373,18 @@ static inline struct sock *inet_lookup(struct net *net,
 	return sk;
 }
 
+static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
+					     struct sk_buff *skb,
+					     const __be16 sport,
+					     const __be16 dport)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return __inet_lookup(dev_net(skb->dst->dev), hashinfo,
+			     iph->saddr, sport,
+			     iph->daddr, dport, inet_iif(skb));
+}
+
 extern int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u32 port_offset,
 		int (*check_established)(struct inet_timewait_death_row *,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 882c5c4de69e..e3dfddab21cc 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -811,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo,
-			   iph->saddr, dh->dccph_sport,
-			   iph->daddr, dh->dccph_dport, inet_iif(skb));
+	sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+			       dh->dccph_sport, dh->dccph_dport);
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5e1ee0da2c40..caa7f3469626 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -805,10 +805,8 @@ static int dccp_v6_rcv(struct sk_buff *skb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo,
-			    &ipv6_hdr(skb)->saddr, dh->dccph_sport,
-			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
-			    inet6_iif(skb));
+	sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+			        dh->dccph_sport, dh->dccph_dport);
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8b24bd833cb4..24ffc5e1d3da 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1577,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags	 = iph->tos;
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
-	sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
-			th->source, iph->daddr, th->dest, inet_iif(skb));
+	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
 	if (!sk)
 		goto no_tcp_socket;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index df16b68644e7..6268d266c034 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1681,11 +1681,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(net, &tcp_hashinfo,
-			&ipv6_hdr(skb)->saddr, th->source,
-			&ipv6_hdr(skb)->daddr, ntohs(th->dest),
-			inet6_iif(skb));
-
+	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
 	if (!sk)
 		goto no_tcp_socket;
 
-- 
cgit v1.2.3


From 52cd5750e81ec8d213949fa7c0d2e08907bf498b Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 8 Oct 2008 11:34:06 -0700
Subject: tcp: fix length used for checksum in a reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While looking for some common code I came across difference
in checksum calculation between tcp_v6_send_(reset|ack) I
couldn't explain. I checked both v4 and v6 and found out that
both seem to have the same "feature". I couldn't find anything
in rfc nor anywhere else which would state that md5 option
should be ignored like it was in case of reset so I came to
a conclusion that this is probably a genuine bug. I suspect
that addition of md5 just was fooled by the excessive
copy-paste code in those functions and the reset part was
never tested well enough to find out the problem.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 24ffc5e1d3da..ba46769c6e97 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -589,7 +589,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
-				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
+				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 424d9c4a67ac..e8b0fdd9edb8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1012,14 +1012,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
-	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+	buff->csum = csum_partial((char *)t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    sizeof(*t1), IPPROTO_TCP,
+				    tot_len, IPPROTO_TCP,
 				    buff->csum);
 
 	fl.proto = IPPROTO_TCP;
-- 
cgit v1.2.3


From 78e645cb890b0f32ea81a974e29427d9cd2f64f0 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Thu, 9 Oct 2008 14:37:47 -0700
Subject: tcpv[46]: fix md5 pseudoheader address field ordering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maybe it's just me but I guess those md5 people made a mess
out of it by having *_md5_hash_* to use daddr, saddr order
instead of the one that is natural (and equal to what csum
functions use). For the segment were sending, the original
addresses are reversed so buff's saddr == skb's daddr and
vice-versa.

Maybe I can finally proceed with unification of some code
after fixing it first... :-)

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 ++--
 net/ipv6/tcp_ipv6.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/ipv4/tcp_ipv4.c')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ba46769c6e97..5c8fa7f1e327 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -583,8 +583,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		rep.th.doff = arg.iov[0].iov_len / 4;
 
 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
-				     key, ip_hdr(skb)->daddr,
-				     ip_hdr(skb)->saddr, &rep.th);
+				     key, ip_hdr(skb)->saddr,
+				     ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dd7bdde7bddc..eab10bc7ff87 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1007,8 +1007,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
 		tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
-				    &ipv6_hdr(skb)->daddr,
-				    &ipv6_hdr(skb)->saddr, t1);
+				    &ipv6_hdr(skb)->saddr,
+				    &ipv6_hdr(skb)->daddr, t1);
 	}
 #endif
 
-- 
cgit v1.2.3