summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c10
-rw-r--r--net/ipv4/arp.c21
-rw-r--r--net/ipv4/devinet.c65
-rw-r--r--net/ipv4/esp4.c17
-rw-r--r--net/ipv4/fib_frontend.c19
-rw-r--r--net/ipv4/fib_hash.c2
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/fib_trie.c28
-rw-r--r--net/ipv4/icmp.c17
-rw-r--r--net/ipv4/igmp.c31
-rw-r--r--net/ipv4/inet_connection_sock.c16
-rw-r--r--net/ipv4/inet_diag.c9
-rw-r--r--net/ipv4/inet_timewait_sock.c7
-rw-r--r--net/ipv4/ip_fragment.c40
-rw-r--r--net/ipv4/ip_gre.c21
-rw-r--r--net/ipv4/ip_options.c3
-rw-r--r--net/ipv4/ip_output.c106
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c4
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/ipv4/multipath_wrandom.c10
-rw-r--r--net/ipv4/netfilter/Kconfig62
-rw-r--r--net/ipv4/netfilter/Makefile13
-rw-r--r--net/ipv4/netfilter/arp_tables.c215
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c186
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c21
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c4
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c223
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c27
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c5
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c74
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c43
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c30
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c4
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c3
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c25
-rw-r--r--net/ipv4/netfilter/ip_queue.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c19
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c12
-rw-r--r--net/ipv4/netfilter/ipt_CONNMARK.c23
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_NOTRACK.c4
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c2
-rw-r--r--net/ipv4/netfilter/ipt_connbytes.c39
-rw-r--r--net/ipv4/netfilter/ipt_connmark.c10
-rw-r--r--net/ipv4/netfilter/ipt_conntrack.c96
-rw-r--r--net/ipv4/netfilter/ipt_helper.c54
-rw-r--r--net/ipv4/netfilter/ipt_recent.c1
-rw-r--r--net/ipv4/netfilter/ipt_state.c6
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c571
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c301
-rw-r--r--net/ipv4/proc.c14
-rw-r--r--net/ipv4/route.c11
-rw-r--r--net/ipv4/sysctl_net_ipv4.c8
-rw-r--r--net/ipv4/tcp.c12
-rw-r--r--net/ipv4/tcp_bic.c16
-rw-r--r--net/ipv4/tcp_cong.c40
-rw-r--r--net/ipv4/tcp_highspeed.c11
-rw-r--r--net/ipv4/tcp_htcp.c13
-rw-r--r--net/ipv4/tcp_hybla.c6
-rw-r--r--net/ipv4/tcp_input.c291
-rw-r--r--net/ipv4/tcp_ipv4.c44
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_output.c75
-rw-r--r--net/ipv4/tcp_scalable.c14
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/tcp_vegas.c42
-rw-r--r--net/ipv4/udp.c7
82 files changed, 2366 insertions, 834 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a9d84f93442c..d368cf249000 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -147,8 +147,7 @@ void inet_sock_destruct(struct sock *sk)
BUG_TRAP(!sk->sk_wmem_queued);
BUG_TRAP(!sk->sk_forward_alloc);
- if (inet->opt)
- kfree(inet->opt);
+ kfree(inet->opt);
dst_release(sk->sk_dst_cache);
sk_refcnt_debug_dec(sk);
}
@@ -229,13 +228,14 @@ static int inet_create(struct socket *sock, int protocol)
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
- int err = -ESOCKTNOSUPPORT;
+ int err;
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
answer = NULL;
lookup_protocol:
+ err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_rcu(p, &inetsw[sock->type]) {
answer = list_entry(p, struct inet_protosw, list);
@@ -253,6 +253,7 @@ lookup_protocol:
if (IPPROTO_IP == answer->protocol)
break;
}
+ err = -EPROTONOSUPPORT;
answer = NULL;
}
@@ -281,9 +282,6 @@ lookup_protocol:
err = -EPERM;
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
- err = -EPROTONOSUPPORT;
- if (!protocol)
- goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8bf312bdea13..b425748f02d7 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -241,7 +241,7 @@ static int arp_constructor(struct neighbour *neigh)
neigh->type = inet_addr_type(addr);
rcu_read_lock();
- in_dev = rcu_dereference(__in_dev_get(dev));
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev == NULL) {
rcu_read_unlock();
return -EINVAL;
@@ -697,12 +697,6 @@ void arp_send(int type, int ptype, u32 dest_ip,
arp_xmit(skb);
}
-static void parp_redo(struct sk_buff *skb)
-{
- nf_reset(skb);
- arp_rcv(skb, skb->dev, NULL, skb->dev);
-}
-
/*
* Process an arp request.
*/
@@ -922,6 +916,11 @@ out:
return 0;
}
+static void parp_redo(struct sk_buff *skb)
+{
+ arp_process(skb);
+}
+
/*
* Receive an arp request from the device layer.
@@ -990,8 +989,8 @@ static int arp_req_set(struct arpreq *r, struct net_device * dev)
ipv4_devconf.proxy_arp = 1;
return 0;
}
- if (__in_dev_get(dev)) {
- __in_dev_get(dev)->cnf.proxy_arp = 1;
+ if (__in_dev_get_rtnl(dev)) {
+ __in_dev_get_rtnl(dev)->cnf.proxy_arp = 1;
return 0;
}
return -ENXIO;
@@ -1096,8 +1095,8 @@ static int arp_req_delete(struct arpreq *r, struct net_device * dev)
ipv4_devconf.proxy_arp = 0;
return 0;
}
- if (__in_dev_get(dev)) {
- __in_dev_get(dev)->cnf.proxy_arp = 0;
+ if (__in_dev_get_rtnl(dev)) {
+ __in_dev_get_rtnl(dev)->cnf.proxy_arp = 0;
return 0;
}
return -ENXIO;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ba2895ae8151..04a6fe3e95a2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -234,7 +234,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
int destroy)
{
struct in_ifaddr *promote = NULL;
- struct in_ifaddr *ifa1 = *ifap;
+ struct in_ifaddr *ifa, *ifa1 = *ifap;
+ struct in_ifaddr *last_prim = in_dev->ifa_list;
+ struct in_ifaddr *prev_prom = NULL;
+ int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
ASSERT_RTNL();
@@ -243,18 +246,22 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
**/
if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
- struct in_ifaddr *ifa;
struct in_ifaddr **ifap1 = &ifa1->ifa_next;
while ((ifa = *ifap1) != NULL) {
+ if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
+ ifa1->ifa_scope <= ifa->ifa_scope)
+ last_prim = ifa;
+
if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
ifa1->ifa_mask != ifa->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, ifa)) {
ifap1 = &ifa->ifa_next;
+ prev_prom = ifa;
continue;
}
- if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
+ if (!do_promote) {
*ifap1 = ifa->ifa_next;
rtmsg_ifa(RTM_DELADDR, ifa);
@@ -283,18 +290,31 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
*/
rtmsg_ifa(RTM_DELADDR, ifa1);
notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
- if (destroy) {
- inet_free_ifa(ifa1);
- if (!in_dev->ifa_list)
- inetdev_destroy(in_dev);
- }
+ if (promote) {
+
+ if (prev_prom) {
+ prev_prom->ifa_next = promote->ifa_next;
+ promote->ifa_next = last_prim->ifa_next;
+ last_prim->ifa_next = promote;
+ }
- if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) {
- /* not sure if we should send a delete notify first? */
promote->ifa_flags &= ~IFA_F_SECONDARY;
rtmsg_ifa(RTM_NEWADDR, promote);
notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote);
+ for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
+ if (ifa1->ifa_mask != ifa->ifa_mask ||
+ !inet_ifa_match(ifa1->ifa_address, ifa))
+ continue;
+ fib_add_ifaddr(ifa);
+ }
+
+ }
+ if (destroy) {
+ inet_free_ifa(ifa1);
+
+ if (!in_dev->ifa_list)
+ inetdev_destroy(in_dev);
}
}
@@ -351,7 +371,7 @@ static int inet_insert_ifa(struct in_ifaddr *ifa)
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
@@ -449,7 +469,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
goto out;
rc = -ENOBUFS;
- if ((in_dev = __in_dev_get(dev)) == NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
in_dev = inetdev_init(dev);
if (!in_dev)
goto out;
@@ -584,7 +604,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if (colon)
*colon = ':';
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
if (tryaddrmatch) {
/* Matthias Andree */
/* compare label and address (4.4BSD style) */
@@ -715,6 +735,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
break;
ret = 0;
if (ifa->ifa_mask != sin->sin_addr.s_addr) {
+ u32 old_mask = ifa->ifa_mask;
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_mask = sin->sin_addr.s_addr;
ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -728,7 +749,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
if ((dev->flags & IFF_BROADCAST) &&
(ifa->ifa_prefixlen < 31) &&
(ifa->ifa_broadcast ==
- (ifa->ifa_local|~ifa->ifa_mask))) {
+ (ifa->ifa_local|~old_mask))) {
ifa->ifa_broadcast = (ifa->ifa_local |
~sin->sin_addr.s_addr);
}
@@ -748,7 +769,7 @@ rarok:
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
@@ -791,7 +812,7 @@ u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
struct in_device *in_dev;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto no_in_dev;
@@ -818,7 +839,7 @@ no_in_dev:
read_lock(&dev_base_lock);
rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
- if ((in_dev = __in_dev_get(dev)) == NULL)
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
continue;
for_primary_ifa(in_dev) {
@@ -887,7 +908,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
if (dev) {
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)))
+ if ((in_dev = __in_dev_get_rcu(dev)))
addr = confirm_addr_indev(in_dev, dst, local, scope);
rcu_read_unlock();
@@ -897,7 +918,7 @@ u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scop
read_lock(&dev_base_lock);
rcu_read_lock();
for (dev = dev_base; dev; dev = dev->next) {
- if ((in_dev = __in_dev_get(dev))) {
+ if ((in_dev = __in_dev_get_rcu(dev))) {
addr = confirm_addr_indev(in_dev, dst, local, scope);
if (addr)
break;
@@ -957,7 +978,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = ptr;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL();
@@ -1078,7 +1099,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
s_ip_idx = 0;
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)) == NULL) {
+ if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
rcu_read_unlock();
continue;
}
@@ -1149,7 +1170,7 @@ void inet_forward_change(void)
for (dev = dev_base; dev; dev = dev->next) {
struct in_device *in_dev;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev)
in_dev->cnf.forwarding = on;
rcu_read_unlock();
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b5a09d1b90b..1b18ce66e7b7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -5,6 +5,7 @@
#include <net/esp.h>
#include <asm/scatterlist.h>
#include <linux/crypto.h>
+#include <linux/kernel.h>
#include <linux/pfkeyv2.h>
#include <linux/random.h>
#include <net/icmp.h>
@@ -42,10 +43,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esp = x->data;
alen = esp->auth.icv_trunc_len;
tfm = esp->conf.tfm;
- blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
- clen = (clen + 2 + blksize-1)&~(blksize-1);
+ blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4);
+ clen = ALIGN(clen + 2, blksize);
if (esp->conf.padlen)
- clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+ clen = ALIGN(clen, esp->conf.padlen);
if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
goto error;
@@ -143,7 +144,7 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc
struct ip_esp_hdr *esph;
struct esp_data *esp = x->data;
struct sk_buff *trailer;
- int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+ int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
int alen = esp->auth.icv_trunc_len;
int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
int nfrags;
@@ -304,16 +305,16 @@ static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap,
static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
- u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+ u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4);
if (x->props.mode) {
- mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+ mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
- mtu += 2 + blksize;
+ mtu = ALIGN(mtu + 2, 4) + blksize - 4;
}
if (esp->conf.padlen)
- mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+ mtu = ALIGN(mtu, esp->conf.padlen);
return mtu + x->props.header_len + esp->auth.icv_trunc_len;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4e1379f71269..19b1b984d687 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -173,7 +173,7 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
no_addr = rpf = 0;
rcu_read_lock();
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
rpf = IN_DEV_RPFILTER(in_dev);
@@ -266,8 +266,7 @@ int ip_rt_ioctl(unsigned int cmd, void __user *arg)
if (tb)
err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
}
- if (rta.rta_mx)
- kfree(rta.rta_mx);
+ kfree(rta.rta_mx);
}
rtnl_unlock();
return err;
@@ -408,7 +407,7 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr
tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
}
-static void fib_add_ifaddr(struct in_ifaddr *ifa)
+void fib_add_ifaddr(struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct net_device *dev = in_dev->dev;
@@ -545,12 +544,16 @@ static void nl_fib_input(struct sock *sk, int len)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh = NULL;
struct fib_result_nl *frn;
- int err;
u32 pid;
struct fib_table *tb;
- skb = skb_recv_datagram(sk, 0, 0, &err);
+ skb = skb_dequeue(&sk->sk_receive_queue);
nlh = (struct nlmsghdr *)skb->data;
+ if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
+ nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
+ kfree_skb(skb);
+ return;
+ }
frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
tb = fib_get_table(frn->tb_id_in);
@@ -591,7 +594,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa);
- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
+ if (ifa->ifa_dev->ifa_list == NULL) {
/* Last address was deleted from this interface.
Disable IP.
*/
@@ -607,7 +610,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2);
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2a8c9afc3695..7ea0209cb169 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -975,7 +975,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v)
static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
{
- static unsigned type2flags[RTN_MAX + 1] = {
+ static const unsigned type2flags[RTN_MAX + 1] = {
[7] = RTF_REJECT, [8] = RTF_REJECT,
};
unsigned flags = type2flags[type];
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d41219e8037c..6d2a6ac070e3 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -83,7 +83,7 @@ for (nhsel=0; nhsel < 1; nhsel++)
#define endfor_nexthops(fi) }
-static struct
+static const struct
{
int error;
u8 scope;
@@ -1087,7 +1087,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
rta->rta_oif = &dev->ifindex;
if (colon) {
struct in_ifaddr *ifa;
- struct in_device *in_dev = __in_dev_get(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
if (!in_dev)
return -ENODEV;
*colon = ':';
@@ -1268,7 +1268,7 @@ int fib_sync_up(struct net_device *dev)
}
if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
continue;
- if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
+ if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
continue;
alive++;
spin_lock_bh(&fib_multipath_lock);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 50c0519cd70d..705e3ce86df9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -286,6 +286,8 @@ static inline void check_tnode(const struct tnode *tn)
static int halve_threshold = 25;
static int inflate_threshold = 50;
+static int halve_threshold_root = 15;
+static int inflate_threshold_root = 25;
static void __alias_free_mem(struct rcu_head *head)
@@ -449,6 +451,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
int i;
int err = 0;
struct tnode *old_tn;
+ int inflate_threshold_use;
+ int halve_threshold_use;
if (!tn)
return NULL;
@@ -541,10 +545,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
check_tnode(tn);
+ /* Keep root node larger */
+
+ if(!tn->parent)
+ inflate_threshold_use = inflate_threshold_root;
+ else
+ inflate_threshold_use = inflate_threshold;
+
err = 0;
while ((tn->full_children > 0 &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
- inflate_threshold * tnode_child_length(tn))) {
+ inflate_threshold_use * tnode_child_length(tn))) {
old_tn = tn;
tn = inflate(t, tn);
@@ -564,10 +575,18 @@ static struct node *resize(struct trie *t, struct tnode *tn)
* node is above threshold.
*/
+
+ /* Keep root node larger */
+
+ if(!tn->parent)
+ halve_threshold_use = halve_threshold_root;
+ else
+ halve_threshold_use = halve_threshold;
+
err = 0;
while (tn->bits > 1 &&
100 * (tnode_child_length(tn) - tn->empty_children) <
- halve_threshold * tnode_child_length(tn)) {
+ halve_threshold_use * tnode_child_length(tn)) {
old_tn = tn;
tn = halve(t, tn);
@@ -2359,6 +2378,7 @@ static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
*/
static int fib_route_seq_show(struct seq_file *seq, void *v)
{
+ const struct fib_trie_iter *iter = seq->private;
struct leaf *l = v;
int i;
char bf[128];
@@ -2370,6 +2390,8 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
return 0;
}
+ if (iter->trie == trie_local)
+ return 0;
if (IS_TNODE(l))
return 0;
@@ -2385,7 +2407,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
prefix = htonl(l->key);
list_for_each_entry_rcu(fa, &li->falh, fa_list) {
- const struct fib_info *fi = rcu_dereference(fa->fa_info);
+ const struct fib_info *fi = fa->fa_info;
unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 24eb56ae1b5a..92e23b2ad4d2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -188,7 +188,7 @@ struct icmp_err icmp_err_convert[] = {
/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all;
-int sysctl_icmp_echo_ignore_broadcasts;
+int sysctl_icmp_echo_ignore_broadcasts = 1;
/* Control parameter - ignore bogus broadcast responses? */
int sysctl_icmp_ignore_bogus_error_responses;
@@ -220,7 +220,7 @@ struct icmp_control {
short error; /* This ICMP is classed as an error message */
};
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
/*
* The ICMP socket(s). This is the most convenient way to flow control
@@ -934,11 +934,11 @@ int icmp_rcv(struct sk_buff *skb)
case CHECKSUM_HW:
if (!(u16)csum_fold(skb->csum))
break;
- LIMIT_NETDEBUG(KERN_DEBUG "icmp v4 hw csum failure\n");
+ /* fall through */
case CHECKSUM_NONE:
- if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
+ skb->csum = 0;
+ if (__skb_checksum_complete(skb))
goto error;
- default:;
}
if (!pskb_pull(skb, sizeof(struct icmphdr)))
@@ -994,7 +994,7 @@ error:
/*
* This table is the definition of how we handle ICMP.
*/
-static struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
+static const struct icmp_control icmp_pointers[NR_ICMP_TYPES + 1] = {
[ICMP_ECHOREPLY] = {
.output_entry = ICMP_MIB_OUTECHOREPS,
.input_entry = ICMP_MIB_INECHOREPS,
@@ -1108,12 +1108,9 @@ void __init icmp_init(struct net_proto_family *ops)
struct inet_sock *inet;
int i;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_cpu(i) {
int err;
- if (!cpu_possible(i))
- continue;
-
err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
&per_cpu(__icmp_socket, i));
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 70c44e4c3ceb..4a195c724f01 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -872,11 +872,18 @@ int igmp_rcv(struct sk_buff *skb)
return 0;
}
- if (!pskb_may_pull(skb, sizeof(struct igmphdr)) ||
- (u16)csum_fold(skb_checksum(skb, 0, len, 0))) {
- in_dev_put(in_dev);
- kfree_skb(skb);
- return 0;
+ if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
+ goto drop;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_HW:
+ if (!(u16)csum_fold(skb->csum))
+ break;
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ if (__skb_checksum_complete(skb))
+ goto drop;
}
ih = skb->h.igmph;
@@ -890,7 +897,10 @@ int igmp_rcv(struct sk_buff *skb)
/* Is it our report looped back? */
if (((struct rtable*)skb->dst)->fl.iif == 0)
break;
- igmp_heard_report(in_dev, ih->group);
+ /* don't rely on MC router hearing unicast reports */
+ if (skb->pkt_type == PACKET_MULTICAST ||
+ skb->pkt_type == PACKET_BROADCAST)
+ igmp_heard_report(in_dev, ih->group);
break;
case IGMP_PIM:
#ifdef CONFIG_IP_PIMSM_V1
@@ -906,6 +916,8 @@ int igmp_rcv(struct sk_buff *skb)
default:
NETDEBUG(KERN_DEBUG "New IGMP type=%d, why we do not know about it?\n", ih->type);
}
+
+drop:
in_dev_put(in_dev);
kfree_skb(skb);
return 0;
@@ -1323,7 +1335,7 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
}
if (dev) {
imr->imr_ifindex = dev->ifindex;
- idev = __in_dev_get(dev);
+ idev = __in_dev_get_rtnl(dev);
}
return idev;
}
@@ -1908,8 +1920,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
goto done;
}
- } else
+ } else {
newpsl = NULL;
+ (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
+ msf->imsf_fmode, 0, NULL, 0);
+ }
psl = pmc->sflist;
if (psl) {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fe3c6d3d0c91..3fe021f1a566 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -78,17 +78,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
int low = sysctl_local_port_range[0];
int high = sysctl_local_port_range[1];
int remaining = (high - low) + 1;
- int rover;
+ int rover = net_random() % (high - low) + low;
- spin_lock(&hashinfo->portalloc_lock);
- if (hashinfo->port_rover < low)
- rover = low;
- else
- rover = hashinfo->port_rover;
do {
- rover++;
- if (rover > high)
- rover = low;
head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
@@ -97,9 +89,9 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
break;
next:
spin_unlock(&head->lock);
+ if (++rover > high)
+ rover = low;
} while (--remaining > 0);
- hashinfo->port_rover = rover;
- spin_unlock(&hashinfo->portalloc_lock);
/* Exhausted local port range during search? It is not
* possible for us to be holding one of the bind hash
@@ -494,7 +486,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
- const unsigned int __nocast priority)
+ const gfp_t priority)
{
struct sock *newsk = sk_clone(sk, priority);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 71f3c7350c6e..39061ed53cfd 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -724,12 +724,6 @@ done:
return skb->len;
}
-static int inet_diag_dump_done(struct netlink_callback *cb)
-{
- return 0;
-}
-
-
static __inline__ int
inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
@@ -760,8 +754,7 @@ inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
goto err_inval;
}
return netlink_dump_start(idiagnl, skb, nlh,
- inet_diag_dump,
- inet_diag_dump_done);
+ inet_diag_dump, NULL);
} else {
return inet_diag_get_exact(skb, nlh);
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 4d1502a49852..a010e9a68811 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -20,7 +20,7 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
/* Unlink from established hashes. */
- struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent];
+ struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash);
write_lock(&ehead->lock);
if (hlist_unhashed(&tw->tw_node)) {
@@ -60,7 +60,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
{
const struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent];
+ struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
@@ -106,11 +106,12 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_dport = inet->dport;
tw->tw_family = sk->sk_family;
tw->tw_reuse = sk->sk_reuse;
- tw->tw_hashent = sk->sk_hashent;
+ tw->tw_hash = sk->sk_hash;
tw->tw_ipv6only = 0;
tw->tw_prot = sk->sk_prot_creator;
atomic_set(&tw->tw_refcnt, 1);
inet_twsk_dead_node_init(tw);
+ __module_get(tw->tw_prot->owner);
}
return tw;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e7d26d9943c2..8ce0ce2ee48e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -71,7 +71,7 @@ struct ipfrag_skb_cb
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
- struct ipq *next; /* linked list pointers */
+ struct hlist_node list;
struct list_head lru_list; /* lru list member */
u32 user;
u32 saddr;
@@ -89,7 +89,6 @@ struct ipq {
spinlock_t lock;
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
- struct ipq **pprev;
int iif;
struct timeval stamp;
};
@@ -99,7 +98,7 @@ struct ipq {
#define IPQ_HASHSZ 64
/* Per-bucket lock is easy to add now. */
-static struct ipq *ipq_hash[IPQ_HASHSZ];
+static struct hlist_head ipq_hash[IPQ_HASHSZ];
static DEFINE_RWLOCK(ipfrag_lock);
static u32 ipfrag_hash_rnd;
static LIST_HEAD(ipq_lru_list);
@@ -107,9 +106,7 @@ int ip_frag_nqueues = 0;
static __inline__ void __ipq_unlink(struct ipq *qp)
{
- if(qp->next)
- qp->next->pprev = qp->pprev;
- *qp->pprev = qp->next;
+ hlist_del(&qp->list);
list_del(&qp->lru_list);
ip_frag_nqueues--;
}
@@ -139,27 +136,18 @@ static void ipfrag_secret_rebuild(unsigned long dummy)
get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
for (i = 0; i < IPQ_HASHSZ; i++) {
struct ipq *q;
+ struct hlist_node *p, *n;
- q = ipq_hash[i];
- while (q) {
- struct ipq *next = q->next;
+ hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
unsigned int hval = ipqhashfn(q->id, q->saddr,
q->daddr, q->protocol);
if (hval != i) {
- /* Unlink. */
- if (q->next)
- q->next->pprev = q->pprev;
- *q->pprev = q->next;
+ hlist_del(&q->list);
/* Relink to new hash chain. */
- if ((q->next = ipq_hash[hval]) != NULL)
- q->next->pprev = &q->next;
- ipq_hash[hval] = q;
- q->pprev = &ipq_hash[hval];
+ hlist_add_head(&q->list, &ipq_hash[hval]);
}
-
- q = next;
}
}
write_unlock(&ipfrag_lock);
@@ -310,14 +298,16 @@ out:
static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
{
struct ipq *qp;
-
+#ifdef CONFIG_SMP
+ struct hlist_node *n;
+#endif
write_lock(&ipfrag_lock);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
* promoted read lock to write lock.
*/
- for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+ hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
if(qp->id == qp_in->id &&
qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr &&
@@ -337,10 +327,7 @@ static struct ipq *ip_frag_intern(unsigned int hash, struct ipq *qp_in)
atomic_inc(&qp->refcnt);
atomic_inc(&qp->refcnt);
- if((qp->next = ipq_hash[hash]) != NULL)
- qp->next->pprev = &qp->next;
- ipq_hash[hash] = qp;
- qp->pprev = &ipq_hash[hash];
+ hlist_add_head(&qp->list, &ipq_hash[hash]);
INIT_LIST_HEAD(&qp->lru_list);
list_add_tail(&qp->lru_list, &ipq_lru_list);
ip_frag_nqueues++;
@@ -392,9 +379,10 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
__u8 protocol = iph->protocol;
unsigned int hash = ipqhashfn(id, saddr, daddr, protocol);
struct ipq *qp;
+ struct hlist_node *n;
read_lock(&ipfrag_lock);
- for(qp = ipq_hash[hash]; qp; qp = qp->next) {
+ hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
if(qp->id == id &&
qp->saddr == saddr &&
qp->daddr == daddr &&
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f0d5740d7e22..a4c347c3b8e3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -577,15 +577,16 @@ static int ipgre_rcv(struct sk_buff *skb)
goto drop_nolock;
if (flags&GRE_CSUM) {
- if (skb->ip_summed == CHECKSUM_HW) {
+ switch (skb->ip_summed) {
+ case CHECKSUM_HW:
csum = (u16)csum_fold(skb->csum);
- if (csum)
- skb->ip_summed = CHECKSUM_NONE;
- }
- if (skb->ip_summed == CHECKSUM_NONE) {
- skb->csum = skb_checksum(skb, 0, skb->len, 0);
+ if (!csum)
+ break;
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
skb->ip_summed = CHECKSUM_HW;
- csum = (u16)csum_fold(skb->csum);
}
offset += 4;
}
@@ -1104,10 +1105,10 @@ static int ipgre_open(struct net_device *dev)
return -EADDRNOTAVAIL;
dev = rt->u.dst.dev;
ip_rt_put(rt);
- if (__in_dev_get(dev) == NULL)
+ if (__in_dev_get_rtnl(dev) == NULL)
return -EADDRNOTAVAIL;
t->mlink = dev->ifindex;
- ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr);
+ ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
}
return 0;
}
@@ -1216,7 +1217,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
return 0;
}
-int __init ipgre_fb_tunnel_init(struct net_device *dev)
+static int __init ipgre_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
struct iphdr *iph = &tunnel->parms.iph;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index bce4e875193b..dbe12da8d8b3 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -510,8 +510,7 @@ static int ip_options_get_finish(struct ip_options **optp,
kfree(opt);
return -EINVAL;
}
- if (*optp)
- kfree(*optp);
+ kfree(*optp);
*optp = opt;
return 0;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3f1a263e1249..eba64e2bd397 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -275,7 +275,8 @@ int ip_output(struct sk_buff *skb)
{
IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
- if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->tso_size)
+ if (skb->len > dst_mtu(skb->dst) &&
+ !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
return ip_fragment(skb, ip_finish_output);
else
return ip_finish_output(skb);
@@ -352,7 +353,8 @@ packet_routed:
ip_options_build(skb, opt, inet->daddr, rt, 0);
}
- ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
+ ip_select_ident_more(iph, &rt->u.dst, sk,
+ (skb_shinfo(skb)->tso_segs ?: 1) - 1);
/* Add an IP checksum. */
ip_send_check(iph);
@@ -391,6 +393,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->nfct = from->nfct;
nf_conntrack_get(to->nfct);
to->nfctinfo = from->nfctinfo;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ to->ipvs_property = from->ipvs_property;
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(to->nf_bridge);
to->nf_bridge = from->nf_bridge;
@@ -685,6 +690,60 @@ csum_page(struct page *page, int offset, int copy)
return csum;
}
+static inline int ip_ufo_append_data(struct sock *sk,
+ int getfrag(void *from, char *to, int offset, int len,
+ int odd, struct sk_buff *skb),
+ void *from, int length, int hh_len, int fragheaderlen,
+ int transhdrlen, int mtu,unsigned int flags)
+{
+ struct sk_buff *skb;
+ int err;
+
+ /* There is support for UDP fragmentation offload by network
+ * device, so create one single skb packet containing complete
+ * udp datagram
+ */
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ skb = sock_alloc_send_skb(sk,
+ hh_len + fragheaderlen + transhdrlen + 20,
+ (flags & MSG_DONTWAIT), &err);
+
+ if (skb == NULL)
+ return err;
+
+ /* reserve space for Hardware header */
+ skb_reserve(skb, hh_len);
+
+ /* create space for UDP/IP header */
+ skb_put(skb,fragheaderlen + transhdrlen);
+
+ /* initialize network header pointer */
+ skb->nh.raw = skb->data;
+
+ /* initialize protocol header pointer */
+ skb->h.raw = skb->data + fragheaderlen;
+
+ skb->ip_summed = CHECKSUM_HW;
+ skb->csum = 0;
+ sk->sk_sndmsg_off = 0;
+ }
+
+ err = skb_append_datato_frags(sk,skb, getfrag, from,
+ (length - transhdrlen));
+ if (!err) {
+ /* specify the length of each IP datagram fragment*/
+ skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+
+ return 0;
+ }
+ /* There is not enough support do UFO ,
+ * so follow normal path
+ */
+ kfree_skb(skb);
+ return err;
+}
+
/*
* ip_append_data() and ip_append_page() can make one large IP datagram
* from many pieces of data. Each pieces will be holded on the socket
@@ -774,6 +833,15 @@ int ip_append_data(struct sock *sk,
csummode = CHECKSUM_HW;
inet->cork.length += length;
+ if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+ if(ip_ufo_append_data(sk, getfrag, from, length, hh_len,
+ fragheaderlen, transhdrlen, mtu, flags))
+ goto error;
+
+ return 0;
+ }
/* So, what's going on in the loop below?
*
@@ -1005,14 +1073,23 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
return -EINVAL;
inet->cork.length += size;
+ if ((sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->u.dst.dev->features & NETIF_F_UFO))
+ skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+
while (size > 0) {
int i;
- /* Check if the remaining data fits into current packet. */
- len = mtu - skb->len;
- if (len < size)
- len = maxfraglen - skb->len;
+ if (skb_shinfo(skb)->ufo_size)
+ len = size;
+ else {
+
+ /* Check if the remaining data fits into current packet. */
+ len = mtu - skb->len;
+ if (len < size)
+ len = maxfraglen - skb->len;
+ }
if (len <= 0) {
struct sk_buff *skb_prev;
char *data;
@@ -1020,10 +1097,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
int alloclen;
skb_prev = skb;
- if (skb_prev)
- fraggap = skb_prev->len - maxfraglen;
- else
- fraggap = 0;
+ fraggap = skb_prev->len - maxfraglen;
alloclen = fragheaderlen + hh_len + fraggap + 15;
skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
@@ -1189,10 +1263,8 @@ int ip_push_pending_frames(struct sock *sk)
out:
inet->cork.flags &= ~IPCORK_OPT;
- if (inet->cork.opt) {
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- }
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
if (inet->cork.rt) {
ip_rt_put(inet->cork.rt);
inet->cork.rt = NULL;
@@ -1216,10 +1288,8 @@ void ip_flush_pending_frames(struct sock *sk)
kfree_skb(skb);
inet->cork.flags &= ~IPCORK_OPT;
- if (inet->cork.opt) {
- kfree(inet->cork.opt);
- inet->cork.opt = NULL;
- }
+ kfree(inet->cork.opt);
+ inet->cork.opt = NULL;
if (inet->cork.rt) {
ip_rt_put(inet->cork.rt);
inet->cork.rt = NULL;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2f0b47da5b37..4f2d87257309 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -202,8 +202,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s
if (ra->sk == sk) {
if (on) {
write_unlock_bh(&ip_ra_lock);
- if (new_ra)
- kfree(new_ra);
+ kfree(new_ra);
return -EADDRINUSE;
}
*rap = ra->next;
@@ -446,8 +445,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
#endif
}
opt = xchg(&inet->opt, opt);
- if (opt)
- kfree(opt);
+ kfree(opt);
break;
}
case IP_PKTINFO:
@@ -828,10 +826,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
err = ip_mc_msfilter(sk, msf, ifindex);
mc_msf_out:
- if (msf)
- kfree(msf);
- if (gsf)
- kfree(gsf);
+ kfree(msf);
+ kfree(gsf);
break;
}
case IP_ROUTER_ALERT:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9dbf5909f3a6..302b7eb507c9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -149,7 +149,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
dev->flags |= IFF_MULTICAST;
- in_dev = __in_dev_get(dev);
+ in_dev = __in_dev_get_rtnl(dev);
if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
goto failure;
in_dev->cnf.rp_filter = 0;
@@ -278,7 +278,7 @@ static int vif_delete(int vifi)
dev_set_allmulti(dev, -1);
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
in_dev->cnf.mc_forwarding--;
ip_rt_multicast_event(in_dev);
}
@@ -421,7 +421,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
return -EINVAL;
}
- if ((in_dev = __in_dev_get(dev)) == NULL)
+ if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
return -EADDRNOTAVAIL;
in_dev->cnf.mc_forwarding++;
dev_set_allmulti(dev, +1);
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 6e092dadb388..d7eb680101c2 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -110,8 +110,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
return 0;
out:
- if (inc->timeout_table)
- kfree(inc->timeout_table);
+ kfree(inc->timeout_table);
kfree(inc);
return ret;
}
@@ -136,8 +135,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
list_del(&inc->a_list);
- if (inc->timeout_table != NULL)
- kfree(inc->timeout_table);
+ kfree(inc->timeout_table);
kfree(inc);
}
@@ -604,7 +602,7 @@ static struct file_operations ip_vs_app_fops = {
/*
* Replace a segment of data with a new segment
*/
-int ip_vs_skb_replace(struct sk_buff *skb, int pri,
+int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
char *o_buf, int o_len, char *n_buf, int n_len)
{
struct iphdr *iph;
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index f828fa2eb7de..2a3a8c59c655 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -771,7 +771,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
* The drop rate array needs tuning for real environments.
* Called from timer bh only => no locking
*/
- static char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
static char todrop_counter[9] = {0};
int i;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 981cc3244ef2..1a0843cd58a9 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1009,11 +1009,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
if (sysctl_ip_vs_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
- } else {
- /* don't restart its timer, and silently
- drop the packet. */
- __ip_vs_conn_put(cp);
}
+ /* don't restart its timer, and silently
+ drop the packet. */
+ __ip_vs_conn_put(cp);
return NF_DROP;
}
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 2d66848e7aa0..9bdcf31b760e 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -1909,7 +1909,7 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN SVCDEST_ARG_LEN
-static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
[SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
[SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
[SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
@@ -2180,7 +2180,7 @@ __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
-static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
[GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
[GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
[GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index c19408973c09..0e878fd6215c 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -251,7 +251,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
#define TCP_DIR_OUTPUT 4
#define TCP_DIR_INPUT_ONLY 8
-static int tcp_state_off[IP_VS_DIR_LAST] = {
+static const int tcp_state_off[IP_VS_DIR_LAST] = {
[IP_VS_DIR_INPUT] = TCP_DIR_INPUT,
[IP_VS_DIR_OUTPUT] = TCP_DIR_OUTPUT,
[IP_VS_DIR_INPUT_ONLY] = TCP_DIR_INPUT_ONLY,
diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c
index bd7d75b6abe0..d34a9fa608e0 100644
--- a/net/ipv4/multipath_wrandom.c
+++ b/net/ipv4/multipath_wrandom.c
@@ -207,16 +207,12 @@ static void wrandom_select_route(const struct flowi *flp,
decision = mpc->rt;
last_power = mpc->power;
- if (last_mpc)
- kfree(last_mpc);
-
+ kfree(last_mpc);
last_mpc = mpc;
}
- if (last_mpc) {
- /* concurrent __multipath_flush may lead to !last_mpc */
- kfree(last_mpc);
- }
+ /* concurrent __multipath_flush may lead to !last_mpc */
+ kfree(last_mpc);
decision->u.dst.__use++;
*rp = decision;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3cf9b451675c..0bc00528d888 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -5,6 +5,20 @@
menu "IP: Netfilter Configuration"
depends on INET && NETFILTER
+config NF_CONNTRACK_IPV4
+ tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && NF_CONNTRACK
+ ---help---
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is IPv4 support on Layer 3 independent connection tracking.
+ Layer 3 independent connection tracking is experimental scheme
+ which generalize ip_conntrack to support other layer 3 protocols.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# connection tracking, helpers and protocols
config IP_NF_CONNTRACK
tristate "Connection tracking (required for masq/NAT)"
@@ -139,9 +153,10 @@ config IP_NF_AMANDA
config IP_NF_PPTP
tristate 'PPTP protocol support'
+ depends on IP_NF_CONNTRACK
help
This module adds support for PPTP (Point to Point Tunnelling
- Protocol, RFC2637) conncection tracking and NAT.
+ Protocol, RFC2637) connection tracking and NAT.
If you are running PPTP sessions over a stateful firewall or NAT
box, you may want to enable this feature.
@@ -208,8 +223,8 @@ config IP_NF_MATCH_PKTTYPE
tristate "Packet type match support"
depends on IP_NF_IPTABLES
help
- Packet type matching allows you to match a packet by
- its "class", eg. BROADCAST, MULTICAST, ...
+ Packet type matching allows you to match a packet by
+ its "class", eg. BROADCAST, MULTICAST, ...
Typical usage:
iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG
@@ -316,7 +331,8 @@ config IP_NF_MATCH_TCPMSS
config IP_NF_MATCH_HELPER
tristate "Helper match support"
- depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ depends on IP_NF_IPTABLES
+ depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
Helper matching allows you to match packets in dynamic connections
tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -325,7 +341,8 @@ config IP_NF_MATCH_HELPER
config IP_NF_MATCH_STATE
tristate "Connection state match support"
- depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ depends on IP_NF_IPTABLES
+ depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
Connection state matching allows you to match packets based on their
relationship to a tracked connection (ie. previous packets). This
@@ -335,7 +352,8 @@ config IP_NF_MATCH_STATE
config IP_NF_MATCH_CONNTRACK
tristate "Connection tracking match support"
- depends on IP_NF_CONNTRACK && IP_NF_IPTABLES
+ depends on IP_NF_IPTABLES
+ depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
This is a general conntrack match module, a superset of the state match.
@@ -421,7 +439,8 @@ config IP_NF_MATCH_COMMENT
config IP_NF_MATCH_CONNMARK
tristate 'Connection mark match support'
- depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES
+ depends on IP_NF_IPTABLES
+ depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
This option adds a `connmark' match, which allows you to match the
connection mark value previously set for the session by `CONNMARK'.
@@ -432,7 +451,8 @@ config IP_NF_MATCH_CONNMARK
config IP_NF_MATCH_CONNBYTES
tristate 'Connection byte/packet counter match support'
- depends on IP_NF_CT_ACCT && IP_NF_IPTABLES
+ depends on IP_NF_IPTABLES
+ depends on (IP_NF_CONNTRACK && IP_NF_CT_ACCT) || (NF_CT_ACCT && NF_CONNTRACK_IPV4)
help
This option adds a `connbytes' match, which allows you to match the
number of bytes and/or packets for each direction within a connection.
@@ -498,9 +518,14 @@ config IP_NF_TARGET_LOG
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ULOG
- tristate "ULOG target support"
+ tristate "ULOG target support (OBSOLETE)"
depends on IP_NF_IPTABLES
---help---
+
+ This option enables the old IPv4-only "ipt_ULOG" implementation
+ which has been obsoleted by the new "nfnetlink_log" code (see
+ CONFIG_NETFILTER_NETLINK_LOG).
+
This option adds a `ULOG' target, which allows you to create rules in
any iptables table. The packet is passed to a userspace logging
daemon using netlink multicast sockets; unlike the LOG target
@@ -537,6 +562,17 @@ config IP_NF_TARGET_TCPMSS
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_NFQUEUE
+ tristate "NFQUEUE Target Support"
+ depends on IP_NF_IPTABLES
+ help
+ This Target replaced the old obsolete QUEUE target.
+
+ As opposed to QUEUE, it supports 65535 different queues,
+ not just one.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# NAT + specific targets
config IP_NF_NAT
tristate "Full NAT"
@@ -730,7 +766,8 @@ config IP_NF_TARGET_TTL
config IP_NF_TARGET_CONNMARK
tristate 'CONNMARK target support'
- depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE
+ depends on IP_NF_MANGLE
+ depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
This option adds a `CONNMARK' target, which allows one to manipulate
the connection mark value. Similar to the MARK target, but
@@ -742,7 +779,8 @@ config IP_NF_TARGET_CONNMARK
config IP_NF_TARGET_CLUSTERIP
tristate "CLUSTERIP target support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK_MARK && IP_NF_IPTABLES && EXPERIMENTAL
+ depends on IP_NF_MANGLE && EXPERIMENTAL
+ depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
help
The CLUSTERIP target allows you to build load-balancing clusters of
network servers without having a dedicated load-balancing
@@ -765,7 +803,7 @@ config IP_NF_RAW
config IP_NF_TARGET_NOTRACK
tristate 'NOTRACK target support'
depends on IP_NF_RAW
- depends on IP_NF_CONNTRACK
+ depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
help
The NOTRACK target allows a select rule to specify
which packets *not* to enter the conntrack/NAT
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3d45d3c0283c..058c48e258fc 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,7 +4,8 @@
# objects for the standalone - connection tracking / NAT
ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
+iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
@@ -40,7 +41,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o ip_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
# matches
@@ -92,6 +93,7 @@ obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
+obj-$(CONFIG_IP_NF_TARGET_NFQUEUE) += ipt_NFQUEUE.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -101,4 +103,9 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+
+# l3 independent conntrack
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index fa1634256680..3c2e9639bba6 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -347,58 +347,106 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
return verdict;
}
-static inline void *find_inlist_lock_noload(struct list_head *head,
- const char *name,
- int *error,
- struct semaphore *mutex)
+/*
+ * These are weird, but module loading must not be done with mutex
+ * held (since they will register), and we have to have a single
+ * function to use try_then_request_module().
+ */
+
+/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
+static inline struct arpt_table *find_table_lock(const char *name)
{
- void *ret;
+ struct arpt_table *t;
- *error = down_interruptible(mutex);
- if (*error != 0)
- return NULL;
+ if (down_interruptible(&arpt_mutex) != 0)
+ return ERR_PTR(-EINTR);
- ret = list_named_find(head, name);
- if (!ret) {
- *error = -ENOENT;
- up(mutex);
- }
- return ret;
+ list_for_each_entry(t, &arpt_tables, list)
+ if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ return t;
+ up(&arpt_mutex);
+ return NULL;
}
-#ifndef CONFIG_KMOD
-#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
-#else
-static void *
-find_inlist_lock(struct list_head *head,
- const char *name,
- const char *prefix,
- int *error,
- struct semaphore *mutex)
+
+/* Find target, grabs ref. Returns ERR_PTR() on error. */
+static inline struct arpt_target *find_target(const char *name, u8 revision)
{
- void *ret;
+ struct arpt_target *t;
+ int err = 0;
- ret = find_inlist_lock_noload(head, name, error, mutex);
- if (!ret) {
- duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
- request_module("%s%s", prefix, name);
- ret = find_inlist_lock_noload(head, name, error, mutex);
+ if (down_interruptible(&arpt_mutex) != 0)
+ return ERR_PTR(-EINTR);
+
+ list_for_each_entry(t, &arpt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision == revision) {
+ if (try_module_get(t->me)) {
+ up(&arpt_mutex);
+ return t;
+ }
+ } else
+ err = -EPROTOTYPE; /* Found something. */
+ }
}
+ up(&arpt_mutex);
+ return ERR_PTR(err);
+}
- return ret;
+struct arpt_target *arpt_find_target(const char *name, u8 revision)
+{
+ struct arpt_target *target;
+
+ target = try_then_request_module(find_target(name, revision),
+ "arpt_%s", name);
+ if (IS_ERR(target) || !target)
+ return NULL;
+ return target;
}
-#endif
-static inline struct arpt_table *arpt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+static int target_revfn(const char *name, u8 revision, int *bestp)
{
- return find_inlist_lock(&arpt_tables, name, "arptable_", error, mutex);
+ struct arpt_target *t;
+ int have_rev = 0;
+
+ list_for_each_entry(t, &arpt_target, list) {
+ if (strcmp(t->name, name) == 0) {
+ if (t->revision > *bestp)
+ *bestp = t->revision;
+ if (t->revision == revision)
+ have_rev =1;
+ }
+ }
+ return have_rev;
}
-static struct arpt_target *arpt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+/* Returns true or false (if no such extension at all) */
+static inline int find_revision(const char *name, u8 revision,
+ int (*revfn)(const char *, u8, int *),
+ int *err)
{
- return find_inlist_lock(&arpt_target, name, "arpt_", error, mutex);
+ int have_rev, best = -1;
+
+ if (down_interruptible(&arpt_mutex) != 0) {
+ *err = -EINTR;
+ return 1;
+ }
+ have_rev = revfn(name, revision, &best);
+ up(&arpt_mutex);
+
+ /* Nothing at all? Return 0 to try loading module. */
+ if (best == -1) {
+ *err = -ENOENT;
+ return 0;
+ }
+
+ *err = best;
+ if (!have_rev)
+ *err = -EPROTONOSUPPORT;
+ return 1;
}
+
/* All zeroes == unconditional rule. */
static inline int unconditional(const struct arpt_arp *arp)
{
@@ -544,17 +592,15 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
}
t = arpt_get_target(e);
- target = arpt_find_target_lock(t->u.user.name, &ret, &arpt_mutex);
- if (!target) {
+ target = try_then_request_module(find_target(t->u.user.name,
+ t->u.user.revision),
+ "arpt_%s", t->u.user.name);
+ if (IS_ERR(target) || !target) {
duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ ret = target ? PTR_ERR(target) : -ENOENT;
goto out;
}
- if (!try_module_get((target->me))) {
- ret = -ENOENT;
- goto out_unlock;
- }
t->u.kernel.target = target;
- up(&arpt_mutex);
if (t->u.kernel.target == &arpt_standard_target) {
if (!standard_check(t, size)) {
@@ -576,8 +622,6 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i
(*i)++;
return 0;
-out_unlock:
- up(&arpt_mutex);
out:
return ret;
}
@@ -716,8 +760,10 @@ static int translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < num_possible_cpus(); i++) {
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ for_each_cpu(i) {
+ if (i == 0)
+ continue;
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
}
@@ -767,7 +813,7 @@ static void get_counters(const struct arpt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ for_each_cpu(cpu) {
i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -844,8 +890,8 @@ static int get_entries(const struct arpt_get_entries *entries,
int ret;
struct arpt_table *t;
- t = arpt_find_table_lock(entries->name, &ret, &arpt_mutex);
- if (t) {
+ t = find_table_lock(entries->name);
+ if (t || !IS_ERR(t)) {
duprintf("t->private->number = %u\n",
t->private->number);
if (entries->size == t->private->size)
@@ -857,10 +903,10 @@ static int get_entries(const struct arpt_get_entries *entries,
entries->size);
ret = -EINVAL;
}
+ module_put(t->me);
up(&arpt_mutex);
} else
- duprintf("get_entries: Can't find %s!\n",
- entries->name);
+ ret = t ? PTR_ERR(t) : -ENOENT;
return ret;
}
@@ -885,7 +931,8 @@ static int do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ + SMP_ALIGN(tmp.size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
@@ -910,22 +957,19 @@ static int do_replace(void __user *user, unsigned int len)
duprintf("arp_tables: Translated table\n");
- t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
- if (!t)
+ t = try_then_request_module(find_table_lock(tmp.name),
+ "arptable_%s", tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
+ }
/* You lied! */
if (tmp.valid_hooks != t->valid_hooks) {
duprintf("Valid hook crap: %08X vs %08X\n",
tmp.valid_hooks, t->valid_hooks);
ret = -EINVAL;
- goto free_newinfo_counters_untrans_unlock;
- }
-
- /* Get a reference in advance, we're not allowed fail later */
- if (!try_module_get(t->me)) {
- ret = -EBUSY;
- goto free_newinfo_counters_untrans_unlock;
+ goto put_module;
}
oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
@@ -956,7 +1000,6 @@ static int do_replace(void __user *user, unsigned int len)
put_module:
module_put(t->me);
- free_newinfo_counters_untrans_unlock:
up(&arpt_mutex);
free_newinfo_counters_untrans:
ARPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry, NULL);
@@ -986,7 +1029,7 @@ static int do_add_counters(void __user *user, unsigned int len)
unsigned int i;
struct arpt_counters_info tmp, *paddc;
struct arpt_table *t;
- int ret;
+ int ret = 0;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1003,9 +1046,11 @@ static int do_add_counters(void __user *user, unsigned int len)
goto free;
}
- t = arpt_find_table_lock(tmp.name, &ret, &arpt_mutex);
- if (!t)
+ t = find_table_lock(tmp.name);
+ if (!t || IS_ERR(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
+ }
write_lock_bh(&t->lock);
if (t->private->number != paddc->num_counters) {
@@ -1022,6 +1067,7 @@ static int do_add_counters(void __user *user, unsigned int len)
unlock_up_free:
write_unlock_bh(&t->lock);
up(&arpt_mutex);
+ module_put(t->me);
free:
vfree(paddc);
@@ -1076,8 +1122,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
}
name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
- t = arpt_find_table_lock(name, &ret, &arpt_mutex);
- if (t) {
+
+ t = try_then_request_module(find_table_lock(name),
+ "arptable_%s", name);
+ if (t && !IS_ERR(t)) {
struct arpt_getinfo info;
info.valid_hooks = t->valid_hooks;
@@ -1093,9 +1141,10 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
ret = -EFAULT;
else
ret = 0;
-
up(&arpt_mutex);
- }
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
}
break;
@@ -1116,6 +1165,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
break;
}
+ case ARPT_SO_GET_REVISION_TARGET: {
+ struct arpt_get_revision rev;
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+
+ try_then_request_module(find_revision(rev.name, rev.revision,
+ target_revfn, &ret),
+ "arpt_%s", rev.name);
+ break;
+ }
+
default:
duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
ret = -EINVAL;
@@ -1133,12 +1200,9 @@ int arpt_register_target(struct arpt_target *target)
if (ret != 0)
return ret;
- if (!list_named_insert(&arpt_target, target)) {
- duprintf("arpt_register_target: `%s' already in list!\n",
- target->name);
- ret = -EINVAL;
- }
+ list_add(&target->list, &arpt_target);
up(&arpt_mutex);
+
return ret;
}
@@ -1158,7 +1222,8 @@ int arpt_register_table(struct arpt_table *table,
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(repl->size) * num_possible_cpus());
+ + SMP_ALIGN(repl->size) *
+ (highest_possible_processor_id()+1));
if (!newinfo) {
ret = -ENOMEM;
return ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index dc20881004bc..e52847fa10f5 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -37,7 +37,7 @@ MODULE_LICENSE("GPL");
module_param(master_timeout, int, 0600);
MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-static char *conns[] = { "DATA ", "MESG ", "INDEX " };
+static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
/* This is slow, but it's simple. --RR */
static char *amanda_buffer;
@@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+ ip_ct_refresh(ct, *pskb, master_timeout * HZ);
/* No data? */
dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index c1f82e0c81cf..7a4ecddd597b 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -50,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
-#define IP_CONNTRACK_VERSION "2.3"
+#define IP_CONNTRACK_VERSION "2.4"
#if 0
#define DEBUGP printk
@@ -148,16 +148,20 @@ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
+static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
+ unsigned int size, unsigned int rnd)
{
-#if 0
- dump_tuple(tuple);
-#endif
return (jhash_3words(tuple->src.ip,
(tuple->dst.ip ^ tuple->dst.protonum),
(tuple->src.u.all | (tuple->dst.u.all << 16)),
- ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+ rnd) % size);
+}
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+ return __hash_conntrack(tuple, ip_conntrack_htable_size,
+ ip_conntrack_hash_rnd);
}
int
@@ -1112,45 +1116,49 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
synchronize_net();
}
-static inline void ct_add_counters(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb)
-{
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (skb) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- }
-#endif
-}
-
-/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
-void ip_ct_refresh_acct(struct ip_conntrack *ct,
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __ip_ct_refresh_acct(struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies)
+ unsigned long extra_jiffies,
+ int do_acct)
{
+ int event = 0;
+
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+ IP_NF_ASSERT(skb);
+
+ write_lock_bh(&ip_conntrack_lock);
/* If not in hash table, timer will not be active yet */
if (!is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- ct_add_counters(ct, ctinfo, skb);
+ event = IPCT_REFRESH;
} else {
- write_lock_bh(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
- /* FIXME: We loose some REFRESH events if this function
- * is called without an skb. I'll fix this later -HW */
- if (skb)
- ip_conntrack_event_cache(IPCT_REFRESH, skb);
+ event = IPCT_REFRESH;
}
- ct_add_counters(ct, ctinfo, skb);
- write_unlock_bh(&ip_conntrack_lock);
}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+ if (do_acct) {
+ ct->counters[CTINFO2DIR(ctinfo)].packets++;
+ ct->counters[CTINFO2DIR(ctinfo)].bytes +=
+ ntohs(skb->nh.iph->tot_len);
+ if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
+ || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
+ event |= IPCT_COUNTER_FILLING;
+ }
+#endif
+
+ write_unlock_bh(&ip_conntrack_lock);
+
+ /* must be unlocked when calling event cache */
+ if (event)
+ ip_conntrack_event_cache(event, skb);
}
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
@@ -1337,17 +1345,16 @@ static int kill_all(struct ip_conntrack *i, void *data)
return 1;
}
-static void free_conntrack_hash(void)
+static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
{
- if (ip_conntrack_vmalloc)
- vfree(ip_conntrack_hash);
+ if (vmalloced)
+ vfree(hash);
else
- free_pages((unsigned long)ip_conntrack_hash,
- get_order(sizeof(struct list_head)
- * ip_conntrack_htable_size));
+ free_pages((unsigned long)hash,
+ get_order(sizeof(struct list_head) * size));
}
-void ip_conntrack_flush()
+void ip_conntrack_flush(void)
{
/* This makes sure all current packets have passed through
netfilter framework. Roll on, two-stage module
@@ -1374,12 +1381,83 @@ void ip_conntrack_cleanup(void)
ip_conntrack_flush();
kmem_cache_destroy(ip_conntrack_cachep);
kmem_cache_destroy(ip_conntrack_expect_cachep);
- free_conntrack_hash();
+ free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
nf_unregister_sockopt(&so_getorigdst);
}
-static int hashsize;
-module_param(hashsize, int, 0400);
+static struct list_head *alloc_hashtable(int size, int *vmalloced)
+{
+ struct list_head *hash;
+ unsigned int i;
+
+ *vmalloced = 0;
+ hash = (void*)__get_free_pages(GFP_KERNEL,
+ get_order(sizeof(struct list_head)
+ * size));
+ if (!hash) {
+ *vmalloced = 1;
+ printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
+ hash = vmalloc(sizeof(struct list_head) * size);
+ }
+
+ if (hash)
+ for (i = 0; i < size; i++)
+ INIT_LIST_HEAD(&hash[i]);
+
+ return hash;
+}
+
+static int set_hashsize(const char *val, struct kernel_param *kp)
+{
+ int i, bucket, hashsize, vmalloced;
+ int old_vmalloced, old_size;
+ int rnd;
+ struct list_head *hash, *old_hash;
+ struct ip_conntrack_tuple_hash *h;
+
+ /* On boot, we can set this without any fancy locking. */
+ if (!ip_conntrack_htable_size)
+ return param_set_int(val, kp);
+
+ hashsize = simple_strtol(val, NULL, 0);
+ if (!hashsize)
+ return -EINVAL;
+
+ hash = alloc_hashtable(hashsize, &vmalloced);
+ if (!hash)
+ return -ENOMEM;
+
+ /* We have to rehash for the new table anyway, so we also can
+ * use a new random seed */
+ get_random_bytes(&rnd, 4);
+
+ write_lock_bh(&ip_conntrack_lock);
+ for (i = 0; i < ip_conntrack_htable_size; i++) {
+ while (!list_empty(&ip_conntrack_hash[i])) {
+ h = list_entry(ip_conntrack_hash[i].next,
+ struct ip_conntrack_tuple_hash, list);
+ list_del(&h->list);
+ bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+ list_add_tail(&h->list, &hash[bucket]);
+ }
+ }
+ old_size = ip_conntrack_htable_size;
+ old_vmalloced = ip_conntrack_vmalloc;
+ old_hash = ip_conntrack_hash;
+
+ ip_conntrack_htable_size = hashsize;
+ ip_conntrack_vmalloc = vmalloced;
+ ip_conntrack_hash = hash;
+ ip_conntrack_hash_rnd = rnd;
+ write_unlock_bh(&ip_conntrack_lock);
+
+ free_conntrack_hash(old_hash, old_vmalloced, old_size);
+ return 0;
+}
+
+module_param_call(hashsize, set_hashsize, param_get_uint,
+ &ip_conntrack_htable_size, 0600);
int __init ip_conntrack_init(void)
{
@@ -1388,9 +1466,7 @@ int __init ip_conntrack_init(void)
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 256 buckets. >= 1GB machines have 8192 buckets. */
- if (hashsize) {
- ip_conntrack_htable_size = hashsize;
- } else {
+ if (!ip_conntrack_htable_size) {
ip_conntrack_htable_size
= (((num_physpages << PAGE_SHIFT) / 16384)
/ sizeof(struct list_head));
@@ -1412,20 +1488,8 @@ int __init ip_conntrack_init(void)
return ret;
}
- /* AK: the hash table is twice as big than needed because it
- uses list_head. it would be much nicer to caches to use a
- single pointer list head here. */
- ip_conntrack_vmalloc = 0;
- ip_conntrack_hash
- =(void*)__get_free_pages(GFP_KERNEL,
- get_order(sizeof(struct list_head)
- *ip_conntrack_htable_size));
- if (!ip_conntrack_hash) {
- ip_conntrack_vmalloc = 1;
- printk(KERN_WARNING "ip_conntrack: falling back to vmalloc.\n");
- ip_conntrack_hash = vmalloc(sizeof(struct list_head)
- * ip_conntrack_htable_size);
- }
+ ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
+ &ip_conntrack_vmalloc);
if (!ip_conntrack_hash) {
printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
goto err_unreg_sockopt;
@@ -1457,9 +1521,6 @@ int __init ip_conntrack_init(void)
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
write_unlock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- INIT_LIST_HEAD(&ip_conntrack_hash[i]);
-
/* For use by ipt_REJECT */
ip_ct_attach = ip_conntrack_attach;
@@ -1474,7 +1535,8 @@ int __init ip_conntrack_init(void)
err_free_conntrack_slab:
kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
- free_conntrack_hash();
+ free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
+ ip_conntrack_htable_size);
err_unreg_sockopt:
nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index d77d6b3f5f80..68b173bcda60 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -29,9 +29,9 @@ static char *ftp_buffer;
static DEFINE_SPINLOCK(ip_ftp_lock);
#define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
static int ports_c;
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
static int loose;
module_param(loose, int, 0600);
@@ -55,7 +55,7 @@ static int try_rfc959(const char *, size_t, u_int32_t [], char);
static int try_eprt(const char *, size_t, u_int32_t [], char);
static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-static struct ftp_search {
+static const struct ftp_search {
enum ip_conntrack_dir dir;
const char *pattern;
size_t plen;
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 79db5b70d5f6..4108a5e12b3c 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -172,7 +172,6 @@ static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
sibling->proto.gre.timeout = 0;
sibling->proto.gre.stream_timeout = 0;
- /* refresh_acct will not modify counters if skb == NULL */
if (del_timer(&sibling->timeout))
sibling->timeout.function((unsigned long)sibling);
ip_conntrack_put(sibling);
@@ -223,8 +222,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct)
static inline int
exp_gre(struct ip_conntrack *master,
u_int32_t seq,
- u_int16_t callid,
- u_int16_t peer_callid)
+ __be16 callid,
+ __be16 peer_callid)
{
struct ip_conntrack_tuple inv_tuple;
struct ip_conntrack_tuple exp_tuples[] = {
@@ -263,7 +262,7 @@ exp_gre(struct ip_conntrack *master,
exp_orig->mask.src.ip = 0xffffffff;
exp_orig->mask.src.u.all = 0;
exp_orig->mask.dst.u.all = 0;
- exp_orig->mask.dst.u.gre.key = 0xffff;
+ exp_orig->mask.dst.u.gre.key = htons(0xffff);
exp_orig->mask.dst.ip = 0xffffffff;
exp_orig->mask.dst.protonum = 0xff;
@@ -271,14 +270,10 @@ exp_gre(struct ip_conntrack *master,
exp_orig->expectfn = pptp_expectfn;
exp_orig->flags = 0;
- exp_orig->dir = IP_CT_DIR_ORIGINAL;
-
/* both expectations are identical apart from tuple */
memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
- exp_reply->dir = !exp_orig->dir;
-
if (ip_nat_pptp_hook_exp_gre)
ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
else {
@@ -340,7 +335,8 @@ pptp_inbound_pkt(struct sk_buff **pskb,
unsigned int reqlen;
union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
u_int32_t seq;
ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
@@ -485,7 +481,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
if (info->pns_call_id != ntohs(*pcid)) {
DEBUGP("%s for unknown CallID %u\n",
- pptp_msg_name[msg], ntohs(*cid));
+ pptp_msg_name[msg], ntohs(*pcid));
break;
}
@@ -551,7 +547,8 @@ pptp_outbound_pkt(struct sk_buff **pskb,
unsigned int reqlen;
union pptp_ctrl_union _pptpReq, *pptpReq;
struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg, *cid, *pcid;
+ u_int16_t msg;
+ __be16 *cid, *pcid;
ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
if (!ctlh)
@@ -755,7 +752,7 @@ static struct ip_conntrack_helper pptp = {
}
},
.mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = 0xffff } }
+ .u = { .tcp = { .port = __constant_htons(0xffff) } }
},
.dst = { .ip = 0,
.u = { .all = 0 },
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 15457415a4f3..d7c40421d0d1 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -34,7 +34,7 @@
#include <linux/moduleparam.h>
#define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
static int ports_c;
static int max_dcc_channels = 8;
static unsigned int dcc_timeout = 300;
@@ -52,14 +52,14 @@ EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of IRC servers");
module_param(max_dcc_channels, int, 0400);
MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
module_param(dcc_timeout, int, 0400);
MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
-static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
+static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
#define MINMATCHLEN 5
#if 0
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 71ef19d126d0..186646eb249f 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -58,7 +58,7 @@ static int help(struct sk_buff **pskb,
goto out;
rcu_read_lock();
- in_dev = __in_dev_get(rt->u.dst.dev);
+ in_dev = __in_dev_get_rcu(rt->u.dst.dev);
if (in_dev != NULL) {
for_primary_ifa(in_dev) {
if (ifa->ifa_broadcast == iph->daddr) {
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb,
ip_conntrack_expect_related(exp);
ip_conntrack_expect_put(exp);
- ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ);
+ ip_ct_refresh(ct, *pskb, timeout * HZ);
out:
return NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index b08a432efcf8..3fce91bcc0ba 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -27,12 +27,10 @@
#include <linux/errno.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
+#include <linux/interrupt.h>
#include <linux/notifier.h>
-#include <linux/rtnetlink.h>
#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
@@ -58,14 +56,19 @@ ctnetlink_dump_tuples_proto(struct sk_buff *skb,
const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_protocol *proto;
+ int ret = 0;
NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+ /* If no protocol helper is found, this function will return the
+ * generic protocol helper, so proto won't *ever* be NULL */
proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- if (proto && proto->tuple_to_nfattr)
- return proto->tuple_to_nfattr(skb, tuple);
+ if (likely(proto->tuple_to_nfattr))
+ ret = proto->tuple_to_nfattr(skb, tuple);
+
+ ip_conntrack_proto_put(proto);
- return 0;
+ return ret;
nfattr_failure:
return -1;
@@ -128,9 +131,11 @@ ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
struct nfattr *nest_proto;
int ret;
-
- if (!proto || !proto->to_nfattr)
+
+ if (!proto->to_nfattr) {
+ ip_conntrack_proto_put(proto);
return 0;
+ }
nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
@@ -175,13 +180,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
{
enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
struct nfattr *nest_count = NFA_NEST(skb, type);
- u_int64_t tmp;
+ u_int32_t tmp;
- tmp = cpu_to_be64(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp);
+ tmp = htonl(ct->counters[dir].packets);
+ NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
- tmp = cpu_to_be64(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp);
+ tmp = htonl(ct->counters[dir].bytes);
+ NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
NFA_NEST_END(skb, nest_count);
@@ -467,7 +472,7 @@ out:
}
#endif
-static const int cta_min_ip[CTA_IP_MAX] = {
+static const size_t cta_min_ip[CTA_IP_MAX] = {
[CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
[CTA_IP_V4_DST-1] = sizeof(u_int32_t),
};
@@ -479,9 +484,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_IP_MAX, attr);
if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
return -EINVAL;
@@ -497,12 +500,9 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
DEBUGP("leaving\n");
return 0;
-
-nfattr_failure:
- return -1;
}
-static const int cta_min_proto[CTA_PROTO_MAX] = {
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
[CTA_PROTO_NUM-1] = sizeof(u_int16_t),
[CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
[CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
@@ -521,8 +521,7 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
DEBUGP("entered %s\n", __FUNCTION__);
- if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
return -EINVAL;
@@ -533,15 +532,12 @@ ctnetlink_parse_tuple_proto(struct nfattr *attr,
proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- if (likely(proto && proto->nfattr_to_tuple)) {
+ if (likely(proto->nfattr_to_tuple))
ret = proto->nfattr_to_tuple(tb, tuple);
- ip_conntrack_proto_put(proto);
- }
+
+ ip_conntrack_proto_put(proto);
return ret;
-
-nfattr_failure:
- return -1;
}
static inline int
@@ -555,8 +551,7 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
memset(tuple, 0, sizeof(*tuple));
- if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
if (!tb[CTA_TUPLE_IP-1])
return -EINVAL;
@@ -583,13 +578,10 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
DEBUGP("leaving\n");
return 0;
-
-nfattr_failure:
- return -1;
}
#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const int cta_min_protonat[CTA_PROTONAT_MAX] = {
+static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
[CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
[CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
};
@@ -603,15 +595,12 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
DEBUGP("entered %s\n", __FUNCTION__);
- if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
- goto nfattr_failure;
+ return -EINVAL;
npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
- if (!npt)
- return 0;
if (!npt->nfattr_to_range) {
ip_nat_proto_put(npt);
@@ -626,11 +615,13 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
DEBUGP("leaving\n");
return 0;
-
-nfattr_failure:
- return -1;
}
+static const size_t cta_min_nat[CTA_NAT_MAX] = {
+ [CTA_NAT_MINIP-1] = sizeof(u_int32_t),
+ [CTA_NAT_MAXIP-1] = sizeof(u_int32_t),
+};
+
static inline int
ctnetlink_parse_nat(struct nfattr *cda[],
const struct ip_conntrack *ct, struct ip_nat_range *range)
@@ -642,8 +633,10 @@ ctnetlink_parse_nat(struct nfattr *cda[],
memset(range, 0, sizeof(*range));
- if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
+
+ if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
+ return -EINVAL;
if (tb[CTA_NAT_MINIP-1])
range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
@@ -665,9 +658,6 @@ ctnetlink_parse_nat(struct nfattr *cda[],
DEBUGP("leaving\n");
return 0;
-
-nfattr_failure:
- return -1;
}
#endif
@@ -678,8 +668,7 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
DEBUGP("entered %s\n", __FUNCTION__);
- if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0)
- goto nfattr_failure;
+ nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
if (!tb[CTA_HELP_NAME-1])
return -EINVAL;
@@ -687,11 +676,16 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
return 0;
-
-nfattr_failure:
- return -1;
}
+static const size_t cta_min[CTA_MAX] = {
+ [CTA_STATUS-1] = sizeof(u_int32_t),
+ [CTA_TIMEOUT-1] = sizeof(u_int32_t),
+ [CTA_MARK-1] = sizeof(u_int32_t),
+ [CTA_USE-1] = sizeof(u_int32_t),
+ [CTA_ID-1] = sizeof(u_int32_t)
+};
+
static int
ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
@@ -703,6 +697,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("entered %s\n", __FUNCTION__);
+ if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+ return -EINVAL;
+
if (cda[CTA_TUPLE_ORIG-1])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
else if (cda[CTA_TUPLE_REPLY-1])
@@ -785,6 +782,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
+ if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+ return -EINVAL;
+
if (cda[CTA_TUPLE_ORIG-1])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
else if (cda[CTA_TUPLE_REPLY-1])
@@ -804,7 +804,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
ct = tuplehash_to_ctrack(h);
err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2) {
ip_conntrack_put(ct);
return -ENOMEM;
@@ -815,7 +815,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
IPCTNL_MSG_CT_NEW, 1, ct);
ip_conntrack_put(ct);
if (err <= 0)
- goto out;
+ goto free;
err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
if (err < 0)
@@ -824,16 +824,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("leaving\n");
return 0;
+free:
+ kfree_skb(skb2);
out:
- if (skb2)
- kfree_skb(skb2);
- return -1;
+ return err;
}
static inline int
ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
{
- unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]);
+ unsigned long d;
+ unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
d = ct->status ^ status;
if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -948,6 +949,25 @@ ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
return 0;
}
+static inline int
+ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
+{
+ struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
+ struct ip_conntrack_protocol *proto;
+ u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+ int err = 0;
+
+ nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
+
+ proto = ip_conntrack_proto_find_get(npt);
+
+ if (proto->from_nfattr)
+ err = proto->from_nfattr(tb, ct);
+ ip_conntrack_proto_put(proto);
+
+ return err;
+}
+
static int
ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
{
@@ -973,6 +993,17 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
return err;
}
+ if (cda[CTA_PROTOINFO-1]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK-1])
+ ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
DEBUGP("all done\n");
return 0;
}
@@ -1002,6 +1033,12 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (err < 0)
goto err;
+ if (cda[CTA_PROTOINFO-1]) {
+ err = ctnetlink_change_protoinfo(ct, cda);
+ if (err < 0)
+ return err;
+ }
+
ct->helper = ip_conntrack_helper_find_get(rtuple);
add_timer(&ct->timeout);
@@ -1010,6 +1047,11 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
if (ct->helper)
ip_conntrack_helper_put(ct->helper);
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+ if (cda[CTA_MARK-1])
+ ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
DEBUGP("conntrack with id %u inserted\n", ct->id);
return 0;
@@ -1028,6 +1070,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("entered %s\n", __FUNCTION__);
+ if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+ return -EINVAL;
+
if (cda[CTA_TUPLE_ORIG-1]) {
err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
if (err < 0)
@@ -1233,6 +1278,11 @@ out:
return skb->len;
}
+static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
+ [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t),
+ [CTA_EXPECT_ID-1] = sizeof(u_int32_t)
+};
+
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
@@ -1244,6 +1294,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("entered %s\n", __FUNCTION__);
+ if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+ return -EINVAL;
+
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct nfgenmsg *msg = NLMSG_DATA(nlh);
u32 rlen;
@@ -1274,6 +1327,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (!exp)
return -ENOENT;
+ if (cda[CTA_EXPECT_ID-1]) {
+ u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+ if (exp->id != ntohl(id)) {
+ ip_conntrack_expect_put(exp);
+ return -ENOENT;
+ }
+ }
+
err = -ENOMEM;
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb2)
@@ -1284,21 +1345,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
1, exp);
if (err <= 0)
- goto out;
+ goto free;
ip_conntrack_expect_put(exp);
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto free;
-
- return err;
+ return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+free:
+ kfree_skb(skb2);
out:
ip_conntrack_expect_put(exp);
-free:
- if (skb2)
- kfree_skb(skb2);
return err;
}
@@ -1311,6 +1367,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct ip_conntrack_helper *h;
int err;
+ if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+ return -EINVAL;
+
if (cda[CTA_EXPECT_TUPLE-1]) {
/* delete a single expect by tuple */
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
@@ -1354,7 +1413,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
ip_conntrack_expect_put(exp);
}
}
- write_unlock(&ip_conntrack_lock);
+ write_unlock_bh(&ip_conntrack_lock);
} else {
/* This basically means we have to flush everything*/
write_lock_bh(&ip_conntrack_lock);
@@ -1440,6 +1499,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
DEBUGP("entered %s\n", __FUNCTION__);
+ if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+ return -EINVAL;
+
if (!cda[CTA_EXPECT_TUPLE-1]
|| !cda[CTA_EXPECT_MASK-1]
|| !cda[CTA_EXPECT_MASTER-1])
@@ -1482,29 +1544,22 @@ static struct notifier_block ctnl_notifier_exp = {
static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
[IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
- .attr_count = CTA_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_MAX, },
[IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_MAX, },
[IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
- .attr_count = CTA_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_MAX, },
[IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_MAX, },
};
static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
[IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
- .attr_count = CTA_EXPECT_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_EXPECT_MAX, },
[IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
- .attr_count = CTA_EXPECT_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_EXPECT_MAX, },
[IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
- .attr_count = CTA_EXPECT_MAX,
- .cap_required = CAP_NET_ADMIN },
+ .attr_count = CTA_EXPECT_MAX, },
};
static struct nfnetlink_subsystem ctnl_subsys = {
@@ -1521,6 +1576,8 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = {
.cb = ctnl_exp_cb,
};
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+
static int __init ctnetlink_init(void)
{
int ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index de3cb9db6f85..744abb9d377a 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -247,6 +247,7 @@ static int gre_packet(struct ip_conntrack *ct,
ct->proto.gre.stream_timeout);
/* Also, more likely to be important, and not a probe. */
set_bit(IPS_ASSURED_BIT, &ct->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
} else
ip_ct_refresh_acct(ct, conntrackinfo, skb,
ct->proto.gre.timeout);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 838d1d69b36e..5f9925db608e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -13,6 +13,7 @@
#include <linux/in.h>
#include <linux/icmp.h>
#include <linux/seq_file.h>
+#include <linux/skbuff.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <linux/netfilter.h>
@@ -50,7 +51,7 @@ static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig)
{
/* Add 1; spaces filled with 0. */
- static u_int8_t invmap[]
+ static const u_int8_t invmap[]
= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
@@ -109,7 +110,7 @@ static int icmp_packet(struct ip_conntrack *ct,
return NF_ACCEPT;
}
-static u_int8_t valid_new[] = {
+static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
[ICMP_TIMESTAMP] = 1,
[ICMP_INFO_REQUEST] = 1,
@@ -151,13 +152,13 @@ icmp_error_message(struct sk_buff *skb,
/* Not enough header? */
inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
if (inside == NULL)
- return NF_ACCEPT;
+ return -NF_ACCEPT;
/* Ignore ICMP's containing fragments (shouldn't happen) */
if (inside->ip.frag_off & htons(IP_OFFSET)) {
DEBUGP("icmp_error_track: fragment of proto %u\n",
inside->ip.protocol);
- return NF_ACCEPT;
+ return -NF_ACCEPT;
}
innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
@@ -166,7 +167,7 @@ icmp_error_message(struct sk_buff *skb,
if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
ip_conntrack_proto_put(innerproto);
- return NF_ACCEPT;
+ return -NF_ACCEPT;
}
/* Ordinarily, we'd expect the inverted tupleproto, but it's
@@ -174,7 +175,7 @@ icmp_error_message(struct sk_buff *skb,
if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
DEBUGP("icmp_error_track: Can't invert tuple\n");
ip_conntrack_proto_put(innerproto);
- return NF_ACCEPT;
+ return -NF_ACCEPT;
}
ip_conntrack_proto_put(innerproto);
@@ -190,7 +191,7 @@ icmp_error_message(struct sk_buff *skb,
if (!h) {
DEBUGP("icmp_error_track: no match\n");
- return NF_ACCEPT;
+ return -NF_ACCEPT;
}
/* Reverse direction from that found */
if (DIRECTION(h) != IP_CT_DIR_REPLY)
@@ -230,19 +231,15 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
case CHECKSUM_HW:
if (!(u16)csum_fold(skb->csum))
break;
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: bad HW ICMP checksum ");
- return -NF_ACCEPT;
+ /* fall through */
case CHECKSUM_NONE:
- if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+ skb->csum = 0;
+ if (__skb_checksum_complete(skb)) {
if (LOG_INVALID(IPPROTO_ICMP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
"ip_ct_icmp: bad ICMP checksum ");
return -NF_ACCEPT;
}
- default:
- break;
}
checksum_skipped:
@@ -305,7 +302,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
tuple->dst.u.icmp.code =
*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
tuple->src.u.icmp.id =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+ *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
return 0;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index a875f35e576d..977fb59d4563 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -65,7 +65,7 @@ static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
-static unsigned long * sctp_timeouts[]
+static const unsigned long * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
&ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
&ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
@@ -118,7 +118,7 @@ cookie echoed to closed.
*/
/* SCTP conntrack state transitions */
-static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
@@ -416,6 +416,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
DEBUGP("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
}
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 1985abc59d24..aeb7353d4777 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -99,7 +99,7 @@ unsigned long ip_ct_tcp_timeout_close = 10 SECS;
to ~13-30min depending on RTO. */
unsigned long ip_ct_tcp_timeout_max_retrans = 5 MINS;
-static unsigned long * tcp_timeouts[]
+static const unsigned long * tcp_timeouts[]
= { NULL, /* TCP_CONNTRACK_NONE */
&ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
&ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
@@ -170,7 +170,7 @@ enum tcp_bit_set {
* if they are invalid
* or we do not support the request (simultaneous open)
*/
-static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
+static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
@@ -272,9 +272,9 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
- * sSS -> sIV Might be a half-open connection.
+ * sSS -> sIG Might be a half-open connection.
* sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -341,17 +341,51 @@ static int tcp_print_conntrack(struct seq_file *s,
static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
const struct ip_conntrack *ct)
{
+ struct nfattr *nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+
read_lock_bh(&tcp_lock);
NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
&ct->proto.tcp.state);
read_unlock_bh(&tcp_lock);
+ NFA_NEST_END(skb, nest_parms);
+
return 0;
nfattr_failure:
read_unlock_bh(&tcp_lock);
return -1;
}
+
+static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
+ [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
+};
+
+static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
+{
+ struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
+ struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+
+ /* updates could not contain anything about the private
+ * protocol info, in that case skip the parsing */
+ if (!attr)
+ return 0;
+
+ nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
+
+ if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
+ return -EINVAL;
+
+ if (!tb[CTA_PROTOINFO_TCP_STATE-1])
+ return -EINVAL;
+
+ write_lock_bh(&tcp_lock);
+ ct->proto.tcp.state =
+ *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+ write_unlock_bh(&tcp_lock);
+
+ return 0;
+}
#endif
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
@@ -783,10 +817,11 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
#define TH_CWR 0x80
/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
{
[TH_SYN] = 1,
[TH_SYN|TH_ACK] = 1,
+ [TH_SYN|TH_PUSH] = 1,
[TH_SYN|TH_ACK|TH_PUSH] = 1,
[TH_RST] = 1,
[TH_RST|TH_ACK] = 1,
@@ -882,8 +917,12 @@ static int tcp_packet(struct ip_conntrack *conntrack,
switch (new_state) {
case TCP_CONNTRACK_IGNORE:
- /* Either SYN in ORIGINAL
- * or SYN/ACK in REPLY. */
+ /* Ignored packets:
+ *
+ * a) SYN in ORIGINAL
+ * b) SYN/ACK in REPLY
+ * c) ACK in reply direction after initial SYN in original.
+ */
if (index == TCP_SYNACK_SET
&& conntrack->proto.tcp.last_index == TCP_SYN_SET
&& conntrack->proto.tcp.last_dir != dir
@@ -950,13 +989,20 @@ static int tcp_packet(struct ip_conntrack *conntrack,
}
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
- && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_SYN_SET
+ && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_SYN_SET)
+ || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_ACK_SET))
&& ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN we had let trough
- * SYN was in window then, tear down connection.
+ /* RST sent to invalid SYN or ACK we had let trough
+ * at a) and c) above:
+ *
+ * a) SYN was in window then
+ * c) we hold a half-open connection.
+ *
+ * Delete our connection entry.
* We skip window checking, because packet might ACK
- * segments we ignored in the SYN. */
+ * segments we ignored. */
goto in_window;
}
/* Just fall trough */
@@ -1014,7 +1060,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
/* Set ASSURED if we see see valid ack in ESTABLISHED
after SYN_RECV or a valid answer for a picked up
connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ set_bit(IPS_ASSURED_BIT, &conntrack->status);
+ ip_conntrack_event_cache(IPCT_STATUS, skb);
}
ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
@@ -1122,6 +1169,7 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
.to_nfattr = tcp_to_nfattr,
+ .from_nfattr = nfattr_to_tcp,
.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index d3c7808010ec..dd476b191f4b 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,7 +989,7 @@ EXPORT_SYMBOL(need_ip_conntrack);
EXPORT_SYMBOL(ip_conntrack_helper_register);
EXPORT_SYMBOL(ip_conntrack_helper_unregister);
EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(__ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
EXPORT_SYMBOL(ip_conntrack_expect_put);
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index a78736b8525d..d3c5a371f993 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -26,9 +26,9 @@ MODULE_DESCRIPTION("tftp connection tracking helper");
MODULE_LICENSE("GPL");
#define MAX_PORTS 8
-static short ports[MAX_PORTS];
+static unsigned short ports[MAX_PORTS];
static int ports_c;
-module_param_array(ports, short, &ports_c, 0400);
+module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#if 0
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index c3ea891d38e7..c1a61462507f 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -49,7 +49,7 @@ static unsigned int ip_nat_htable_size;
static struct list_head *bysource;
#define MAX_IP_NAT_PROTO 256
-struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
+static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
static inline struct ip_nat_protocol *
__ip_nat_proto_find(u_int8_t protonum)
@@ -66,20 +66,20 @@ ip_nat_proto_find_get(u_int8_t protonum)
* removed until we've grabbed the reference */
preempt_disable();
p = __ip_nat_proto_find(protonum);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- }
+ if (!try_module_get(p->me))
+ p = &ip_nat_unknown_protocol;
preempt_enable();
return p;
}
+EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
void
ip_nat_proto_put(struct ip_nat_protocol *p)
{
module_put(p->me);
}
+EXPORT_SYMBOL_GPL(ip_nat_proto_put);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
@@ -111,6 +111,7 @@ ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
oldcheck^0xFFFF));
}
+EXPORT_SYMBOL(ip_nat_cheat_check);
/* Is this tuple already taken? (not by us) */
int
@@ -127,6 +128,7 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
invert_tuplepr(&reply, tuple);
return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
+EXPORT_SYMBOL(ip_nat_used_tuple);
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range. */
@@ -347,6 +349,7 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
return NF_ACCEPT;
}
+EXPORT_SYMBOL(ip_nat_setup_info);
/* Returns true if succeeded. */
static int
@@ -387,10 +390,10 @@ manip_pkt(u_int16_t proto,
}
/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
+unsigned int ip_nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
@@ -417,12 +420,13 @@ unsigned int nat_packet(struct ip_conntrack *ct,
}
return NF_ACCEPT;
}
+EXPORT_SYMBOL_GPL(ip_nat_packet);
/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
+int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir)
{
struct {
struct icmphdr icmp;
@@ -509,6 +513,7 @@ int icmp_reply_translation(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
/* Protocol registration. */
int ip_nat_protocol_register(struct ip_nat_protocol *proto)
@@ -525,6 +530,7 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
write_unlock_bh(&ip_nat_lock);
return ret;
}
+EXPORT_SYMBOL(ip_nat_protocol_register);
/* Noone stores the protocol anywhere; simply delete it. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
@@ -536,6 +542,7 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
/* Someone could be still looking at the proto in a bh. */
synchronize_net();
}
+EXPORT_SYMBOL(ip_nat_protocol_unregister);
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
@@ -582,7 +589,7 @@ EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
#endif
-int __init ip_nat_init(void)
+static int __init ip_nat_init(void)
{
size_t i;
@@ -624,10 +631,14 @@ static int clean_nat(struct ip_conntrack *i, void *data)
return 0;
}
-/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
-void ip_nat_cleanup(void)
+static void __exit ip_nat_cleanup(void)
{
ip_ct_iterate_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
vfree(bysource);
}
+
+MODULE_LICENSE("GPL");
+
+module_init(ip_nat_init);
+module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index d2dd5d313556..5d506e0564d5 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -199,6 +199,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
}
return 1;
}
+EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
/* Generic function for mangling variable-length address changes inside
* NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
@@ -256,6 +257,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
/* Adjust one found SACK option including checksum correction */
static void
@@ -399,6 +401,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
return 1;
}
+EXPORT_SYMBOL(ip_nat_seq_adjust);
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
@@ -425,3 +428,4 @@ void ip_nat_follow_master(struct ip_conntrack *ct,
/* hook doesn't matter, but it has to do destination manip */
ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
+EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 3cdd0684d30d..e546203f5662 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -73,6 +73,7 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
struct ip_conntrack_tuple t;
struct ip_ct_pptp_master *ct_pptp_info;
struct ip_nat_pptp *nat_pptp_info;
+ struct ip_nat_range range;
ct_pptp_info = &master->help.ct_pptp_info;
nat_pptp_info = &master->nat.help.nat_pptp_info;
@@ -110,7 +111,30 @@ static void pptp_nat_expected(struct ip_conntrack *ct,
DEBUGP("not found!\n");
}
- ip_nat_follow_master(ct, exp);
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range.min = range.max = exp->saved_proto;
+ }
+ /* hook doesn't matter, but it has to do source manip */
+ ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.src.ip;
+ if (exp->dir == IP_CT_DIR_REPLY) {
+ range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+ range.min = range.max = exp->saved_proto;
+ }
+ /* hook doesn't matter, but it has to do destination manip */
+ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
/* outbound packets == from PNS to PAC */
@@ -213,9 +237,10 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
/* alter expectation for PNS->PAC direction */
invert_tuplepr(&inv_t, &expect_orig->tuple);
- expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
+ expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+ expect_orig->dir = IP_CT_DIR_ORIGINAL;
inv_t.src.ip = reply_t->src.ip;
inv_t.dst.ip = reply_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
@@ -233,6 +258,7 @@ pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+ expect_reply->dir = IP_CT_DIR_REPLY;
inv_t.src.ip = orig_t->src.ip;
inv_t.dst.ip = orig_t->dst.ip;
inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
index 7c1285401672..f7cad7cf1aec 100644
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -139,8 +139,8 @@ gre_manip_pkt(struct sk_buff **pskb,
break;
case GRE_VERSION_PPTP:
DEBUGP("call_id -> 0x%04x\n",
- ntohl(tuple->dst.u.gre.key));
- pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+ ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
break;
default:
DEBUGP("can't nat unknown GRE version\n");
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 99bbef56f84e..f0099a646a0b 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -62,7 +62,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range)
struct ip_nat_protocol ip_nat_unknown_protocol = {
.name = "unknown",
- .me = THIS_MODULE,
+ /* .me isn't set: getting a ref to this cannot fail. */
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 93b2c5111bb2..8acb7ed40b47 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1161,8 +1161,7 @@ static int snmp_parse_mangle(unsigned char *msg,
if (!snmp_object_decode(&ctx, obj)) {
if (*obj) {
- if ((*obj)->id)
- kfree((*obj)->id);
+ kfree((*obj)->id);
kfree(*obj);
}
kfree(obj);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0ff368b131f6..30cd4e18c129 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -108,8 +108,8 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!icmp_reply_translation(pskb, ct, maniptype,
- CTINFO2DIR(ctinfo)))
+ if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
+ CTINFO2DIR(ctinfo)))
return NF_DROP;
else
return NF_ACCEPT;
@@ -152,7 +152,7 @@ ip_nat_fn(unsigned int hooknum,
}
IP_NF_ASSERT(info);
- return nat_packet(ct, ctinfo, hooknum, pskb);
+ return ip_nat_packet(ct, ctinfo, hooknum, pskb);
}
static unsigned int
@@ -325,15 +325,10 @@ static int init_or_cleanup(int init)
printk("ip_nat_init: can't setup rules.\n");
goto cleanup_nothing;
}
- ret = ip_nat_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_rule_init;
- }
ret = nf_register_hook(&ip_nat_in_ops);
if (ret < 0) {
printk("ip_nat_init: can't register in hook.\n");
- goto cleanup_nat;
+ goto cleanup_rule_init;
}
ret = nf_register_hook(&ip_nat_out_ops);
if (ret < 0) {
@@ -374,8 +369,6 @@ static int init_or_cleanup(int init)
nf_unregister_hook(&ip_nat_out_ops);
cleanup_inops:
nf_unregister_hook(&ip_nat_in_ops);
- cleanup_nat:
- ip_nat_cleanup();
cleanup_rule_init:
ip_nat_rule_cleanup();
cleanup_nothing:
@@ -395,14 +388,4 @@ static void __exit fini(void)
module_init(init);
module_exit(fini);
-EXPORT_SYMBOL(ip_nat_setup_info);
-EXPORT_SYMBOL(ip_nat_protocol_register);
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-EXPORT_SYMBOL(ip_nat_cheat_check);
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-EXPORT_SYMBOL(ip_nat_used_tuple);
-EXPORT_SYMBOL(ip_nat_follow_master);
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index d54f14d926f6..36339eb39e17 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
pmsg->packet_id = (unsigned long )entry;
pmsg->data_len = data_len;
- pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec;
+ pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
+ pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
pmsg->mark = entry->skb->nfmark;
pmsg->hook = entry->info->hook;
pmsg->hw_protocol = entry->skb->protocol;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index eef99a1b5de6..45886c8475e8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -27,6 +27,7 @@
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
+#include <linux/cpumask.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -921,8 +922,10 @@ translate_table(const char *name,
}
/* And one copy for every other CPU */
- for (i = 1; i < num_possible_cpus(); i++) {
- memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ for_each_cpu(i) {
+ if (i == 0)
+ continue;
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
}
@@ -943,7 +946,7 @@ replace_table(struct ipt_table *table,
struct ipt_entry *table_base;
unsigned int i;
- for (i = 0; i < num_possible_cpus(); i++) {
+ for_each_cpu(i) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
@@ -990,7 +993,7 @@ get_counters(const struct ipt_table_info *t,
unsigned int cpu;
unsigned int i;
- for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+ for_each_cpu(cpu) {
i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -1128,7 +1131,8 @@ do_replace(void __user *user, unsigned int len)
return -ENOMEM;
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) * num_possible_cpus());
+ + SMP_ALIGN(tmp.size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
@@ -1458,7 +1462,8 @@ int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
= { 0, 0, 0, { 0 }, { 0 }, { } };
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(repl->size) * num_possible_cpus());
+ + SMP_ALIGN(repl->size) *
+ (highest_possible_processor_id()+1));
if (!newinfo)
return -ENOMEM;
@@ -1887,7 +1892,7 @@ static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
return pos;
}
-static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
+static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
{ "ip_tables_targets", ipt_get_targets },
{ "ip_tables_matches", ipt_get_matches },
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 9bcb398fbc1f..45c52d8f4d99 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,7 +29,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
#define CLUSTERIP_VERSION "0.8"
@@ -316,14 +316,14 @@ target(struct sk_buff **pskb,
{
const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
- u_int32_t hash;
+ u_int32_t *mark, hash;
/* don't need to clusterip_config_get() here, since refcount
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- if (!ct) {
+ mark = nf_ct_get_mark((*pskb), &ctinfo);
+ if (mark == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
* to outgoing connections of other nodes will be
@@ -346,7 +346,7 @@ target(struct sk_buff **pskb,
switch (ctinfo) {
case IP_CT_NEW:
- ct->mark = hash;
+ *mark = hash;
break;
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -363,7 +363,7 @@ target(struct sk_buff **pskb,
#ifdef DEBUG_CLUSTERP
DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
+ DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
DEBUGP("not responsible\n");
return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 134638021339..8acac5a40a92 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -29,7 +29,7 @@ MODULE_LICENSE("GPL");
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CONNMARK.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
static unsigned int
target(struct sk_buff **pskb,
@@ -43,24 +43,24 @@ target(struct sk_buff **pskb,
u_int32_t diff;
u_int32_t nfmark;
u_int32_t newmark;
+ u_int32_t ctinfo;
+ u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
- if (ct) {
+ if (ctmark) {
switch(markinfo->mode) {
case IPT_CONNMARK_SET:
- newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
- if (newmark != ct->mark)
- ct->mark = newmark;
+ newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
+ if (newmark != *ctmark)
+ *ctmark = newmark;
break;
case IPT_CONNMARK_SAVE:
- newmark = (ct->mark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
- if (ct->mark != newmark)
- ct->mark = newmark;
+ newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask);
+ if (*ctmark != newmark)
+ *ctmark = newmark;
break;
case IPT_CONNMARK_RESTORE:
nfmark = (*pskb)->nfmark;
- diff = (ct->mark ^ nfmark) & markinfo->mask;
+ diff = (*ctmark ^ nfmark) & markinfo->mask;
if (diff != 0)
(*pskb)->nfmark = nfmark ^ diff;
break;
@@ -109,6 +109,7 @@ static struct ipt_target ipt_connmark_reg = {
static int __init init(void)
{
+ need_ip_conntrack();
return ipt_register_target(&ipt_connmark_reg);
}
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 92ed050fac69..30be0f1dae37 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -197,7 +197,7 @@ static void dump_packet(const struct nf_loginfo *info,
}
case IPPROTO_ICMP: {
struct icmphdr _icmph, *ich;
- static size_t required_len[NR_ICMP_TYPES+1]
+ static const size_t required_len[NR_ICMP_TYPES+1]
= { [ICMP_ECHOREPLY] = 4,
[ICMP_DEST_UNREACH]
= 8 + sizeof(struct iphdr),
@@ -351,7 +351,7 @@ static void dump_packet(const struct nf_loginfo *info,
/* maxlen = 230+ 91 + 230 + 252 = 803 */
}
-struct nf_loginfo default_loginfo = {
+static struct nf_loginfo default_loginfo = {
.type = NF_LOG_TYPE_LOG,
.u = {
.log = {
diff --git a/net/ipv4/netfilter/ipt_NOTRACK.c b/net/ipv4/netfilter/ipt_NOTRACK.c
index a4bb9b3bc292..e3c69d072c6e 100644
--- a/net/ipv4/netfilter/ipt_NOTRACK.c
+++ b/net/ipv4/netfilter/ipt_NOTRACK.c
@@ -5,7 +5,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
static unsigned int
target(struct sk_buff **pskb,
@@ -23,7 +23,7 @@ target(struct sk_buff **pskb,
If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */
- (*pskb)->nfct = &ip_conntrack_untracked.ct_general;
+ nf_ct_untrack(*pskb);
(*pskb)->nfctinfo = IP_CT_NEW;
nf_conntrack_get((*pskb)->nfct);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 715cb613405c..5245bfd33d52 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -93,7 +93,7 @@ redirect_target(struct sk_buff **pskb,
newdst = 0;
rcu_read_lock();
- indev = __in_dev_get((*pskb)->dev);
+ indev = __in_dev_get_rcu((*pskb)->dev);
if (indev && (ifa = indev->ifa_list))
newdst = ifa->ifa_local;
rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index e2c14f3cb2fc..2883ccd8a91d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -225,8 +225,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* copy hook, prefix, timestamp, payload, etc. */
pm->data_len = copy_len;
- pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
- pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
+ pm->timestamp_sec = skb->tstamp.off_sec;
+ pm->timestamp_usec = skb->tstamp.off_usec;
pm->mark = skb->nfmark;
pm->hook = hooknum;
if (prefix != NULL)
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index f5909a4c3fc7..e19c2a52d00c 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -48,7 +48,7 @@ static int checkentry(const char *tablename, const struct ipt_ip *ip,
unsigned int hook_mask)
{
if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
- printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n.",
+ printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n",
matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
return 0;
}
diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c
index df4a42c6da22..d68a048b7176 100644
--- a/net/ipv4/netfilter/ipt_connbytes.c
+++ b/net/ipv4/netfilter/ipt_connbytes.c
@@ -10,7 +10,7 @@
*/
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_connbytes.h>
@@ -46,60 +46,59 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_connbytes_info *sinfo = matchinfo;
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct;
u_int64_t what = 0; /* initialize to make gcc happy */
+ const struct ip_conntrack_counter *counters;
- if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)))
+ if (!(counters = nf_ct_get_counters(skb)))
return 0; /* no match */
switch (sinfo->what) {
case IPT_CONNBYTES_PKTS:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
- what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
+ what = counters[IP_CT_DIR_ORIGINAL].packets;
break;
case IPT_CONNBYTES_DIR_REPLY:
- what = ct->counters[IP_CT_DIR_REPLY].packets;
+ what = counters[IP_CT_DIR_REPLY].packets;
break;
case IPT_CONNBYTES_DIR_BOTH:
- what = ct->counters[IP_CT_DIR_ORIGINAL].packets;
- what += ct->counters[IP_CT_DIR_REPLY].packets;
+ what = counters[IP_CT_DIR_ORIGINAL].packets;
+ what += counters[IP_CT_DIR_REPLY].packets;
break;
}
break;
case IPT_CONNBYTES_BYTES:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
- what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
+ what = counters[IP_CT_DIR_ORIGINAL].bytes;
break;
case IPT_CONNBYTES_DIR_REPLY:
- what = ct->counters[IP_CT_DIR_REPLY].bytes;
+ what = counters[IP_CT_DIR_REPLY].bytes;
break;
case IPT_CONNBYTES_DIR_BOTH:
- what = ct->counters[IP_CT_DIR_ORIGINAL].bytes;
- what += ct->counters[IP_CT_DIR_REPLY].bytes;
+ what = counters[IP_CT_DIR_ORIGINAL].bytes;
+ what += counters[IP_CT_DIR_REPLY].bytes;
break;
}
break;
case IPT_CONNBYTES_AVGPKT:
switch (sinfo->direction) {
case IPT_CONNBYTES_DIR_ORIGINAL:
- what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes,
- ct->counters[IP_CT_DIR_ORIGINAL].packets);
+ what = div64_64(counters[IP_CT_DIR_ORIGINAL].bytes,
+ counters[IP_CT_DIR_ORIGINAL].packets);
break;
case IPT_CONNBYTES_DIR_REPLY:
- what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes,
- ct->counters[IP_CT_DIR_REPLY].packets);
+ what = div64_64(counters[IP_CT_DIR_REPLY].bytes,
+ counters[IP_CT_DIR_REPLY].packets);
break;
case IPT_CONNBYTES_DIR_BOTH:
{
u_int64_t bytes;
u_int64_t pkts;
- bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes +
- ct->counters[IP_CT_DIR_REPLY].bytes;
- pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+
- ct->counters[IP_CT_DIR_REPLY].packets;
+ bytes = counters[IP_CT_DIR_ORIGINAL].bytes +
+ counters[IP_CT_DIR_REPLY].bytes;
+ pkts = counters[IP_CT_DIR_ORIGINAL].packets+
+ counters[IP_CT_DIR_REPLY].packets;
/* FIXME_THEORETICAL: what to do if sum
* overflows ? */
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
index bf8de47ce004..5306ef293b92 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -28,7 +28,7 @@ MODULE_LICENSE("GPL");
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_connmark.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
static int
match(const struct sk_buff *skb,
@@ -39,12 +39,12 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_connmark_info *info = matchinfo;
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack *ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
- if (!ct)
+ u_int32_t ctinfo;
+ const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
+ if (!ctmark)
return 0;
- return ((ct->mark & info->mask) == info->mark) ^ info->invert;
+ return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
}
static int
diff --git a/net/ipv4/netfilter/ipt_conntrack.c b/net/ipv4/netfilter/ipt_conntrack.c
index c1d22801b7cf..c8d18705469b 100644
--- a/net/ipv4/netfilter/ipt_conntrack.c
+++ b/net/ipv4/netfilter/ipt_conntrack.c
@@ -10,7 +10,14 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_conntrack.h>
@@ -18,6 +25,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("iptables connection tracking match module");
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
+
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -102,6 +111,93 @@ match(const struct sk_buff *skb,
return 1;
}
+#else /* CONFIG_IP_NF_CONNTRACK */
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ const struct ipt_conntrack_info *sinfo = matchinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int statebit;
+
+ ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+
+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
+
+ if (ct == &nf_conntrack_untracked)
+ statebit = IPT_CONNTRACK_STATE_UNTRACKED;
+ else if (ct)
+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
+ else
+ statebit = IPT_CONNTRACK_STATE_INVALID;
+
+ if(sinfo->flags & IPT_CONNTRACK_STATE) {
+ if (ct) {
+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip !=
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip)
+ statebit |= IPT_CONNTRACK_STATE_SNAT;
+
+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip !=
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip)
+ statebit |= IPT_CONNTRACK_STATE_DNAT;
+ }
+
+ if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_PROTO) {
+ if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_STATUS) {
+ if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
+ return 0;
+ }
+
+ if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
+ unsigned long expires;
+
+ if(!ct)
+ return 0;
+
+ expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
+
+ if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
+ return 0;
+ }
+
+ return 1;
+}
+
+#endif /* CONFIG_NF_IP_CONNTRACK */
+
static int check(const char *tablename,
const struct ipt_ip *ip,
void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index 3e7dd014de43..bf14e1c7798a 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -13,9 +13,15 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#else
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#endif
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_helper.h>
@@ -29,6 +35,7 @@ MODULE_DESCRIPTION("iptables helper match module");
#define DEBUGP(format, args...)
#endif
+#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
static int
match(const struct sk_buff *skb,
const struct net_device *in,
@@ -73,6 +80,53 @@ out_unlock:
return ret;
}
+#else /* CONFIG_IP_NF_CONNTRACK */
+
+static int
+match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ int *hotdrop)
+{
+ const struct ipt_helper_info *info = matchinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int ret = info->invert;
+
+ ct = nf_ct_get((struct sk_buff *)skb, &ctinfo);
+ if (!ct) {
+ DEBUGP("ipt_helper: Eek! invalid conntrack?\n");
+ return ret;
+ }
+
+ if (!ct->master) {
+ DEBUGP("ipt_helper: conntrack %p has no master\n", ct);
+ return ret;
+ }
+
+ read_lock_bh(&nf_conntrack_lock);
+ if (!ct->master->helper) {
+ DEBUGP("ipt_helper: master ct %p has no helper\n",
+ exp->expectant);
+ goto out_unlock;
+ }
+
+ DEBUGP("master's name = %s , info->name = %s\n",
+ ct->master->helper->name, info->name);
+
+ if (info->name[0] == '\0')
+ ret ^= 1;
+ else
+ ret ^= !strncmp(ct->master->helper->name, info->name,
+ strlen(ct->master->helper->name));
+out_unlock:
+ read_unlock_bh(&nf_conntrack_lock);
+ return ret;
+}
+#endif
+
static int check(const char *tablename,
const struct ipt_ip *ip,
void *matchinfo,
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 2d44b07688af..261cbb4d4c49 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -532,6 +532,7 @@ match(const struct sk_buff *skb,
}
if(info->seconds && info->hit_count) {
for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
+ if(r_list[location].last_pkts[pkt_count] == 0) break;
if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
}
if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
diff --git a/net/ipv4/netfilter/ipt_state.c b/net/ipv4/netfilter/ipt_state.c
index b1511b97ea5f..4d7f16b70cec 100644
--- a/net/ipv4/netfilter/ipt_state.c
+++ b/net/ipv4/netfilter/ipt_state.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <net/netfilter/nf_conntrack_compat.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_state.h>
@@ -30,9 +30,9 @@ match(const struct sk_buff *skb,
enum ip_conntrack_info ctinfo;
unsigned int statebit;
- if (skb->nfct == &ip_conntrack_untracked.ct_general)
+ if (nf_ct_is_untracked(skb))
statebit = IPT_STATE_UNTRACKED;
- else if (!ip_conntrack_get(skb, &ctinfo))
+ else if (!nf_ct_get_ctinfo(skb, &ctinfo))
statebit = IPT_STATE_INVALID;
else
statebit = IPT_STATE_BIT(ctinfo);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644
index 000000000000..8202c1c0afad
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -0,0 +1,571 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ * - move L3 protocol dependent part to this file.
+ * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ * - add get_features() to support various size of conntrack
+ * structures.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
+
+static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ u_int32_t _addrs[2], *ap;
+ ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
+ sizeof(u_int32_t) * 2, _addrs);
+ if (ap == NULL)
+ return 0;
+
+ tuple->src.u3.ip = ap[0];
+ tuple->dst.u3.ip = ap[1];
+
+ return 1;
+}
+
+static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ tuple->src.u3.ip = orig->dst.u3.ip;
+ tuple->dst.u3.ip = orig->src.u3.ip;
+
+ return 1;
+}
+
+static int ipv4_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
+ NIPQUAD(tuple->src.u3.ip),
+ NIPQUAD(tuple->dst.u3.ip));
+}
+
+static int ipv4_print_conntrack(struct seq_file *s,
+ const struct nf_conn *conntrack)
+{
+ return 0;
+}
+
+/* Returns new sk_buff, or NULL */
+static struct sk_buff *
+nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+ skb_orphan(skb);
+
+ local_bh_disable();
+ skb = ip_defrag(skb, user);
+ local_bh_enable();
+
+ if (skb)
+ ip_send_check(skb->nh.iph);
+
+ return skb;
+}
+
+static int
+ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
+ u_int8_t *protonum)
+{
+ /* Never happen */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+ if (net_ratelimit()) {
+ printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
+ (*pskb)->nh.iph->protocol, hooknum);
+ }
+ return -NF_DROP;
+ }
+
+ *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
+ *protonum = (*pskb)->nh.iph->protocol;
+
+ return NF_ACCEPT;
+}
+
+int nat_module_is_loaded = 0;
+static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
+{
+ if (nat_module_is_loaded)
+ return NF_CT_F_NAT;
+
+ return NF_CT_F_BASIC;
+}
+
+static unsigned int ipv4_confirm(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(pskb);
+}
+
+static unsigned int ipv4_conntrack_help(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(*pskb, &ctinfo);
+ if (ct && ct->helper) {
+ unsigned int ret;
+ ret = ct->helper->help(pskb,
+ (*pskb)->nh.raw - (*pskb)->data
+ + (*pskb)->nh.iph->ihl*4,
+ ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ return NF_ACCEPT;
+}
+
+static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
+ /* Previously seen (loopback)? Ignore. Do this before
+ fragment check. */
+ if ((*pskb)->nfct)
+ return NF_ACCEPT;
+#endif
+
+ /* Gather fragments. */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+ *pskb = nf_ct_ipv4_gather_frags(*pskb,
+ hooknum == NF_IP_PRE_ROUTING ?
+ IP_DEFRAG_CONNTRACK_IN :
+ IP_DEFRAG_CONNTRACK_OUT);
+ if (!*pskb)
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static unsigned int ipv4_refrag(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
+
+ /* We've seen it coming out the other side: confirm */
+ if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+ return NF_DROP;
+
+ /* Local packets are never produced too large for their
+ interface. We degfragment them at LOCAL_OUT, however,
+ so we have to refragment them here. */
+ if ((*pskb)->len > dst_mtu(&rt->u.dst) &&
+ !skb_shinfo(*pskb)->tso_size) {
+ /* No hook can be after us, so this should be OK. */
+ ip_fragment(*pskb, okfn);
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+
+static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+ struct sk_buff **pskb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* root is playing with raw sockets. */
+ if ((*pskb)->len < sizeof(struct iphdr)
+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+ if (net_ratelimit())
+ printk("ipt_hook: happy cracking.\n");
+ return NF_ACCEPT;
+ }
+ return nf_conntrack_in(PF_INET, hooknum, pskb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ make it the first hook. */
+static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_in_ops = {
+ .hook = ipv4_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+};
+
+static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
+ .hook = ipv4_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+};
+
+/* helpers */
+static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
+ .hook = ipv4_conntrack_help,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
+ .hook = ipv4_conntrack_help,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+};
+
+
+/* Refragmenter; last chance. */
+static struct nf_hook_ops ipv4_conntrack_out_ops = {
+ .hook = ipv4_refrag,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_IP_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+};
+
+#ifdef CONFIG_SYSCTL
+/* From nf_conntrack_proto_icmp.c */
+extern unsigned long nf_ct_icmp_timeout;
+static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
+
+static ctl_table nf_ct_sysctl_table[] = {
+ {
+ .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT,
+ .procname = "nf_conntrack_icmp_timeout",
+ .data = &nf_ct_icmp_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_netfilter_table[] = {
+ {
+ .ctl_name = NET_NETFILTER,
+ .procname = "netfilter",
+ .mode = 0555,
+ .child = nf_ct_sysctl_table,
+ },
+ { .ctl_name = 0 }
+};
+
+static ctl_table nf_ct_net_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = nf_ct_netfilter_table,
+ },
+ { .ctl_name = 0 }
+};
+#endif
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+ mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
+ NF_CT_TUPLE_U_BLANK(&tuple);
+ tuple.src.u3.ip = inet->rcv_saddr;
+ tuple.src.u.tcp.port = inet->sport;
+ tuple.dst.u3.ip = inet->daddr;
+ tuple.dst.u.tcp.port = inet->dport;
+ tuple.src.l3num = PF_INET;
+ tuple.dst.protonum = IPPROTO_TCP;
+
+ /* We only do TCP at the moment: is there a better way? */
+ if (strcmp(sk->sk_prot->name, "TCP")) {
+ DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+ DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = nf_conntrack_find_get(&tuple, NULL);
+ if (h) {
+ struct sockaddr_in sin;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u3.ip;
+
+ DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
+ NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ nf_ct_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
+ NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
+ NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST+1,
+ .get = &getorigdst,
+};
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
+ .l3proto = PF_INET,
+ .name = "ipv4",
+ .pkt_to_tuple = ipv4_pkt_to_tuple,
+ .invert_tuple = ipv4_invert_tuple,
+ .print_tuple = ipv4_print_tuple,
+ .print_conntrack = ipv4_print_conntrack,
+ .prepare = ipv4_prepare,
+ .get_features = ipv4_get_features,
+ .me = THIS_MODULE,
+};
+
+extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
+extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
+static int init_or_cleanup(int init)
+{
+ int ret = 0;
+
+ if (!init) goto cleanup;
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret < 0) {
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+ goto cleanup_nothing;
+ }
+
+ ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register tcp.\n");
+ goto cleanup_sockopt;
+ }
+
+ ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register udp.\n");
+ goto cleanup_tcp;
+ }
+
+ ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register icmp.\n");
+ goto cleanup_udp;
+ }
+
+ ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register ipv4\n");
+ goto cleanup_icmp;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
+ goto cleanup_ipv4;
+ }
+ ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
+ goto cleanup_defragops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_in_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
+ goto cleanup_defraglocalops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register local out hook.\n");
+ goto cleanup_inops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register local helper hook.\n");
+ goto cleanup_inandlocalops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
+ goto cleanup_helperinops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_out_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
+ goto cleanup_helperoutops;
+ }
+
+ ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
+ if (ret < 0) {
+ printk("nf_conntrack_ipv4: can't register local in hook.\n");
+ goto cleanup_inoutandlocalops;
+ }
+
+#ifdef CONFIG_SYSCTL
+ nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
+ if (nf_ct_ipv4_sysctl_header == NULL) {
+ printk("nf_conntrack: can't register to sysctl.\n");
+ ret = -ENOMEM;
+ goto cleanup_localinops;
+ }
+#endif
+
+ /* For use by REJECT target */
+ ip_ct_attach = __nf_conntrack_attach;
+
+ return ret;
+
+ cleanup:
+ synchronize_net();
+ ip_ct_attach = NULL;
+#ifdef CONFIG_SYSCTL
+ unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
+ cleanup_localinops:
+#endif
+ nf_unregister_hook(&ipv4_conntrack_local_in_ops);
+ cleanup_inoutandlocalops:
+ nf_unregister_hook(&ipv4_conntrack_out_ops);
+ cleanup_helperoutops:
+ nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
+ cleanup_helperinops:
+ nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
+ cleanup_inandlocalops:
+ nf_unregister_hook(&ipv4_conntrack_local_out_ops);
+ cleanup_inops:
+ nf_unregister_hook(&ipv4_conntrack_in_ops);
+ cleanup_defraglocalops:
+ nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
+ cleanup_defragops:
+ nf_unregister_hook(&ipv4_conntrack_defrag_ops);
+ cleanup_ipv4:
+ nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ cleanup_icmp:
+ nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
+ cleanup_udp:
+ nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
+ cleanup_tcp:
+ nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst);
+ cleanup_nothing:
+ return ret;
+}
+
+MODULE_LICENSE("GPL");
+
+static int __init init(void)
+{
+ need_nf_conntrack();
+ return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+ init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+void need_ip_conntrack(void)
+{
+}
+
+EXPORT_SYMBOL(need_ip_conntrack);
+EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000000..7ddb5c08f7b8
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,301 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ * - enable working with Layer 3 protocol independent connection tracking.
+ *
+ * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_core.h>
+
+unsigned long nf_ct_icmp_timeout = 30*HZ;
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static int icmp_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ struct icmphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return 0;
+
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
+
+ return 1;
+}
+
+static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ /* Add 1; spaces filled with 0. */
+ static u_int8_t invmap[]
+ = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
+
+ if (orig->dst.u.icmp.type >= sizeof(invmap)
+ || !invmap[orig->dst.u.icmp.type])
+ return 0;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return 1;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static int icmp_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ return seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+}
+
+/* Print out the private part of the conntrack. */
+static int icmp_print_conntrack(struct seq_file *s,
+ const struct nf_conn *conntrack)
+{
+ return 0;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ int pf,
+ unsigned int hooknum)
+{
+ /* Try to delete connection immediately after all replies:
+ won't actually vanish as we still have skb, and del_timer
+ means this will only run once even if count hits zero twice
+ (theoretically possible with SMP) */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+ if (atomic_dec_and_test(&ct->proto.icmp.count)
+ && del_timer(&ct->timeout))
+ ct->timeout.function((unsigned long)ct);
+ } else {
+ atomic_inc(&ct->proto.icmp.count);
+ nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+ nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
+ }
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int icmp_new(struct nf_conn *conntrack,
+ const struct sk_buff *skb, unsigned int dataoff)
+{
+ static u_int8_t valid_new[]
+ = { [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1 };
+
+ if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
+ || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+ /* Can't create a new ICMP `conn' with this. */
+ DEBUGP("icmp: can't create new conn with type %u\n",
+ conntrack->tuplehash[0].tuple.dst.u.icmp.type);
+ NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
+ return 0;
+ }
+ atomic_set(&conntrack->proto.icmp.count, 0);
+ return 1;
+}
+
+extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct nf_conntrack_tuple innertuple, origtuple;
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } _in, *inside;
+ struct nf_conntrack_protocol *innerproto;
+ struct nf_conntrack_tuple_hash *h;
+ int dataoff;
+
+ NF_CT_ASSERT(skb->nfct == NULL);
+
+ /* Not enough header? */
+ inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+ if (inside == NULL)
+ return -NF_ACCEPT;
+
+ /* Ignore ICMP's containing fragments (shouldn't happen) */
+ if (inside->ip.frag_off & htons(IP_OFFSET)) {
+ DEBUGP("icmp_error_message: fragment of proto %u\n",
+ inside->ip.protocol);
+ return -NF_ACCEPT;
+ }
+
+ innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+ dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
+ inside->ip.protocol, &origtuple,
+ &nf_conntrack_l3proto_ipv4, innerproto)) {
+ DEBUGP("icmp_error_message: ! get_tuple p=%u",
+ inside->ip.protocol);
+ return -NF_ACCEPT;
+ }
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+ &nf_conntrack_l3proto_ipv4, innerproto)) {
+ DEBUGP("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ *ctinfo = IP_CT_RELATED;
+
+ h = nf_conntrack_find_get(&innertuple, NULL);
+ if (!h) {
+ /* Locally generated ICMPs will match inverted if they
+ haven't been SNAT'ed yet */
+ /* FIXME: NAT code has to handle half-done double NAT --RR */
+ if (hooknum == NF_IP_LOCAL_OUT)
+ h = nf_conntrack_find_get(&origtuple, NULL);
+
+ if (!h) {
+ DEBUGP("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ /* Reverse direction from that found */
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ } else {
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+ }
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return -NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct sk_buff *skb, unsigned int dataoff,
+ enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+{
+ struct icmphdr _ih, *icmph;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+ if (icmph == NULL) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* See ip_conntrack_proto_tcp.c */
+ if (hooknum != NF_IP_PRE_ROUTING)
+ goto checksum_skipped;
+
+ switch (skb->ip_summed) {
+ case CHECKSUM_HW:
+ if (!(u16)csum_fold(skb->csum))
+ break;
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmp: bad HW ICMP checksum ");
+ return -NF_ACCEPT;
+ case CHECKSUM_NONE:
+ if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL,
+ NULL,
+ "nf_ct_icmp: bad ICMP checksum ");
+ return -NF_ACCEPT;
+ }
+ default:
+ break;
+ }
+
+checksum_skipped:
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+ * discarded.
+ */
+ if (icmph->type > NR_ICMP_TYPES) {
+ if (LOG_INVALID(IPPROTO_ICMP))
+ nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmp: invalid ICMP type ");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (icmph->type != ICMP_DEST_UNREACH
+ && icmph->type != ICMP_SOURCE_QUENCH
+ && icmph->type != ICMP_TIME_EXCEEDED
+ && icmph->type != ICMP_PARAMETERPROB
+ && icmph->type != ICMP_REDIRECT)
+ return NF_ACCEPT;
+
+ return icmp_error_message(skb, ctinfo, hooknum);
+}
+
+struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
+{
+ .list = { NULL, NULL },
+ .l3proto = PF_INET,
+ .proto = IPPROTO_ICMP,
+ .name = "icmp",
+ .pkt_to_tuple = icmp_pkt_to_tuple,
+ .invert_tuple = icmp_invert_tuple,
+ .print_tuple = icmp_print_tuple,
+ .print_conntrack = icmp_print_conntrack,
+ .packet = icmp_packet,
+ .new = icmp_new,
+ .error = icmp_error,
+ .destroy = NULL,
+ .me = NULL
+};
+
+EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f7943ba1f43c..0d7dc668db46 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,9 +90,7 @@ fold_field(void *mib[], int offt)
unsigned long res = 0;
int i;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
+ for_each_cpu(i) {
res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
}
@@ -100,7 +98,7 @@ fold_field(void *mib[], int offt)
}
/* snmp items */
-static struct snmp_mib snmp4_ipstats_list[] = {
+static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS),
SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS),
@@ -121,7 +119,7 @@ static struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_SENTINEL
};
-static struct snmp_mib snmp4_icmp_list[] = {
+static const struct snmp_mib snmp4_icmp_list[] = {
SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS),
SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS),
SNMP_MIB_ITEM("InDestUnreachs", ICMP_MIB_INDESTUNREACHS),
@@ -151,7 +149,7 @@ static struct snmp_mib snmp4_icmp_list[] = {
SNMP_MIB_SENTINEL
};
-static struct snmp_mib snmp4_tcp_list[] = {
+static const struct snmp_mib snmp4_tcp_list[] = {
SNMP_MIB_ITEM("RtoAlgorithm", TCP_MIB_RTOALGORITHM),
SNMP_MIB_ITEM("RtoMin", TCP_MIB_RTOMIN),
SNMP_MIB_ITEM("RtoMax", TCP_MIB_RTOMAX),
@@ -169,7 +167,7 @@ static struct snmp_mib snmp4_tcp_list[] = {
SNMP_MIB_SENTINEL
};
-static struct snmp_mib snmp4_udp_list[] = {
+static const struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_ITEM("InDatagrams", UDP_MIB_INDATAGRAMS),
SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS),
SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS),
@@ -177,7 +175,7 @@ static struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_SENTINEL
};
-static struct snmp_mib snmp4_net_list[] = {
+static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("SyncookiesSent", LINUX_MIB_SYNCOOKIESSENT),
SNMP_MIB_ITEM("SyncookiesRecv", LINUX_MIB_SYNCOOKIESRECV),
SNMP_MIB_ITEM("SyncookiesFailed", LINUX_MIB_SYNCOOKIESFAILED),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8549f26e2495..f701a136a6ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1371,7 +1371,7 @@ out: kfree_skb(skb);
* are needed for AMPRnet AX.25 paths.
*/
-static unsigned short mtu_plateau[] =
+static const unsigned short mtu_plateau[] =
{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
static __inline__ unsigned short guess_mtu(unsigned short old_mtu)
@@ -2128,7 +2128,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
struct in_device *in_dev;
rcu_read_lock();
- if ((in_dev = __in_dev_get(dev)) != NULL) {
+ if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
int our = ip_check_mc(in_dev, daddr, saddr,
skb->nh.iph->protocol);
if (our
@@ -2443,7 +2443,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
err = -ENODEV;
if (dev_out == NULL)
goto out;
- if (__in_dev_get(dev_out) == NULL) {
+
+ /* RACE: Check return value of inet_select_addr instead. */
+ if (__in_dev_get_rtnl(dev_out) == NULL) {
dev_put(dev_out);
goto out; /* Wrong error code */
}
@@ -3147,8 +3149,7 @@ int __init ip_rt_init(void)
sizeof(struct rt_hash_bucket),
rhash_entries,
(num_physpages >= 128 * 1024) ?
- (27 - PAGE_SHIFT) :
- (29 - PAGE_SHIFT),
+ 15 : 17,
HASH_HIGHMEM,
&rt_hash_log,
&rt_hash_mask,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_tcp_congestion_control,
.strategy = &sysctl_tcp_congestion_control,
},
+ {
+ .ctl_name = NET_TCP_ABC,
+ .procname = "tcp_abc",
+ .data = &sysctl_tcp_abc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f3f0013a9580..ef98b14ac56d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1413,7 +1413,7 @@ recv_urg:
* closed.
*/
-static unsigned char new_state[16] = {
+static const unsigned char new_state[16] = {
/* current state: new state: action: */
/* (Invalid) */ TCP_CLOSE,
/* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
} else if (tcp_need_reset(old_state) ||
(tp->snd_nxt != tp->write_seq &&
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
- /* The last check adjusts for discrepance of Linux wrt. RFC
+ /* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
+ tp->bytes_acked = 0;
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
@@ -2064,8 +2065,7 @@ void __init tcp_init(void)
sizeof(struct inet_ehash_bucket),
thash_entries,
(num_physpages >= 128 * 1024) ?
- (25 - PAGE_SHIFT) :
- (27 - PAGE_SHIFT),
+ 13 : 15,
HASH_HIGHMEM,
&tcp_hashinfo.ehash_size,
NULL,
@@ -2081,8 +2081,7 @@ void __init tcp_init(void)
sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_size,
(num_physpages >= 128 * 1024) ?
- (25 - PAGE_SHIFT) :
- (27 - PAGE_SHIFT),
+ 13 : 15,
HASH_HIGHMEM,
&tcp_hashinfo.bhash_size,
NULL,
@@ -2112,7 +2111,6 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans >>= (3 - order);
sysctl_max_syn_backlog = 128;
}
- tcp_hashinfo.port_rover = sysctl_local_port_range[0] - 1;
sysctl_tcp_mem[0] = 768 << order;
sysctl_tcp_mem[1] = 1024 << order;
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index b940346de4e7..1d0cd86621b1 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -27,7 +27,7 @@
*/
static int fast_convergence = 1;
-static int max_increment = 32;
+static int max_increment = 16;
static int low_window = 14;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int low_utilization_threshold = 153;
@@ -136,7 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
else if (cwnd < ca->last_max_cwnd + max_increment*(BICTCP_B-1))
/* slow start */
ca->cnt = (cwnd * (BICTCP_B-1))
- / cwnd-ca->last_max_cwnd;
+ / (cwnd - ca->last_max_cwnd);
else
/* linear increase */
ca->cnt = cwnd / max_increment;
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
bictcp_low_utilization(sk, data_acked);
- if (in_flight < tp->snd_cwnd)
+ if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* In "safe" area, increase. */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
bictcp_update(ca, tp->snd_cwnd);
- /* In dangerous area, increase slowly.
+ /* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
*/
if (tp->snd_cwnd_cnt >= ca->cnt) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d6624e89..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (in_flight < tp->snd_cwnd)
+ if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* In "safe" area, increase. */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- } else {
- /* In dangerous area, increase slowly.
- * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
- */
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- } else
- tp->snd_cwnd_cnt++;
- }
+ /* In "safe" area, increase. */
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+
+ /* In dangerous area, increase slowly. */
+ else if (sysctl_tcp_abc) {
+ /* RFC3465: Apppriate Byte Count
+ * increase once for each full cwnd acked
+ */
+ if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ } else {
+ /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ } else
+ tp->snd_cwnd_cnt++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04bde080..63cf7e540847 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
}
static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
- u32 in_flight, int good)
+ u32 in_flight, int data_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hstcp *ca = inet_csk_ca(sk);
- if (in_flight < tp->snd_cwnd)
+ if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
/* Update AIMD parameters */
if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b37984e95..3284cfb993e6 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- if (in_flight < tp->snd_cwnd)
+ if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- /* In "safe" area, increase. */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
+
measure_rtt(sk);
/* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
htcp_alpha_update(ca);
}
- /* In dangerous area, increase slowly.
+ /* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63623df..40dbb3877510 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
ca->minrtt = tp->srtt;
}
+ if (!tcp_is_cwnd_limited(sk, in_flight))
+ return;
+
if (!ca->hybla_en)
return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
- if (in_flight < tp->snd_cwnd)
- return;
-
if (ca->rho == 0)
hybla_recalc_param(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a7537c7bbd06..bf2e23086bce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
* Andi Kleen : Moved open_request checking here
* and process RSTs for open_requests.
* Andi Kleen : Better prune_queue, and other fixes.
- * Andrey Savochkin: Fix RTT measurements in the presnce of
+ * Andrey Savochkin: Fix RTT measurements in the presence of
* timestamps.
* Andrey Savochkin: Check sequence numbers correctly when
* removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
* of receiver window. Check #2.
*
* The scheme does not work when sender sends good segments opening
- * window and then starts to feed us spagetti. But it should work
+ * window and then starts to feed us spaghetti. But it should work
* in common situations. Otherwise, we have to rely on queue collapsing.
*/
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
{
/* Optimize this! */
int truesize = tcp_win_from_space(skb->truesize)/2;
- int window = tcp_full_space(sk)/2;
+ int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
/* Try to select rcvbuf so that 4 mss-sized segments
- * will fit to window and correspoding skbs will fit to our rcvbuf.
+ * will fit to window and corresponding skbs will fit to our rcvbuf.
* (was 3; 4 is minimum to allow fast retransmit to work.)
*/
while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
}
-/* 4. Try to fixup all. It is made iimediately after connection enters
+/* 4. Try to fixup all. It is made immediately after connection enters
* established state.
*/
static void tcp_init_buffer_space(struct sock *sk)
@@ -326,39 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- struct sk_buff *skb;
- unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
- int ofo_win = 0;
icsk->icsk_ack.quick = 0;
- skb_queue_walk(&tp->out_of_order_queue, skb) {
- ofo_win += skb->len;
- }
-
- /* If overcommit is due to out of order segments,
- * do not clamp window. Try to expand rcvbuf instead.
- */
- if (ofo_win) {
- if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
- !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
- !tcp_memory_pressure &&
- atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
- sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- sysctl_tcp_rmem[2]);
+ if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+ !tcp_memory_pressure &&
+ atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+ sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+ sysctl_tcp_rmem[2]);
}
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
- app_win += ofo_win;
- if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
- app_win >>= 1;
- if (app_win > icsk->icsk_ack.rcv_mss)
- app_win -= icsk->icsk_ack.rcv_mss;
- app_win = max(app_win, 2U*tp->advmss);
-
- if (!ofo_win)
- tp->window_clamp = min(tp->window_clamp, app_win);
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
- }
}
/* Receiver "autotuning" code.
@@ -387,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
* are stalled on filesystem I/O.
*
* Also, since we are only going for a minimum in the
- * non-timestamp case, we do not smoothe things out
- * else with timestamps disabled convergance takes too
+ * non-timestamp case, we do not smooth things out
+ * else with timestamps disabled convergence takes too
* long.
*/
if (!win_dep) {
@@ -397,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
} else if (m < new_sample)
new_sample = m << 3;
} else {
- /* No previous mesaure. */
+ /* No previous measure. */
new_sample = m << 3;
}
@@ -526,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
if (icsk->icsk_ack.ato > icsk->icsk_rto)
icsk->icsk_ack.ato = icsk->icsk_rto;
} else if (m > icsk->icsk_rto) {
- /* Too long gap. Apparently sender falled to
+ /* Too long gap. Apparently sender failed to
* restart window, so that we send ACKs quickly.
*/
tcp_incr_quickack(sk);
@@ -550,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
* To save cycles in the RFC 1323 implementation it was better to break
* it up into three procedures. -- erics
*/
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
struct tcp_sock *tp = tcp_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
long m = mrtt; /* RTT */
/* The following amusing code comes from Jacobson's
@@ -567,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
*
* Funny. This algorithm seems to be very broken.
* These formulae increase RTO, when it should be decreased, increase
- * too slowly, when it should be incresed fastly, decrease too fastly
+ * too slowly, when it should be increased quickly, decrease too quickly
* etc. I guess in BSD RTO takes ONE value, so that it is absolutely
* does not matter how to _calculate_ it. Seems, it was trap
* that VJ failed to avoid. 8)
@@ -612,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
tp->rtt_seq = tp->snd_nxt;
}
-
- if (icsk->icsk_ca_ops->rtt_sample)
- icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -631,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
* at least by solaris and freebsd. "Erratic ACKs" has _nothing_
* to do with delayed acks, because at cwnd>2 true delack timeout
* is invisible. Actually, Linux-2.4 also generates erratic
- * ACKs in some curcumstances.
+ * ACKs in some circumstances.
*/
inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
/* 2. Fixups made earlier cannot be right.
* If we do not estimate RTO correctly without them,
* all the algo is pure shit and should be replaced
- * with correct one. It is exaclty, which we pretend to do.
+ * with correct one. It is exactly, which we pretend to do.
*/
}
@@ -796,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
* to make it more realistic.
*
* A bit of theory. RTT is time passed after "normal" sized packet
- * is sent until it is ACKed. In normal curcumstances sending small
+ * is sent until it is ACKed. In normal circumstances sending small
* packets force peer to delay ACKs and calculation is correct too.
* The algorithm is adaptive and, provided we follow specs, it
* NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -921,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
int prior_fackets;
u32 lost_retrans = 0;
int flag = 0;
+ int dup_sack = 0;
int i;
if (!tp->sacked_out)
tp->fackets_out = 0;
prior_fackets = tp->fackets_out;
- for (i=0; i<num_sacks; i++, sp++) {
- struct sk_buff *skb;
- __u32 start_seq = ntohl(sp->start_seq);
- __u32 end_seq = ntohl(sp->end_seq);
- int fack_count = 0;
- int dup_sack = 0;
+ /* SACK fastpath:
+ * if the only SACK change is the increase of the end_seq of
+ * the first block then only apply that SACK block
+ * and use retrans queue hinting otherwise slowpath */
+ flag = 1;
+ for (i = 0; i< num_sacks; i++) {
+ __u32 start_seq = ntohl(sp[i].start_seq);
+ __u32 end_seq = ntohl(sp[i].end_seq);
+
+ if (i == 0){
+ if (tp->recv_sack_cache[i].start_seq != start_seq)
+ flag = 0;
+ } else {
+ if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+ (tp->recv_sack_cache[i].end_seq != end_seq))
+ flag = 0;
+ }
+ tp->recv_sack_cache[i].start_seq = start_seq;
+ tp->recv_sack_cache[i].end_seq = end_seq;
/* Check for D-SACK. */
if (i == 0) {
@@ -964,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (before(ack, prior_snd_una - tp->max_window))
return 0;
}
+ }
+
+ if (flag)
+ num_sacks = 1;
+ else {
+ int j;
+ tp->fastpath_skb_hint = NULL;
+
+ /* order SACK blocks to allow in order walk of the retrans queue */
+ for (i = num_sacks-1; i > 0; i--) {
+ for (j = 0; j < i; j++){
+ if (after(ntohl(sp[j].start_seq),
+ ntohl(sp[j+1].start_seq))){
+ sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
+ sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
+ sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
+ sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+ }
+
+ }
+ }
+ }
+
+ /* clear flag as used for different purpose in following code */
+ flag = 0;
+
+ for (i=0; i<num_sacks; i++, sp++) {
+ struct sk_buff *skb;
+ __u32 start_seq = ntohl(sp->start_seq);
+ __u32 end_seq = ntohl(sp->end_seq);
+ int fack_count;
+
+ /* Use SACK fastpath hint if valid */
+ if (tp->fastpath_skb_hint) {
+ skb = tp->fastpath_skb_hint;
+ fack_count = tp->fastpath_cnt_hint;
+ } else {
+ skb = sk->sk_write_queue.next;
+ fack_count = 0;
+ }
/* Event "B" in the comment above. */
if (after(end_seq, tp->high_seq))
flag |= FLAG_DATA_LOST;
- sk_stream_for_retrans_queue(skb, sk) {
+ sk_stream_for_retrans_queue_from(skb, sk) {
int in_sack, pcount;
u8 sacked;
+ tp->fastpath_skb_hint = skb;
+ tp->fastpath_cnt_hint = fack_count;
+
/* The retransmission queue is always in order, so
* we can short-circuit the walk early.
*/
@@ -1047,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
tp->lost_out -= tcp_skb_pcount(skb);
tp->retrans_out -= tcp_skb_pcount(skb);
+
+ /* clear lost hint */
+ tp->retransmit_skb_hint = NULL;
}
} else {
/* New sack for not retransmitted frame,
@@ -1059,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
tp->lost_out -= tcp_skb_pcount(skb);
+
+ /* clear lost hint */
+ tp->retransmit_skb_hint = NULL;
}
}
@@ -1082,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
+ tp->retransmit_skb_hint = NULL;
}
}
}
@@ -1109,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
+ /* clear lost hint */
+ tp->retransmit_skb_hint = NULL;
+
if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1216,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->frto_highmark;
TCP_ECN_queue_cwr(tp);
+
+ clear_all_retrans_hints(tp);
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1253,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->bytes_acked = 0;
tcp_clear_retrans(tp);
/* Push undo marker, if it was plain RTO and nothing
@@ -1281,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
+
+ clear_all_retrans_hints(tp);
}
static int tcp_check_sack_reneging(struct sock *sk)
@@ -1505,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
int packets, u32 high_seq)
{
struct sk_buff *skb;
- int cnt = packets;
+ int cnt;
- BUG_TRAP(cnt <= tp->packets_out);
+ BUG_TRAP(packets <= tp->packets_out);
+ if (tp->lost_skb_hint) {
+ skb = tp->lost_skb_hint;
+ cnt = tp->lost_cnt_hint;
+ } else {
+ skb = sk->sk_write_queue.next;
+ cnt = 0;
+ }
- sk_stream_for_retrans_queue(skb, sk) {
- cnt -= tcp_skb_pcount(skb);
- if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+ sk_stream_for_retrans_queue_from(skb, sk) {
+ /* TODO: do this better */
+ /* this is not the most efficient way to do this... */
+ tp->lost_skb_hint = skb;
+ tp->lost_cnt_hint = cnt;
+ cnt += tcp_skb_pcount(skb);
+ if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
+
+ /* clear xmit_retransmit_queue hints
+ * if this is beyond hint */
+ if(tp->retransmit_skb_hint != NULL &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
+
+ tp->retransmit_skb_hint = NULL;
+ }
}
}
tcp_sync_left_out(tp);
@@ -1542,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
if (tcp_head_timedout(sk, tp)) {
struct sk_buff *skb;
- sk_stream_for_retrans_queue(skb, sk) {
- if (tcp_skb_timedout(sk, skb) &&
- !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+ skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
+ : sk->sk_write_queue.next;
+
+ sk_stream_for_retrans_queue_from(skb, sk) {
+ if (!tcp_skb_timedout(sk, skb))
+ break;
+
+ if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
+
+ /* clear xmit_retrans hint */
+ if (tp->retransmit_skb_hint &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+
+ tp->retransmit_skb_hint = NULL;
}
}
+
+ tp->scoreboard_skb_hint = skb;
+
tcp_sync_left_out(tp);
}
}
@@ -1628,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
}
tcp_moderate_cwnd(tp);
tp->snd_cwnd_stamp = tcp_time_stamp;
+
+ /* There is something screwy going on with the retrans hints after
+ an undo */
+ clear_all_retrans_hints(tp);
}
static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1711,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
sk_stream_for_retrans_queue(skb, sk) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
+
+ clear_all_retrans_hints(tp);
+
DBGUNDO(sk, tp, "partial loss");
tp->lost_out = 0;
tp->left_out = tp->sacked_out;
@@ -1910,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
TCP_ECN_queue_cwr(tp);
}
+ tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
@@ -1921,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
}
/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Superceeds RFC1323)
+ * with this code. (Supersedes RFC1323)
*/
-static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
+static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
{
/* RTTM Rule: A TSecr value received in a segment is used to
* update the averaged RTT measurement only if the segment
@@ -1934,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
* 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
*
* Changed: reset backoff as soon as we see the first valid sample.
- * If we do not, we get strongly overstimated rto. With timestamps
+ * If we do not, we get strongly overestimated rto. With timestamps
* samples are accepted even from very old segments: f.e., when rtt=1
* increases to 8, we retransmit 5 times and after 8 seconds delayed
* answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1942,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
*/
struct tcp_sock *tp = tcp_sk(sk);
const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
- tcp_rtt_estimator(sk, seq_rtt, usrtt);
+ tcp_rtt_estimator(sk, seq_rtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
tcp_bound_rto(sk);
}
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
{
/* We don't have a timestamp. Can only use
* packets that are not retransmitted to determine
@@ -1962,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
if (flag & FLAG_RETRANS_DATA_ACKED)
return;
- tcp_rtt_estimator(sk, seq_rtt, usrtt);
+ tcp_rtt_estimator(sk, seq_rtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
tcp_bound_rto(sk);
}
static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
- const s32 seq_rtt, u32 *usrtt)
+ const s32 seq_rtt)
{
const struct tcp_sock *tp = tcp_sk(sk);
/* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
- tcp_ack_saw_tstamp(sk, usrtt, flag);
+ tcp_ack_saw_tstamp(sk, flag);
else if (seq_rtt >= 0)
- tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
+ tcp_ack_no_tstamp(sk, seq_rtt, flag);
}
static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2056,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
+static inline u32 tcp_usrtt(const struct sk_buff *skb)
+{
+ struct timeval tv, now;
+
+ do_gettimeofday(&now);
+ skb_get_timestamp(skb, &tv);
+ return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+}
/* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
__u32 now = tcp_time_stamp;
int acked = 0;
__s32 seq_rtt = -1;
- struct timeval usnow;
u32 pkts_acked = 0;
-
- if (seq_usrtt)
- do_gettimeofday(&usnow);
+ void (*rtt_sample)(struct sock *sk, u32 usrtt)
+ = icsk->icsk_ca_ops->rtt_sample;
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
@@ -2109,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
tp->retrans_out -= tcp_skb_pcount(skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
- } else if (seq_rtt < 0)
+ } else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- if (seq_usrtt) {
- struct timeval tv;
-
- skb_get_timestamp(skb, &tv);
- *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
- + (usnow.tv_usec - tv.tv_usec);
+ if (rtt_sample)
+ (*rtt_sample)(sk, tcp_usrtt(skb));
}
-
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
if (sacked & TCPCB_LOST)
@@ -2128,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
!before(scb->end_seq, tp->snd_up))
tp->urg_mode = 0;
}
- } else if (seq_rtt < 0)
+ } else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
+ if (rtt_sample)
+ (*rtt_sample)(sk, tcp_usrtt(skb));
+ }
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
__skb_unlink(skb, &sk->sk_write_queue);
sk_stream_free_skb(sk, skb);
+ clear_all_retrans_hints(tp);
}
if (acked&FLAG_ACKED) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
- tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
+ tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk, tp);
if (icsk->icsk_ca_ops->pkts_acked)
@@ -2241,6 +2337,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
/* Note, it is the only place, where
* fast path is recovered for sending TCP.
*/
+ tp->pred_flags = 0;
tcp_fast_path_check(sk, tp);
if (nwin > tp->max_window) {
@@ -2285,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
}
/* F-RTO affects on two new ACKs following RTO.
- * At latest on third ACK the TCP behavor is back to normal.
+ * At latest on third ACK the TCP behavior is back to normal.
*/
tp->frto_counter = (tp->frto_counter + 1) % 3;
}
@@ -2300,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
u32 ack = TCP_SKB_CB(skb)->ack_seq;
u32 prior_in_flight;
s32 seq_rtt;
- s32 seq_usrtt = 0;
int prior_packets;
/* If the ack is newer than sent or older than previous acks
@@ -2312,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una))
goto old_ack;
+ if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+ tp->bytes_acked += ack - prior_snd_una;
+
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
@@ -2353,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
prior_in_flight = tcp_packets_in_flight(tp);
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
- icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
+ flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tp->frto_counter)
tcp_process_frto(sk, prior_snd_una);
if (tcp_ack_is_dubious(sk, flag)) {
- /* Advanve CWND, if state allows this. */
+ /* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3149,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
{
struct sk_buff *skb;
- /* First, check that queue is collapsable and find
+ /* First, check that queue is collapsible and find
* the point where collapsing can be useful. */
for (skb = head; skb != tail; ) {
/* No new bits? It is possible on ofo queue. */
@@ -3457,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
/*
* This routine is only called when we have urgent data
- * signalled. Its the 'slow' part of tcp_urg. It could be
+ * signaled. Its the 'slow' part of tcp_urg. It could be
* moved inline now as tcp_urg is only called from one
* place. We handle URGent data wrong. We have to - as
* BSD still doesn't use the correction from RFC961.
@@ -3502,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
* urgent. To do this requires some care. We cannot just ignore
* tp->copied_seq since we would read the last urgent byte again
* as data, nor can we alter copied_seq until this data arrives
- * or we break the sematics of SIOCATMARK (and thus sockatmark())
+ * or we break the semantics of SIOCATMARK (and thus sockatmark())
*
* NOTE. Double Dutch. Rendering to plain English: author of comment
* above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3647,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tp->rx_opt.saw_tstamp = 0;
/* pred_flags is 0xS?10 << 16 + snd_wnd
- * if header_predition is to be made
+ * if header_prediction is to be made
* 'S' will always be tp->tcp_header_len >> 2
* '?' will be 0 for the fast path, otherwise pred_flags is 0 to
* turn it off (when there are holes in the receive
@@ -4243,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!tp->srtt)
- tcp_ack_saw_tstamp(sk, NULL, 0);
+ tcp_ack_saw_tstamp(sk, 0);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4373,6 +4471,7 @@ discard:
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 13dfb391cdf1..4d5021e1929b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
* request_sock handling and moved
* most of it into the af independent code.
* Added tail drop and some other bugfixes.
- * Added new listen sematics.
+ * Added new listen semantics.
* Mike McLagan : Routing by source
* Juan Jose Ciarlante: ip_dynaddr bits
* Andi Kleen: various fixes.
@@ -93,8 +93,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
.lhash_lock = RW_LOCK_UNLOCKED,
.lhash_users = ATOMIC_INIT(0),
.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
- .portalloc_lock = SPIN_LOCK_UNLOCKED,
- .port_rover = 1024 - 1,
};
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -130,19 +128,20 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
- const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size);
- struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
+ unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
+ prefetch(head->chain.first);
write_lock(&head->lock);
/* Check TIME-WAIT sockets first. */
sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
tw = inet_twsk(sk2);
- if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
+ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
struct tcp_sock *tp = tcp_sk(sk);
@@ -179,7 +178,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
/* And established part... */
sk_for_each(sk2, node, &head->chain) {
- if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
goto not_unique;
}
@@ -188,7 +187,7 @@ unique:
* in hash table socket with a funny identity. */
inet->num = lport;
inet->sport = htons(lport);
- sk->sk_hashent = hash;
+ sk->sk_hash = hash;
BUG_TRAP(sk_unhashed(sk));
__sk_add_node(sk, &head->chain);
sock_prot_inc_use(sk->sk_prot);
@@ -824,8 +823,7 @@ out:
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- if (inet_rsk(req)->opt)
- kfree(inet_rsk(req)->opt);
+ kfree(inet_rsk(req)->opt);
}
static inline void syn_flood_warning(struct sk_buff *skb)
@@ -1112,24 +1110,18 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
static int tcp_v4_checksum_init(struct sk_buff *skb)
{
if (skb->ip_summed == CHECKSUM_HW) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
- skb->nh.iph->daddr, skb->csum))
+ skb->nh.iph->daddr, skb->csum)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
return 0;
-
- LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v4 csum failed\n");
- skb->ip_summed = CHECKSUM_NONE;
+ }
}
+
+ skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+ skb->len, IPPROTO_TCP, 0);
+
if (skb->len <= 76) {
- if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
- skb->nh.iph->daddr,
- skb_checksum(skb, 0, skb->len, 0)))
- return -1;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else {
- skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
- skb->nh.iph->saddr,
- skb->nh.iph->daddr, 0);
+ return __skb_checksum_complete(skb);
}
return 0;
}
@@ -1218,10 +1210,10 @@ int tcp_v4_rcv(struct sk_buff *skb)
/* An explanation is required here, I think.
* Packet length and doff are validated by header prediction,
- * provided case of th->doff==0 is elimineted.
+ * provided case of th->doff==0 is eliminated.
* So, we defer the checks. */
if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
- tcp_v4_checksum_init(skb) < 0))
+ tcp_v4_checksum_init(skb)))
goto bad_packet;
th = skb->h.th;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2c6b4a..1b66a2ac4321 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
/* I am shamed, but failed to make it more elegant.
* Yes, it is direct reference to IP, which is impossible
* to generalize to IPv6. Taking into account that IPv6
- * do not undertsnad recycling in any case, it not
+ * do not understand recycling in any case, it not
* a big problem in practice. --ANK */
if (tw->tw_family == AF_INET &&
tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
@@ -194,7 +194,7 @@ kill_with_rst:
/* In window segment, it may be only reset or bare ack. */
if (th->rst) {
- /* This is TIME_WAIT assasination, in two flavors.
+ /* This is TIME_WAIT assassination, in two flavors.
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/
newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0;
+ newtp->bytes_acked = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
/* RFC793 page 36: "If the connection is in any non-synchronized state ...
* and the incoming segment acknowledges something not yet
- * sent (the segment carries an unaccaptable ACK) ...
+ * sent (the segment carries an unacceptable ACK) ...
* a reset is sent."
*
* Invalid ACK: reset will be sent by listening socket
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5dd6dd7d091e..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -190,7 +190,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
}
/* Set initial window to value enough for senders,
- * following RFC1414. Senders, not following this RFC,
+ * following RFC2414. Senders, not following this RFC,
* will be satisfied with 2.
*/
if (mss > (1<<*rcv_wscale)) {
@@ -435,8 +435,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
int nsize, old_factor;
u16 flags;
- BUG_ON(len >= skb->len);
+ BUG_ON(len > skb->len);
+ clear_all_retrans_hints(tp);
nsize = skb_headlen(skb) - len;
if (nsize < 0)
nsize = 0;
@@ -509,7 +510,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
tp->lost_out -= diff;
tp->left_out -= diff;
}
+
if (diff > 0) {
+ /* Adjust Reno SACK estimate. */
+ if (!tp->rx_opt.sack_ok) {
+ tp->sacked_out -= diff;
+ if ((int)tp->sacked_out < 0)
+ tp->sacked_out = 0;
+ tcp_sync_left_out(tp);
+ }
+
tp->fackets_out -= diff;
if ((int)tp->fackets_out < 0)
tp->fackets_out = 0;
@@ -591,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
for TCP options, but includes only bare TCP header.
tp->rx_opt.mss_clamp is mss negotiated at connection setup.
- It is minumum of user_mss and mss received with SYN.
+ It is minimum of user_mss and mss received with SYN.
It also does not include TCP options.
tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1163,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- /* MSS for the peer's data. Previous verions used mss_clamp
+ /* MSS for the peer's data. Previous versions used mss_clamp
* here. I don't know if the value based on our guesses
* of peer's MSS is better for the performance. It's more correct
* but may be worse for the performance because of rcv_mss
@@ -1252,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
BUG_ON(tcp_skb_pcount(skb) != 1 ||
tcp_skb_pcount(next_skb) != 1);
- /* Ok. We will be able to collapse the packet. */
+ /* changing transmit queue under us so clear hints */
+ clear_all_retrans_hints(tp);
+
+ /* Ok. We will be able to collapse the packet. */
__skb_unlink(next_skb, &sk->sk_write_queue);
memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1322,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
}
}
+ clear_all_retrans_hints(tp);
+
if (!lost)
return;
@@ -1353,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int err;
/* Do not sent more than we queued. 1/4 is reserved for possible
- * copying overhead: frgagmentation, tunneling, mangling etc.
+ * copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1460,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int packet_cnt = tp->lost_out;
+ int packet_cnt;
+
+ if (tp->retransmit_skb_hint) {
+ skb = tp->retransmit_skb_hint;
+ packet_cnt = tp->retransmit_cnt_hint;
+ }else{
+ skb = sk->sk_write_queue.next;
+ packet_cnt = 0;
+ }
/* First pass: retransmit lost packets. */
- if (packet_cnt) {
- sk_stream_for_retrans_queue(skb, sk) {
+ if (tp->lost_out) {
+ sk_stream_for_retrans_queue_from(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ /* we could do better than to assign each time */
+ tp->retransmit_skb_hint = skb;
+ tp->retransmit_cnt_hint = packet_cnt;
+
/* Assume this retransmit will generate
* only one packet for congestion window
* calculation purposes. This works because
@@ -1477,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
return;
- if (sacked&TCPCB_LOST) {
+ if (sacked & TCPCB_LOST) {
if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
- if (tcp_retransmit_skb(sk, skb))
+ if (tcp_retransmit_skb(sk, skb)) {
+ tp->retransmit_skb_hint = NULL;
return;
+ }
if (icsk->icsk_ca_state != TCP_CA_Loss)
NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
else
@@ -1493,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
TCP_RTO_MAX);
}
- packet_cnt -= tcp_skb_pcount(skb);
- if (packet_cnt <= 0)
+ packet_cnt += tcp_skb_pcount(skb);
+ if (packet_cnt >= tp->lost_out)
break;
}
}
@@ -1520,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tcp_may_send_now(sk, tp))
return;
- packet_cnt = 0;
+ if (tp->forward_skb_hint) {
+ skb = tp->forward_skb_hint;
+ packet_cnt = tp->forward_cnt_hint;
+ } else{
+ skb = sk->sk_write_queue.next;
+ packet_cnt = 0;
+ }
+
+ sk_stream_for_retrans_queue_from(skb, sk) {
+ tp->forward_cnt_hint = packet_cnt;
+ tp->forward_skb_hint = skb;
- sk_stream_for_retrans_queue(skb, sk) {
/* Similar to the retransmit loop above we
* can pretend that the retransmitted SKB
* we send out here will be composed of one
@@ -1539,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
continue;
/* Ok, retransmit it. */
- if (tcp_retransmit_skb(sk, skb))
+ if (tcp_retransmit_skb(sk, skb)) {
+ tp->forward_skb_hint = NULL;
break;
+ }
if (skb == skb_peek(&sk->sk_write_queue))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -1601,7 +1641,7 @@ void tcp_send_fin(struct sock *sk)
* was unread data in the receive queue. This behavior is recommended
* by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
*/
-void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
+void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -2050,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 327770bf5522..26d7486ee501 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
u32 in_flight, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (in_flight < tp->snd_cwnd)
+
+ if (!tcp_is_cwnd_limited(sk, in_flight))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
- tp->snd_cwnd++;
- } else {
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
+ tcp_slow_start(tp);
+ else {
tp->snd_cwnd_cnt++;
if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
- tp->snd_cwnd++;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
tp->snd_cwnd_cnt = 0;
}
}
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
- tp->snd_cwnd_stamp = tcp_time_stamp;
}
static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 415ee47ac1c5..e1880959614a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
* to prevent DoS attacks. It is called when a retransmission timeout
* or zero probe timeout occurs on orphaned socket.
*
- * Criterium is still not confirmed experimentally and may change.
+ * Criteria is still not confirmed experimentally and may change.
* We kill the socket, if:
* 1. If number of orphaned sockets exceeds an administratively configured
* limit.
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
hole detection. :-(
It is place to make it. It is not made. I do not want
- to make it. It is disguisting. It does not work in any
+ to make it. It is disgusting. It does not work in any
case. Let me to cite the same draft, which requires for
us to implement this:
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 93c5f92070f9..b7d296a8ac6d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
/* We don't have enough RTT samples to do the Vegas
* calculation, so we'll behave like Reno.
*/
- if (tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd++;
+ tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
} else {
u32 rtt, target_cwnd, diff;
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
*/
diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
- if (tp->snd_cwnd < tp->snd_ssthresh) {
+ if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
if (diff > gamma) {
/* Going too fast. Time to slow down
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
V_PARAM_SHIFT)+1);
}
+ tcp_slow_start(tp);
} else {
/* Congestion avoidance. */
u32 next_snd_cwnd;
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
else if (next_snd_cwnd < tp->snd_cwnd)
tp->snd_cwnd--;
}
- }
- /* Wipe the slate clean for the next RTT. */
- vegas->cntRTT = 0;
- vegas->minRTT = 0x7fffffff;
+ if (tp->snd_cwnd < 2)
+ tp->snd_cwnd = 2;
+ else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
+ tp->snd_cwnd = tp->snd_cwnd_clamp;
+ }
}
- /* The following code is executed for every ack we receive,
- * except for conditions checked in should_advance_cwnd()
- * before the call to tcp_cong_avoid(). Mainly this means that
- * we only execute this code if the ack actually acked some
- * data.
- */
-
- /* If we are in slow start, increase our cwnd in response to this ACK.
- * (If we are not in slow start then we are in congestion avoidance,
- * and adjust our congestion window only once per RTT. See the code
- * above.)
- */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
- tp->snd_cwnd++;
-
- /* to keep cwnd from growing without bound */
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
-
- /* Make sure that we are never so timid as to reduce our cwnd below
- * 2 MSS.
- *
- * Going below 2 MSS would risk huge delayed ACKs from our receiver.
- */
- tp->snd_cwnd = max(tp->snd_cwnd, 2U);
+ /* Wipe the slate clean for the next RTT. */
+ vegas->cntRTT = 0;
+ vegas->minRTT = 0x7fffffff;
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e0bd1013cb0d..2422a5f7195d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -761,7 +761,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
{
- return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+ return __skb_checksum_complete(skb);
}
static __inline__ int udp_checksum_complete(struct sk_buff *skb)
@@ -1100,11 +1100,8 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_HW) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
- return 0;
- LIMIT_NETDEBUG(KERN_DEBUG "udp v4 hw csum failure.\n");
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);