summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/af_inet.c 2
-rw-r--r-- net/ipv4/arp.c 3
-rw-r--r-- net/ipv4/devinet.c 5
-rw-r--r-- net/ipv4/fib_trie.c 16
-rw-r--r-- net/ipv4/icmp.c 9
-rw-r--r-- net/ipv4/inet_fragment.c 2
-rw-r--r-- net/ipv4/inetpeer.c 61
-rw-r--r-- net/ipv4/ip_fragment.c 6
-rw-r--r-- net/ipv4/ip_output.c 21
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 124
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 52
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c 4
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 4
-rw-r--r-- net/ipv4/route.c 218
-rw-r--r-- net/ipv4/tcp_ipv4.c 32
-rw-r--r-- net/ipv4/tcp_minisocks.c 32
-rw-r--r-- net/ipv4/tcp_output.c 20
-rw-r--r-- net/ipv4/xfrm4_policy.c 10
18 files changed, 340 insertions, 281 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c8f7aee587d1..e4e8e00a2c91 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -553,7 +553,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
if (!inet_sk(sk)->inet_num && inet_autobind(sk))
return -EAGAIN;
- return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
+ return sk->sk_prot->connect(sk, uaddr, addr_len);
}
EXPORT_SYMBOL(inet_dgram_connect);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cda37be02f8d..2e560f0c757d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb)
* Check for bad requests for 127.x.x.x and requests for multicast
* addresses. If this is one such, delete it.
*/
- if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+ if (ipv4_is_multicast(tip) ||
+ (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
goto out;
/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 10e15a144e95..44bf82e3aef7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
if (cnf == net->ipv4.devconf_dflt)
devinet_copy_dflt_conf(net, i);
- if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+ if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
+ i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
if ((new_value == 0) && (old_value != 0))
rt_cache_flush(net, 0);
}
@@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table {
"force_igmp_version"),
DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
"promote_secondaries"),
+ DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
+ "route_localnet"),
},
};
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 30b88d7b4bd6..9b0f25930fbc 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1007,9 +1007,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
- tn = (struct tnode *) resize(t, (struct tnode *)tn);
+ tn = (struct tnode *)resize(t, tn);
- tnode_put_child_reorg((struct tnode *)tp, cindex,
+ tnode_put_child_reorg(tp, cindex,
(struct rt_trie_node *)tn, wasfull);
tp = node_parent((struct rt_trie_node *) tn);
@@ -1024,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
/* Handle last (top) tnode */
if (IS_TNODE(tn))
- tn = (struct tnode *)resize(t, (struct tnode *)tn);
+ tn = (struct tnode *)resize(t, tn);
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
tnode_free_flush();
@@ -1125,7 +1125,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)l, tp);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
+ put_child(t, tp, cindex, (struct rt_trie_node *)l);
} else {
/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
/*
@@ -1160,8 +1160,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
if (tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, (struct tnode *)tp, cindex,
- (struct rt_trie_node *)tn);
+ put_child(t, tp, cindex, (struct rt_trie_node *)tn);
} else {
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
tp = tn;
@@ -1620,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
if (tp) {
t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
- put_child(t, (struct tnode *)tp, cindex, NULL);
+ put_child(t, tp, cindex, NULL);
trie_rebalance(t, tp);
} else
RCU_INIT_POINTER(t->trie, NULL);
@@ -1844,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb)
if (ll && hlist_empty(&ll->list))
trie_leaf_remove(t, ll);
+ inetpeer_invalidate_tree(&tb->tb_peers);
+
pr_debug("trie_flush found=%d\n", found);
return found;
}
@@ -1992,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id)
tb->tb_id = id;
tb->tb_default = -1;
tb->tb_num_default = 0;
+ inet_peer_base_init(&tb->tb_peers);
t = (struct trie *) tb->tb_data;
memset(t, 0, sizeof(*t));
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c75efbdc71cb..e1caa1abe5d1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
/* Limit if icmp type is enabled in ratemask. */
if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) {
- if (!rt->peer)
- rt_bind_peer(rt, fl4->daddr, 1);
- rc = inet_peer_xrlim_allow(rt->peer,
+ struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr);
+ rc = inet_peer_xrlim_allow(peer,
net->ipv4.sysctl_icmp_ratelimit);
}
out:
@@ -674,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb)
LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"),
&iph->daddr);
} else {
- info = ip_rt_frag_needed(net, iph,
- ntohs(icmph->un.frag.mtu),
- skb->dev);
+ info = ntohs(icmph->un.frag.mtu);
if (!info)
goto out;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51b6d0c..85190e69297b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
if (q == NULL)
return NULL;
+ q->net = nf;
f->constructor(q, arg);
atomic_add(f->qsize, &nf->mem);
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
- q->net = nf;
return q;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index dfba343b2509..cac02ad1425d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -82,23 +82,39 @@ static const struct inet_peer peer_fake_node = {
.avl_height = 0
};
-struct inet_peer_base {
- struct inet_peer __rcu *root;
- seqlock_t lock;
- int total;
-};
+/*
+ * Put @bp into its initial state: root pointing at peer_avl_empty_rcu,
+ * a freshly initialised seqlock, a peer count of zero and flush_seq
+ * set to ~0U.
+ */
+void inet_peer_base_init(struct inet_peer_base *bp)
+{
+	bp->total = 0;
+	bp->flush_seq = ~0U;
+	bp->root = peer_avl_empty_rcu;
+	seqlock_init(&bp->lock);
+}
+EXPORT_SYMBOL_GPL(inet_peer_base_init);
-static struct inet_peer_base v4_peers = {
- .root = peer_avl_empty_rcu,
- .lock = __SEQLOCK_UNLOCKED(v4_peers.lock),
- .total = 0,
-};
+static atomic_t v4_seq = ATOMIC_INIT(0);
+static atomic_t v6_seq = ATOMIC_INIT(0);
-static struct inet_peer_base v6_peers = {
- .root = peer_avl_empty_rcu,
- .lock = __SEQLOCK_UNLOCKED(v6_peers.lock),
- .total = 0,
-};
+/*
+ * Return the invalidation sequence counter for @family: v4_seq for
+ * AF_INET, v6_seq for any other value.
+ */
+static atomic_t *inetpeer_seq_ptr(int family)
+{
+	if (family == AF_INET)
+		return &v4_seq;
+
+	return &v6_seq;
+}
+
+/*
+ * Invalidate @base if the sequence counter of @family has moved since
+ * the value recorded in base->flush_seq, then record the value seen.
+ *
+ * The counter is sampled exactly once.  Re-reading it after the flush
+ * could pick up an inetpeer_invalidate_family() that arrived while (or
+ * just after) the tree was being invalidated; storing that newer value
+ * would mark the request as handled although no flush ran after it.
+ * Storing the sampled value instead leaves flush_seq behind the
+ * counter, so the next flush_check() on this base flushes again.
+ */
+static inline void flush_check(struct inet_peer_base *base, int family)
+{
+	int seq = atomic_read(inetpeer_seq_ptr(family));
+
+	if (unlikely(base->flush_seq != seq)) {
+		inetpeer_invalidate_tree(base);
+		base->flush_seq = seq;
+	}
+}
+
+/*
+ * Request invalidation of the peers of @family by incrementing that
+ * family's sequence counter.  Nothing is flushed here; flush_check()
+ * invalidates a base when it finds the base's recorded sequence differs
+ * from the counter.
+ */
+void inetpeer_invalidate_family(int family)
+{
+	atomic_inc(inetpeer_seq_ptr(family));
+}
#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -401,11 +417,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
call_rcu(&p->rcu, inetpeer_free_rcu);
}
-static struct inet_peer_base *family_to_base(int family)
-{
- return family == AF_INET ? &v4_peers : &v6_peers;
-}
-
/* perform garbage collect on all items stacked during a lookup */
static int inet_peer_gc(struct inet_peer_base *base,
struct inet_peer __rcu **stack[PEER_MAXDEPTH],
@@ -443,14 +454,17 @@ static int inet_peer_gc(struct inet_peer_base *base,
return cnt;
}
-struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(struct inet_peer_base *base,
+ const struct inetpeer_addr *daddr,
+ int create)
{
struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
- struct inet_peer_base *base = family_to_base(daddr->family);
struct inet_peer *p;
unsigned int sequence;
int invalidated, gccnt = 0;
+ flush_check(base, daddr->family);
+
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
@@ -571,10 +585,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
schedule_delayed_work(&gc_work, gc_delay);
}
-void inetpeer_invalidate_tree(int family)
+void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
struct inet_peer *old, *new, *prev;
- struct inet_peer_base *base = family_to_base(family);
write_seqlock_bh(&base->lock);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9dbd3dd6022d..8d07c973409c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
+ struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
+ frags);
+ struct net *net = container_of(ipv4, struct net, ipv4);
+
struct ip4_create_arg *arg = a;
qp->protocol = arg->iph->protocol;
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
qp->daddr = arg->iph->daddr;
qp->user = arg->user;
qp->peer = sysctl_ipfrag_max_dist ?
- inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
+ inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
}
static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 451f97c42eb4..0f3185a662c3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ip_local_out);
-/* dev_loopback_xmit for use with netfilter. */
-static int ip_dev_loopback_xmit(struct sk_buff *newskb)
-{
- skb_reset_mac_header(newskb);
- __skb_pull(newskb, skb_network_offset(newskb));
- newskb->pkt_type = PACKET_LOOPBACK;
- newskb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!skb_dst(newskb));
- skb_dst_force(newskb);
- netif_rx_ni(newskb);
- return 0;
-}
-
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
int ttl = inet->uc_ttl;
@@ -200,7 +187,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
if (skb->sk)
skb_set_owner_w(skb2, skb->sk);
- kfree_skb(skb);
+ consume_skb(skb);
skb = skb2;
}
@@ -281,7 +268,7 @@ int ip_mc_output(struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
newskb, NULL, newskb->dev,
- ip_dev_loopback_xmit);
+ dev_loopback_xmit);
}
/* Multicasts with ttl 0 must not go beyond the host */
@@ -296,7 +283,7 @@ int ip_mc_output(struct sk_buff *skb)
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
- NULL, newskb->dev, ip_dev_loopback_xmit);
+ NULL, newskb->dev, dev_loopback_xmit);
}
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
@@ -709,7 +696,7 @@ slow_path:
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
}
- kfree_skb(skb);
+ consume_skb(skb);
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
return err;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 91747d4ebc26..d79b961a8009 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -207,35 +207,30 @@ static int log_invalid_proto_max = 255;
static ctl_table ip_ct_sysctl_table[] = {
{
.procname = "ip_conntrack_max",
- .data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "ip_conntrack_count",
- .data = &init_net.ct.count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
.procname = "ip_conntrack_buckets",
- .data = &init_net.ct.htable_size,
.maxlen = sizeof(unsigned int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
.procname = "ip_conntrack_checksum",
- .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "ip_conntrack_log_invalid",
- .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -351,6 +346,25 @@ static struct nf_sockopt_ops so_getorigdst = {
.owner = THIS_MODULE,
};
+/*
+ * Per-netns init hook of the IPv4 l3proto.  When both CONFIG_SYSCTL and
+ * CONFIG_NF_CONNTRACK_PROC_COMPAT are set, give @net its own copy of
+ * ip_ct_sysctl_table and aim the first five entries at nf_conntrack_max
+ * and at the ct fields of @net; otherwise there is nothing to do.
+ *
+ * Returns 0, or -ENOMEM if the table cannot be duplicated.
+ */
+static int ipv4_init_net(struct net *net)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+	struct nf_ip_net *ipnet = &net->ct.nf_ct_proto;
+
+	ipnet->ctl_table = kmemdup(ip_ct_sysctl_table,
+				   sizeof(ip_ct_sysctl_table), GFP_KERNEL);
+	if (ipnet->ctl_table == NULL)
+		return -ENOMEM;
+
+	/* Indices follow the entry order of ip_ct_sysctl_table. */
+	ipnet->ctl_table[0].data = &nf_conntrack_max;
+	ipnet->ctl_table[1].data = &net->ct.count;
+	ipnet->ctl_table[2].data = &net->ct.htable_size;
+	ipnet->ctl_table[3].data = &net->ct.sysctl_checksum;
+	ipnet->ctl_table[4].data = &net->ct.sysctl_log_invalid;
+#endif
+	return 0;
+}
+
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
.name = "ipv4",
@@ -366,8 +380,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
.ctl_table_path = "net/ipv4/netfilter",
- .ctl_table = ip_ct_sysctl_table,
#endif
+ .init_net = ipv4_init_net,
.me = THIS_MODULE,
};
@@ -378,6 +392,65 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
+/*
+ * Per-netns setup: register the TCP, UDP and ICMP IPv4 l4protos and
+ * then the IPv4 l3proto for @net.
+ *
+ * If a registration returns a negative value, the ones that already
+ * succeeded are unregistered in reverse order and that value is
+ * returned.  Returns 0 when all four registrations succeed.
+ */
+static int ipv4_net_init(struct net *net)
+{
+	int err;
+
+	err = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_tcp4);
+	if (err < 0) {
+		pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n");
+		return err;
+	}
+
+	err = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_udp4);
+	if (err < 0) {
+		pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n");
+		goto unreg_tcp;
+	}
+
+	err = nf_conntrack_l4proto_register(net, &nf_conntrack_l4proto_icmp);
+	if (err < 0) {
+		pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n");
+		goto unreg_udp;
+	}
+
+	err = nf_conntrack_l3proto_register(net, &nf_conntrack_l3proto_ipv4);
+	if (err < 0) {
+		pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n");
+		goto unreg_icmp;
+	}
+
+	return 0;
+
+	/* Each label is named after the registration it undoes. */
+unreg_icmp:
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_icmp);
+unreg_udp:
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udp4);
+unreg_tcp:
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_tcp4);
+	return err;
+}
+
+/*
+ * Per-netns teardown: unregister the IPv4 l3proto first, then the ICMP,
+ * UDP and TCP l4protos for @net.
+ */
+static void ipv4_net_exit(struct net *net)
+{
+	nf_conntrack_l3proto_unregister(net, &nf_conntrack_l3proto_ipv4);
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_icmp);
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_udp4);
+	nf_conntrack_l4proto_unregister(net, &nf_conntrack_l4proto_tcp4);
+}
+
+/* Pernet hooks pairing the per-netns setup and teardown above. */
+static struct pernet_operations ipv4_net_ops = {
+	.init	= ipv4_net_init,
+	.exit	= ipv4_net_exit,
+};
+
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
int ret = 0;
@@ -391,35 +464,17 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
return ret;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
+ ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register tcp.\n");
+ pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
goto cleanup_sockopt;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register udp.\n");
- goto cleanup_tcp;
- }
-
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register icmp.\n");
- goto cleanup_udp;
- }
-
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
- if (ret < 0) {
- pr_err("nf_conntrack_ipv4: can't register ipv4\n");
- goto cleanup_icmp;
- }
-
ret = nf_register_hooks(ipv4_conntrack_ops,
ARRAY_SIZE(ipv4_conntrack_ops));
if (ret < 0) {
pr_err("nf_conntrack_ipv4: can't register hooks.\n");
- goto cleanup_ipv4;
+ goto cleanup_pernet;
}
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
ret = nf_conntrack_ipv4_compat_init();
@@ -431,14 +486,8 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
cleanup_hooks:
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
#endif
- cleanup_ipv4:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- cleanup_icmp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- cleanup_udp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- cleanup_tcp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ cleanup_pernet:
+ unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
@@ -451,10 +500,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
nf_conntrack_ipv4_compat_fini();
#endif
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 0847e373d33c..041923cb67ad 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -23,6 +23,11 @@
static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+/* Return the ICMP conntrack state of @net, i.e. net->ct.nf_ct_proto.icmp. */
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmp;
+}
+
static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
@@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s,
static unsigned int *icmp_get_timeouts(struct net *net)
{
- return &nf_ct_icmp_timeout;
+ return &icmp_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -274,16 +279,18 @@ static int icmp_nlattr_tuple_size(void)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_cttimeout.h>
-static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
{
unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmp_pernet(net);
if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
*timeout =
ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
} else {
/* Set default ICMP timeout. */
- *timeout = nf_ct_icmp_timeout;
+ *timeout = in->timeout;
}
return 0;
}
@@ -308,11 +315,9 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *icmp_sysctl_header;
static struct ctl_table icmp_sysctl_table[] = {
{
.procname = "nf_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -323,7 +328,6 @@ static struct ctl_table icmp_sysctl_table[] = {
static struct ctl_table icmp_compat_sysctl_table[] = {
{
.procname = "ip_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -333,6 +337,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
+/*
+ * Per-netns init hook of the ICMP l4proto: seed the ICMP timeout of
+ * @net from nf_ct_icmp_timeout and, under CONFIG_SYSCTL, give @net
+ * private copies of the sysctl table(s) whose first entry points at
+ * that per-netns timeout.
+ *
+ * Returns 0, or -ENOMEM if a table cannot be duplicated; a table that
+ * was already duplicated is freed and its pointer cleared first.
+ */
+static int icmp_init_net(struct net *net)
+{
+	struct nf_icmp_net *in = icmp_pernet(net);
+#ifdef CONFIG_SYSCTL
+	/*
+	 * Only the sysctl code uses this view, so it is declared under the
+	 * same guard; otherwise it is an unused variable in !SYSCTL builds.
+	 * NOTE(review): the cast presumes struct nf_icmp_net starts with a
+	 * struct nf_proto_net - confirm against the header.
+	 */
+	struct nf_proto_net *pn = (struct nf_proto_net *)in;
+#endif
+
+	in->timeout = nf_ct_icmp_timeout;
+
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmp_sysctl_table,
+				sizeof(icmp_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+	pn->ctl_table[0].data = &in->timeout;
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+	pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
+				       sizeof(icmp_compat_sysctl_table),
+				       GFP_KERNEL);
+	if (!pn->ctl_compat_table) {
+		/* Do not leave a half-initialised pair behind. */
+		kfree(pn->ctl_table);
+		pn->ctl_table = NULL;
+		return -ENOMEM;
+	}
+	pn->ctl_compat_table[0].data = &in->timeout;
+#endif
+#endif
+	return 0;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
@@ -362,11 +394,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
.nla_policy = icmp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
- .ctl_table_header = &icmp_sysctl_header,
- .ctl_table = icmp_sysctl_table,
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- .ctl_compat_table = icmp_compat_sysctl_table,
-#endif
-#endif
+ .init_net = icmp_init_net,
};
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 9bb1b8a37a22..742815518b0f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -94,14 +94,14 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 746edec8b86e..bac712293fd6 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -405,7 +405,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
ptr = *octets;
while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+ if (!asn1_octet_decode(ctx, ptr++)) {
kfree(*octets);
*octets = NULL;
return 0;
@@ -759,7 +759,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
}
break;
case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
+ if (!asn1_oid_decode(ctx, end, &lp, &len)) {
kfree(id);
return 0;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98b30d08efe9..655506af47ca 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
struct inet_peer *peer;
u32 *p = NULL;
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
-
- peer = rt->peer;
+ peer = rt_get_peer_create(rt, rt->rt_dst);
if (peer) {
u32 *old_p = __DST_METRICS_PTR(old);
unsigned long prev, new;
@@ -680,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth)
static inline int rt_valuable(struct rtable *rth)
{
return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
- (rth->peer && rth->peer->pmtu_expires);
+ (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires);
}
static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -938,7 +935,7 @@ static void rt_cache_invalidate(struct net *net)
get_random_bytes(&shuffle, sizeof(shuffle));
atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
- inetpeer_invalidate_tree(AF_INET);
+ inetpeer_invalidate_family(AF_INET);
}
/*
@@ -1328,14 +1325,20 @@ static u32 rt_peer_genid(void)
void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
{
+ struct inet_peer_base *base;
struct inet_peer *peer;
- peer = inet_getpeer_v4(daddr, create);
+ base = inetpeer_base_ptr(rt->_peer);
+ if (!base)
+ return;
- if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
- inet_putpeer(peer);
- else
- rt->rt_peer_genid = rt_peer_genid();
+ peer = inet_getpeer_v4(base, daddr, create);
+ if (peer) {
+ if (!rt_set_peer(rt, peer))
+ inet_putpeer(peer);
+ else
+ rt->rt_peer_genid = rt_peer_genid();
+ }
}
/*
@@ -1363,14 +1366,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
struct rtable *rt = (struct rtable *) dst;
if (rt && !(rt->dst.flags & DST_NOPEER)) {
- if (rt->peer == NULL)
- rt_bind_peer(rt, rt->rt_dst, 1);
+ struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst);
/* If peer is attached to destination, it is never detached,
so that we need not to grab a lock to dereference it.
*/
- if (rt->peer) {
- iph->id = htons(inet_getid(rt->peer, more));
+ if (peer) {
+ iph->id = htons(inet_getid(peer, more));
return;
}
} else if (!rt)
@@ -1480,10 +1482,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->rt_gateway != old_gw)
continue;
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
-
- peer = rt->peer;
+ peer = rt_get_peer_create(rt, rt->rt_dst);
if (peer) {
if (peer->redirect_learned.a4 != new_gw) {
peer->redirect_learned.a4 = new_gw;
@@ -1539,8 +1538,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
rt_genid(dev_net(dst->dev)));
rt_del(hash, rt);
ret = NULL;
- } else if (rt->peer && peer_pmtu_expired(rt->peer)) {
- dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig);
+ } else if (rt_has_peer(rt)) {
+ struct inet_peer *peer = rt_peer_ptr(rt);
+ if (peer_pmtu_expired(peer))
+ dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig);
}
}
return ret;
@@ -1578,9 +1579,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
rcu_read_unlock();
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
- peer = rt->peer;
+ peer = rt_get_peer_create(rt, rt->rt_dst);
if (!peer) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
return;
@@ -1645,9 +1644,7 @@ static int ip_error(struct sk_buff *skb)
break;
}
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
- peer = rt->peer;
+ peer = rt_get_peer_create(rt, rt->rt_dst);
send = true;
if (peer) {
@@ -1668,67 +1665,6 @@ out: kfree_skb(skb);
return 0;
}
-/*
- * The last two values are not from the RFC but
- * are needed for AMPRnet AX.25 paths.
- */
-
-static const unsigned short mtu_plateau[] =
-{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 };
-
-static inline unsigned short guess_mtu(unsigned short old_mtu)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++)
- if (old_mtu > mtu_plateau[i])
- return mtu_plateau[i];
- return 68;
-}
-
-unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
- unsigned short new_mtu,
- struct net_device *dev)
-{
- unsigned short old_mtu = ntohs(iph->tot_len);
- unsigned short est_mtu = 0;
- struct inet_peer *peer;
-
- peer = inet_getpeer_v4(iph->daddr, 1);
- if (peer) {
- unsigned short mtu = new_mtu;
-
- if (new_mtu < 68 || new_mtu >= old_mtu) {
- /* BSD 4.2 derived systems incorrectly adjust
- * tot_len by the IP header length, and report
- * a zero MTU in the ICMP message.
- */
- if (mtu == 0 &&
- old_mtu >= 68 + (iph->ihl << 2))
- old_mtu -= iph->ihl << 2;
- mtu = guess_mtu(old_mtu);
- }
-
- if (mtu < ip_rt_min_pmtu)
- mtu = ip_rt_min_pmtu;
- if (!peer->pmtu_expires || mtu < peer->pmtu_learned) {
- unsigned long pmtu_expires;
-
- pmtu_expires = jiffies + ip_rt_mtu_expires;
- if (!pmtu_expires)
- pmtu_expires = 1UL;
-
- est_mtu = mtu;
- peer->pmtu_learned = mtu;
- peer->pmtu_expires = pmtu_expires;
- atomic_inc(&__rt_peer_genid);
- }
-
- inet_putpeer(peer);
- }
- return est_mtu ? : new_mtu;
-}
-
static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer)
{
unsigned long expires = ACCESS_ONCE(peer->pmtu_expires);
@@ -1753,9 +1689,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
dst_confirm(dst);
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 1);
- peer = rt->peer;
+ peer = rt_get_peer_create(rt, rt->rt_dst);
if (peer) {
unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires);
@@ -1781,12 +1715,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
static void ipv4_validate_peer(struct rtable *rt)
{
if (rt->rt_peer_genid != rt_peer_genid()) {
- struct inet_peer *peer;
+ struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst);
- if (!rt->peer)
- rt_bind_peer(rt, rt->rt_dst, 0);
-
- peer = rt->peer;
if (peer) {
check_peer_pmtu(&rt->dst, peer);
@@ -1812,14 +1742,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
struct rtable *rt = (struct rtable *) dst;
- struct inet_peer *peer = rt->peer;
if (rt->fi) {
fib_info_put(rt->fi);
rt->fi = NULL;
}
- if (peer) {
- rt->peer = NULL;
+ if (rt_has_peer(rt)) {
+ struct inet_peer *peer = rt_peer_ptr(rt);
inet_putpeer(peer);
}
}
@@ -1832,8 +1761,11 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
rt = skb_rtable(skb);
- if (rt && rt->peer && peer_pmtu_cleaned(rt->peer))
- dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig);
+ if (rt && rt_has_peer(rt)) {
+ struct inet_peer *peer = rt_peer_ptr(rt);
+ if (peer_pmtu_cleaned(peer))
+ dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig);
+ }
}
static int ip_rt_bug(struct sk_buff *skb)
@@ -1935,6 +1867,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
struct fib_info *fi)
{
+ struct inet_peer_base *base;
struct inet_peer *peer;
int create = 0;
@@ -1944,8 +1877,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
create = 1;
- rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+ base = inetpeer_base_ptr(rt->_peer);
+ BUG_ON(!base);
+
+ peer = inet_getpeer_v4(base, rt->rt_dst, create);
if (peer) {
+ __rt_set_peer(rt, peer);
rt->rt_peer_genid = rt_peer_genid();
if (inet_metrics_new(peer))
memcpy(peer->metrics, fi->fib_metrics,
@@ -2023,9 +1960,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return -EINVAL;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
+ skb->protocol != htons(ETH_P_IP))
goto e_inval;
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+ if (ipv4_is_loopback(saddr))
+ goto e_inval;
+
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr))
goto e_inval;
@@ -2061,7 +2002,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->rt_peer_genid = 0;
- rth->peer = NULL;
+ rt_init_peer(rth, dev_net(dev)->ipv4.peers);
rth->fi = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
@@ -2189,7 +2130,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->rt_peer_genid = 0;
- rth->peer = NULL;
+ rt_init_peer(rth, &res->table->tb_peers);
rth->fi = NULL;
rth->dst.input = ip_forward;
@@ -2266,8 +2207,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
by fib_lookup.
*/
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- ipv4_is_loopback(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
@@ -2279,9 +2219,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_zeronet(saddr))
goto martian_source;
- if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
+ if (ipv4_is_zeronet(daddr))
goto martian_destination;
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
+ if (ipv4_is_loopback(daddr))
+ goto martian_destination;
+
+ if (ipv4_is_loopback(saddr))
+ goto martian_source;
+ }
+
/*
* Now we are ready to route packet.
*/
@@ -2372,7 +2320,7 @@ local_input:
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->rt_peer_genid = 0;
- rth->peer = NULL;
+ rt_init_peer(rth, net->ipv4.peers);
rth->fi = NULL;
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -2520,9 +2468,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
u16 type = res->type;
struct rtable *rth;
- if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ in_dev = __in_dev_get_rcu(dev_out);
+ if (!in_dev)
return ERR_PTR(-EINVAL);
+ if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+ if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ return ERR_PTR(-EINVAL);
+
if (ipv4_is_lbcast(fl4->daddr))
type = RTN_BROADCAST;
else if (ipv4_is_multicast(fl4->daddr))
@@ -2533,10 +2486,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
- in_dev = __in_dev_get_rcu(dev_out);
- if (!in_dev)
- return ERR_PTR(-EINVAL);
-
if (type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL;
fi = NULL;
@@ -2576,7 +2525,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_gateway = fl4->daddr;
rth->rt_spec_dst= fl4->saddr;
rth->rt_peer_genid = 0;
- rth->peer = NULL;
+ rt_init_peer(rth, (res->table ?
+ &res->table->tb_peers :
+ dev_net(dev_out)->ipv4.peers));
rth->fi = NULL;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2625,6 +2576,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
int orig_oif;
res.fi = NULL;
+ res.table = NULL;
#ifdef CONFIG_IP_MULTIPLE_TABLES
res.r = NULL;
#endif
@@ -2730,6 +2682,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4)
if (fib_lookup(net, fl4, &res)) {
res.fi = NULL;
+ res.table = NULL;
if (fl4->flowi4_oif) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2913,9 +2866,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_src = ort->rt_src;
rt->rt_gateway = ort->rt_gateway;
rt->rt_spec_dst = ort->rt_spec_dst;
- rt->peer = ort->peer;
- if (rt->peer)
- atomic_inc(&rt->peer->refcnt);
+ rt_transfer_peer(rt, ort);
rt->fi = ort->fi;
if (rt->fi)
atomic_inc(&rt->fi->fib_clntref);
@@ -2953,7 +2904,6 @@ static int rt_fill_info(struct net *net,
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned long expires = 0;
- const struct inet_peer *peer = rt->peer;
u32 id = 0, ts = 0, tsage = 0, error;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
@@ -3009,8 +2959,9 @@ static int rt_fill_info(struct net *net,
goto nla_put_failure;
error = rt->dst.error;
- if (peer) {
- inet_peer_refcheck(rt->peer);
+ if (rt_has_peer(rt)) {
+ const struct inet_peer *peer = rt_peer_ptr(rt);
+ inet_peer_refcheck(peer);
id = atomic_read(&peer->ip_id_count) & 0xffff;
if (peer->tcp_ts_stamp) {
ts = peer->tcp_ts;
@@ -3400,6 +3351,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
.init = rt_genid_init,
};
+static int __net_init ipv4_inetpeer_init(struct net *net)
+{
+ struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
+
+ if (!bp)
+ return -ENOMEM;
+ inet_peer_base_init(bp);
+ net->ipv4.peers = bp;
+ return 0;
+}
+
+static void __net_exit ipv4_inetpeer_exit(struct net *net)
+{
+ struct inet_peer_base *bp = net->ipv4.peers;
+
+ net->ipv4.peers = NULL;
+ inetpeer_invalidate_tree(bp);
+ kfree(bp);
+}
+
+static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
+ .init = ipv4_inetpeer_init,
+ .exit = ipv4_inetpeer_exit,
+};
#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
@@ -3480,6 +3455,7 @@ int __init ip_rt_init(void)
register_pernet_subsys(&sysctl_route_ops);
#endif
register_pernet_subsys(&rt_genid_ops);
+ register_pernet_subsys(&ipv4_inetpeer_ops);
return rc;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c8d28c433b2b..fda2ca17135e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -848,7 +848,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
err = net_xmit_eval(err);
}
- dst_release(dst);
return err;
}
@@ -1821,40 +1820,25 @@ do_time_wait:
goto discard_it;
}
-struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
+struct inet_peer *tcp_v4_get_peer(struct sock *sk)
{
struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
struct inet_sock *inet = inet_sk(sk);
- struct inet_peer *peer;
- if (!rt ||
- inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
- peer = inet_getpeer_v4(inet->inet_daddr, 1);
- *release_it = true;
- } else {
- if (!rt->peer)
- rt_bind_peer(rt, inet->inet_daddr, 1);
- peer = rt->peer;
- *release_it = false;
- }
-
- return peer;
+ /* If we don't have a valid cached route, or we're doing IP
+ * options which make the IPv4 header destination address
+ * different from our peer's, do not bother with this.
+ */
+ if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr)
+ return NULL;
+ return rt_get_peer_create(rt, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_get_peer);
-void *tcp_v4_tw_get_peer(struct sock *sk)
-{
- const struct inet_timewait_sock *tw = inet_twsk(sk);
-
- return inet_getpeer_v4(tw->tw_daddr, 1);
-}
-EXPORT_SYMBOL(tcp_v4_tw_get_peer);
-
static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp_timewait_sock),
.twsk_unique = tcp_twsk_unique,
.twsk_destructor= tcp_twsk_destructor,
- .twsk_getpeer = tcp_v4_tw_get_peer,
};
const struct inet_connection_sock_af_ops ipv4_specific = {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b85d9fe7d663..cb015317c9f7 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk)
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct inet_peer *peer;
- bool release_it;
- peer = icsk->icsk_af_ops->get_peer(sk, &release_it);
+ peer = icsk->icsk_af_ops->get_peer(sk);
if (peer) {
if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
@@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk)
peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
peer->tcp_ts = tp->rx_opt.ts_recent;
}
- if (release_it)
- inet_putpeer(peer);
return true;
}
@@ -80,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk)
static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw)
{
+ const struct tcp_timewait_sock *tcptw;
struct sock *sk = (struct sock *) tw;
struct inet_peer *peer;
- peer = twsk_getpeer(sk);
+ tcptw = tcp_twsk(sk);
+ peer = tcptw->tw_peer;
if (peer) {
- const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
-
if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
peer->tcp_ts = tcptw->tw_ts_recent;
}
- inet_putpeer(peer);
return true;
}
return false;
@@ -317,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
bool recycle_ok = false;
+ bool recycle_on = false;
- if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
+ if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) {
recycle_ok = tcp_remember_stamp(sk);
+ recycle_on = true;
+ }
if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
tw = inet_twsk_alloc(sk, state);
@@ -327,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (tw != NULL) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_peer *peer = NULL;
- tw->tw_transparent = inet_sk(sk)->transparent;
+ tw->tw_transparent = inet->transparent;
tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
tcptw->tw_rcv_nxt = tp->rcv_nxt;
tcptw->tw_snd_nxt = tp->snd_nxt;
@@ -350,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
}
#endif
+ if (recycle_on)
+ peer = icsk->icsk_af_ops->get_peer(sk);
+ tcptw->tw_peer = peer;
+ if (peer)
+ atomic_inc(&peer->refcnt);
+
#ifdef CONFIG_TCP_MD5SIG
/*
* The timewait bucket does not have the key DB from the
@@ -401,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
void tcp_twsk_destructor(struct sock *sk)
{
-#ifdef CONFIG_TCP_MD5SIG
struct tcp_timewait_sock *twsk = tcp_twsk(sk);
+
+ if (twsk->tw_peer)
+ inet_putpeer(twsk->tw_peer);
+#ifdef CONFIG_TCP_MD5SIG
if (twsk->tw_md5_key) {
tcp_free_md5sig_pool();
kfree_rcu(twsk->tw_md5_key, rcu);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 803cbfe82fbc..c465d3e51e28 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2442,7 +2442,16 @@ int tcp_send_synack(struct sock *sk)
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
-/* Prepare a SYN-ACK. */
+/**
+ * tcp_make_synack - Prepare a SYN-ACK.
+ * @sk: listener socket
+ * @dst: dst entry attached to the SYNACK
+ * @req: request_sock pointer
+ * @rvp: request_values pointer
+ *
+ * Allocate one skb and build a SYNACK packet.
+ * @dst is consumed: the caller must not use it again.
+ */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct request_values *rvp)
@@ -2461,14 +2470,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
s_data_desired = cvp->s_data_desired;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
- if (skb == NULL)
+ skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ dst_release(dst);
return NULL;
-
+ }
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
- skb_dst_set(skb, dst_clone(dst));
+ skb_dst_set(skb, dst);
mss = dst_metric_advmss(dst);
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0d3426cb5c4f..8855d8268552 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.dst.dev = dev;
dev_hold(dev);
- xdst->u.rt.peer = rt->peer;
- if (rt->peer)
- atomic_inc(&rt->peer->refcnt);
+ rt_transfer_peer(&xdst->u.rt, rt);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
@@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
dst_destroy_metrics_generic(dst);
- if (likely(xdst->u.rt.peer))
- inet_putpeer(xdst->u.rt.peer);
+ if (rt_has_peer(&xdst->u.rt)) {
+ struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt);
+ inet_putpeer(peer);
+ }
xfrm_dst_destroy(xdst);
}