diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/arp.c | 3 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 5 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 16 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 9 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 2 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 61 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 21 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 124 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 52 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_defrag_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_nat_snmp_basic.c | 4 | ||||
-rw-r--r-- | net/ipv4/route.c | 218 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 32 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 32 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 20 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 10 |
18 files changed, 340 insertions, 281 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c8f7aee587d1..e4e8e00a2c91 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -553,7 +553,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); + return sk->sk_prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cda37be02f8d..2e560f0c757d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb) * Check for bad requests for 127.x.x.x and requests for multicast * addresses. If this is one such, delete it. */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + if (ipv4_is_multicast(tip) || + (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip))) goto out; /* diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 10e15a144e95..44bf82e3aef7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write, if (cnf == net->ipv4.devconf_dflt) devinet_copy_dflt_conf(net, i); - if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1) + if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 || + i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net, 0); } @@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table { "force_igmp_version"), DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES, "promote_secondaries"), + DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, + "route_localnet"), }, }; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 30b88d7b4bd6..9b0f25930fbc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1007,9 +1007,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); - tn = (struct tnode *) resize(t, (struct tnode *)tn); + tn = (struct tnode *)resize(t, tn); - tnode_put_child_reorg((struct tnode *)tp, cindex, + tnode_put_child_reorg(tp, cindex, (struct rt_trie_node *)tn, wasfull); tp = node_parent((struct rt_trie_node *) tn); @@ -1024,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) /* Handle last (top) tnode */ if (IS_TNODE(tn)) - tn = (struct tnode *)resize(t, (struct tnode *)tn); + tn = (struct tnode *)resize(t, tn); rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); @@ -1125,7 +1125,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); + put_child(t, tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ /* @@ -1160,8 +1160,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, - (struct rt_trie_node *)tn); + put_child(t, tp, cindex, (struct rt_trie_node *)tn); } else { rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; @@ -1620,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, NULL); + put_child(t, tp, cindex, NULL); trie_rebalance(t, tp); } else RCU_INIT_POINTER(t->trie, NULL); @@ -1844,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll); + inetpeer_invalidate_tree(&tb->tb_peers); + pr_debug("trie_flush found=%d\n", found); return found; } @@ -1992,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; + inet_peer_base_init(&tb->tb_peers); t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c75efbdc71cb..e1caa1abe5d1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, /* Limit if icmp type is enabled in ratemask. */ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - if (!rt->peer) - rt_bind_peer(rt, fl4->daddr, 1); - rc = inet_peer_xrlim_allow(rt->peer, + struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); + rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); } out: @@ -674,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb) LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); } else { - info = ip_rt_frag_needed(net, iph, - ntohs(icmph->un.frag.mtu), - skb->dev); + info = ntohs(icmph->un.frag.mtu); if (!info) goto out; } diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5ff2a51b6d0c..85190e69297b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, if (q == NULL) return NULL; + q->net = nf; f->constructor(q, arg); atomic_add(f->qsize, &nf->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - q->net = nf; return q; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dfba343b2509..cac02ad1425d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -82,23 +82,39 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -struct inet_peer_base { - struct inet_peer __rcu *root; - seqlock_t lock; - int total; -}; +void inet_peer_base_init(struct inet_peer_base *bp) +{ + bp->root = peer_avl_empty_rcu; + seqlock_init(&bp->lock); + bp->flush_seq = ~0U; + bp->total = 0; +} +EXPORT_SYMBOL_GPL(inet_peer_base_init); -static struct inet_peer_base v4_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), - .total = 0, -}; +static atomic_t v4_seq = ATOMIC_INIT(0); +static atomic_t v6_seq = ATOMIC_INIT(0); -static struct inet_peer_base v6_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), - .total = 0, -}; +static atomic_t *inetpeer_seq_ptr(int family) +{ + return (family == AF_INET ? &v4_seq : &v6_seq); +} + +static inline void flush_check(struct inet_peer_base *base, int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + if (unlikely(base->flush_seq != atomic_read(fp))) { + inetpeer_invalidate_tree(base); + base->flush_seq = atomic_read(fp); + } +} + +void inetpeer_invalidate_family(int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + atomic_inc(fp); +} #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ @@ -401,11 +417,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(int family) -{ - return family == AF_INET ? &v4_peers : &v6_peers; -} - /* perform garbage collect on all items stacked during a lookup */ static int inet_peer_gc(struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH], @@ -443,14 +454,17 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(struct inet_peer_base *base, + const struct inetpeer_addr *daddr, + int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; + flush_check(base, daddr->family); + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ @@ -571,10 +585,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(int family) +void inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(family); write_seqlock_bh(&base->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9dbd3dd6022d..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); + struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, + frags); + struct net *net = container_of(ipv4, struct net, ipv4); + struct ip4_create_arg *arg = a; qp->protocol = arg->iph->protocol; @@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 451f97c42eb4..0f3185a662c3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -113,19 +113,6 @@ int ip_local_out(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(ip_local_out); -/* dev_loopback_xmit for use with netfilter. */ -static int ip_dev_loopback_xmit(struct sk_buff *newskb) -{ - skb_reset_mac_header(newskb); - __skb_pull(newskb, skb_network_offset(newskb)); - newskb->pkt_type = PACKET_LOOPBACK; - newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(newskb)); - skb_dst_force(newskb); - netif_rx_ni(newskb); - return 0; -} - static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { int ttl = inet->uc_ttl; @@ -200,7 +187,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } if (skb->sk) skb_set_owner_w(skb2, skb->sk); - kfree_skb(skb); + consume_skb(skb); skb = skb2; } @@ -281,7 +268,7 @@ int ip_mc_output(struct sk_buff *skb) if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, NULL, newskb->dev, - ip_dev_loopback_xmit); + dev_loopback_xmit); } /* Multicasts with ttl 0 must not go beyond the host */ @@ -296,7 +283,7 @@ int ip_mc_output(struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, - NULL, newskb->dev, ip_dev_loopback_xmit); + NULL, newskb->dev, dev_loopback_xmit); } return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, @@ -709,7 +696,7 @@ slow_path: IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); } - kfree_skb(skb); + consume_skb(skb); IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); return err; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 91747d4ebc26..d79b961a8009 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -207,35 +207,30 @@ static int log_invalid_proto_max = 255; static ctl_table ip_ct_sysctl_table[] = { { .procname = "ip_conntrack_max", - .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_count", - .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_buckets", - .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_checksum", - .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_log_invalid", - .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -351,6 +346,25 @@ static struct nf_sockopt_ops so_getorigdst = { .owner = THIS_MODULE, }; +static int ipv4_init_net(struct net *net) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + struct nf_ip_net *in = &net->ct.nf_ct_proto; + in->ctl_table = kmemdup(ip_ct_sysctl_table, + sizeof(ip_ct_sysctl_table), + GFP_KERNEL); + if (!in->ctl_table) + return -ENOMEM; + + in->ctl_table[0].data = &nf_conntrack_max; + in->ctl_table[1].data = &net->ct.count; + in->ctl_table[2].data = &net->ct.htable_size; + in->ctl_table[3].data = &net->ct.sysctl_checksum; + in->ctl_table[4].data = &net->ct.sysctl_log_invalid; +#endif + return 0; +} + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", @@ -366,8 +380,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) .ctl_table_path = "net/ipv4/netfilter", - .ctl_table = ip_ct_sysctl_table, #endif + .init_net = ipv4_init_net, .me = THIS_MODULE, }; @@ -378,6 +392,65 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("ip_conntrack"); MODULE_LICENSE("GPL"); +static int ipv4_net_init(struct net *net) +{ + int ret = 0; + + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_tcp4); + if (ret < 0) { + pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n"); + goto out_tcp; + } + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_udp4); + if (ret < 0) { + pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n"); + goto out_udp; + } + ret = nf_conntrack_l4proto_register(net, + &nf_conntrack_l4proto_icmp); + if (ret < 0) { + pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n"); + goto out_icmp; + } + ret = nf_conntrack_l3proto_register(net, + &nf_conntrack_l3proto_ipv4); + if (ret < 0) { + pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n"); + goto out_ipv4; + } + return 0; +out_ipv4: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_icmp); +out_icmp: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_udp4); +out_udp: + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_tcp4); +out_tcp: + return ret; +} + +static void ipv4_net_exit(struct net *net) +{ + nf_conntrack_l3proto_unregister(net, + &nf_conntrack_l3proto_ipv4); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(net, + &nf_conntrack_l4proto_tcp4); +} + +static struct pernet_operations ipv4_net_ops = { + .init = ipv4_net_init, + .exit = ipv4_net_exit, +}; + static int __init nf_conntrack_l3proto_ipv4_init(void) { int ret = 0; @@ -391,35 +464,17 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) return ret; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); + ret = register_pernet_subsys(&ipv4_net_ops); if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register tcp.\n"); + pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); goto cleanup_sockopt; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register udp.\n"); - goto cleanup_tcp; - } - - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register icmp.\n"); - goto cleanup_udp; - } - - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register ipv4\n"); - goto cleanup_icmp; - } - ret = nf_register_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register hooks.\n"); - goto cleanup_ipv4; + goto cleanup_pernet; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) ret = nf_conntrack_ipv4_compat_init(); @@ -431,14 +486,8 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) cleanup_hooks: nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); #endif - cleanup_ipv4: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - cleanup_icmp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); - cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); - cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + cleanup_pernet: + unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst); return ret; @@ -451,10 +500,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) nf_conntrack_ipv4_compat_fini(); #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0847e373d33c..041923cb67ad 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -23,6 +23,11 @@ static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s, static unsigned int *icmp_get_timeouts(struct net *net) { - return &nf_ct_icmp_timeout; + return &icmp_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -274,16 +279,18 @@ static int icmp_nlattr_tuple_size(void) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> -static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmp_pernet(net); if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; } else { /* Set default ICMP timeout. */ - *timeout = nf_ct_icmp_timeout; + *timeout = in->timeout; } return 0; } @@ -308,11 +315,9 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { .procname = "nf_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -323,7 +328,6 @@ static struct ctl_table icmp_sysctl_table[] = { static struct ctl_table icmp_compat_sysctl_table[] = { { .procname = "ip_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -333,6 +337,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int icmp_init_net(struct net *net) +{ + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)in; + in->timeout = nf_ct_icmp_timeout; + +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmp_sysctl_table, + sizeof(icmp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, + sizeof(icmp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + return -ENOMEM; + } + pn->ctl_compat_table[0].data = &in->timeout; +#endif +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -362,11 +394,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .nla_policy = icmp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmp_sysctl_header, - .ctl_table = icmp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = icmp_compat_sysctl_table, -#endif -#endif + .init_net = icmp_init_net, }; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 9bb1b8a37a22..742815518b0f 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -94,14 +94,14 @@ static struct nf_hook_ops ipv4_defrag_ops[] = { { .hook = ipv4_conntrack_defrag, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv4_conntrack_defrag, .owner = THIS_MODULE, - .pf = PF_INET, + .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }, diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 746edec8b86e..bac712293fd6 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -405,7 +405,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, ptr = *octets; while (ctx->pointer < eoc) { - if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { + if (!asn1_octet_decode(ctx, ptr++)) { kfree(*octets); *octets = NULL; return 0; @@ -759,7 +759,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, } break; case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { + if (!asn1_oid_decode(ctx, end, &lp, &len)) { kfree(id); return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98b30d08efe9..655506af47ca 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; @@ -680,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rth->peer && rth->peer->pmtu_expires); + (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -938,7 +935,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(AF_INET); + inetpeer_invalidate_family(AF_INET); } /* @@ -1328,14 +1325,20 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v4(daddr, create); + base = inetpeer_base_ptr(rt->_peer); + if (!base) + return; - if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) - inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); + peer = inet_getpeer_v4(base, daddr, create); + if (peer) { + if (!rt_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); + } } /* @@ -1363,14 +1366,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) struct rtable *rt = (struct rtable *) dst; if (rt && !(rt->dst.flags & DST_NOPEER)) { - if (rt->peer == NULL) - rt_bind_peer(rt, rt->rt_dst, 1); + struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); /* If peer is attached to destination, it is never detached, so that we need not to grab a lock to dereference it. */ - if (rt->peer) { - iph->id = htons(inet_getid(rt->peer, more)); + if (peer) { + iph->id = htons(inet_getid(peer, more)); return; } } else if (!rt) @@ -1480,10 +1482,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { if (peer->redirect_learned.a4 != new_gw) { peer->redirect_learned.a4 = new_gw; @@ -1539,8 +1538,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && peer_pmtu_expired(rt->peer)) { - dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); + } else if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_expired(peer)) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1578,9 +1579,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); return; @@ -1645,9 +1644,7 @@ static int ip_error(struct sk_buff *skb) break; } - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); send = true; if (peer) { @@ -1668,67 +1665,6 @@ out: kfree_skb(skb); return 0; } -/* - * The last two values are not from the RFC but - * are needed for AMPRnet AX.25 paths. - */ - -static const unsigned short mtu_plateau[] = -{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; - -static inline unsigned short guess_mtu(unsigned short old_mtu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) - if (old_mtu > mtu_plateau[i]) - return mtu_plateau[i]; - return 68; -} - -unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, - unsigned short new_mtu, - struct net_device *dev) -{ - unsigned short old_mtu = ntohs(iph->tot_len); - unsigned short est_mtu = 0; - struct inet_peer *peer; - - peer = inet_getpeer_v4(iph->daddr, 1); - if (peer) { - unsigned short mtu = new_mtu; - - if (new_mtu < 68 || new_mtu >= old_mtu) { - /* BSD 4.2 derived systems incorrectly adjust - * tot_len by the IP header length, and report - * a zero MTU in the ICMP message. - */ - if (mtu == 0 && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; - mtu = guess_mtu(old_mtu); - } - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - est_mtu = mtu; - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; - atomic_inc(&__rt_peer_genid); - } - - inet_putpeer(peer); - } - return est_mtu ? : new_mtu; -} - static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); @@ -1753,9 +1689,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_confirm(dst); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); @@ -1781,12 +1715,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static void ipv4_validate_peer(struct rtable *rt) { if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer; + struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 0); - - peer = rt->peer; if (peer) { check_peer_pmtu(&rt->dst, peer); @@ -1812,14 +1742,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer = rt->peer; if (rt->fi) { fib_info_put(rt->fi); rt->fi = NULL; } - if (peer) { - rt->peer = NULL; + if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); inet_putpeer(peer); } } @@ -1832,8 +1761,11 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + if (rt && rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_cleaned(peer)) + dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1935,6 +1867,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { + struct inet_peer_base *base; struct inet_peer *peer; int create = 0; @@ -1944,8 +1877,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); + base = inetpeer_base_ptr(rt->_peer); + BUG_ON(!base); + + peer = inet_getpeer_v4(base, rt->rt_dst, create); if (peer) { + __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, @@ -2023,9 +1960,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || - ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP)) + skb->protocol != htons(ETH_P_IP)) goto e_inval; + if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) + if (ipv4_is_loopback(saddr)) + goto e_inval; + if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) goto e_inval; @@ -2061,7 +2002,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -2189,7 +2130,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; rth->dst.input = ip_forward; @@ -2266,8 +2207,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, by fib_lookup. */ - if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || - ipv4_is_loopback(saddr)) + if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) @@ -2279,9 +2219,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(saddr)) goto martian_source; - if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr)) + if (ipv4_is_zeronet(daddr)) goto martian_destination; + if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) { + if (ipv4_is_loopback(daddr)) + goto martian_destination; + + if (ipv4_is_loopback(saddr)) + goto martian_source; + } + /* * Now we are ready to route packet. */ @@ -2372,7 +2320,7 @@ local_input: rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -2520,9 +2468,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res, u16 type = res->type; struct rtable *rth; - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + in_dev = __in_dev_get_rcu(dev_out); + if (!in_dev) return ERR_PTR(-EINVAL); + if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) + if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + return ERR_PTR(-EINVAL); + if (ipv4_is_lbcast(fl4->daddr)) type = RTN_BROADCAST; else if (ipv4_is_multicast(fl4->daddr)) @@ -2533,10 +2486,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - in_dev = __in_dev_get_rcu(dev_out); - if (!in_dev) - return ERR_PTR(-EINVAL); - if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; fi = NULL; @@ -2576,7 +2525,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, (res->table ? + &res->table->tb_peers : + dev_net(dev_out)->ipv4.peers)); rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2625,6 +2576,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) int orig_oif; res.fi = NULL; + res.table = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif @@ -2730,6 +2682,7 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) if (fib_lookup(net, fl4, &res)) { res.fi = NULL; + res.table = NULL; if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2913,9 +2866,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; - rt->peer = ort->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) atomic_inc(&rt->fi->fib_clntref); @@ -2953,7 +2904,6 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -3009,8 +2959,9 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; error = rt->dst.error; - if (peer) { - inet_peer_refcheck(rt->peer); + if (rt_has_peer(rt)) { + const struct inet_peer *peer = rt_peer_ptr(rt); + inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; if (peer->tcp_ts_stamp) { ts = peer->tcp_ts; @@ -3400,6 +3351,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, }; +static int __net_init ipv4_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv4.peers = bp; + return 0; +} + +static void __net_exit ipv4_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv4.peers; + + net->ipv4.peers = NULL; + inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { + .init = ipv4_inetpeer_init, + .exit = ipv4_inetpeer_exit, +}; #ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; @@ -3480,6 +3455,7 @@ int __init ip_rt_init(void) register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&rt_genid_ops); + register_pernet_subsys(&ipv4_inetpeer_ops); return rc; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c8d28c433b2b..fda2ca17135e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -848,7 +848,6 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, err = net_xmit_eval(err); } - dst_release(dst); return err; } @@ -1821,40 +1820,25 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) +struct inet_peer *tcp_v4_get_peer(struct sock *sk) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct inet_peer *peer; - if (!rt || - inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(inet->inet_daddr, 1); - *release_it = true; - } else { - if (!rt->peer) - rt_bind_peer(rt, inet->inet_daddr, 1); - peer = rt->peer; - *release_it = false; - } - - return peer; + /* If we don't have a valid cached route, or we're doing IP + * options which make the IPv4 header destination address + * different from our peer's, do not bother with this. + */ + if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) + return NULL; + return rt_get_peer_create(rt, inet->inet_daddr); } EXPORT_SYMBOL(tcp_v4_get_peer); -void *tcp_v4_tw_get_peer(struct sock *sk) -{ - const struct inet_timewait_sock *tw = inet_twsk(sk); - - return inet_getpeer_v4(tw->tw_daddr, 1); -} -EXPORT_SYMBOL(tcp_v4_tw_get_peer); - static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v4_tw_get_peer, }; const struct inet_connection_sock_af_ops ipv4_specific = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b85d9fe7d663..cb015317c9f7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct inet_peer *peer; - bool release_it; - peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + peer = icsk->icsk_af_ops->get_peer(sk); if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && @@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk) peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; } - if (release_it) - inet_putpeer(peer); return true; } @@ -80,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk) static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) { + const struct tcp_timewait_sock *tcptw; struct sock *sk = (struct sock *) tw; struct inet_peer *peer; - peer = twsk_getpeer(sk); + tcptw = tcp_twsk(sk); + peer = tcptw->tw_peer; if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; } - inet_putpeer(peer); return true; } return false; @@ -317,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); bool recycle_ok = false; + bool recycle_on = false; - if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) { recycle_ok = tcp_remember_stamp(sk); + recycle_on = true; + } if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); @@ -327,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + struct inet_sock *inet = inet_sk(sk); + struct inet_peer *peer = NULL; - tw->tw_transparent = inet_sk(sk)->transparent; + tw->tw_transparent = inet->transparent; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; @@ -350,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } #endif + if (recycle_on) + peer = icsk->icsk_af_ops->get_peer(sk); + tcptw->tw_peer = peer; + if (peer) + atomic_inc(&peer->refcnt); + #ifdef CONFIG_TCP_MD5SIG /* * The timewait bucket does not have the key DB from the @@ -401,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) void tcp_twsk_destructor(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); + + if (twsk->tw_peer) + inet_putpeer(twsk->tw_peer); +#ifdef CONFIG_TCP_MD5SIG if (twsk->tw_md5_key) { tcp_free_md5sig_pool(); kfree_rcu(twsk->tw_md5_key, rcu); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 803cbfe82fbc..c465d3e51e28 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2442,7 +2442,16 @@ int tcp_send_synack(struct sock *sk) return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } -/* Prepare a SYN-ACK. */ +/** + * tcp_make_synack - Prepare a SYN-ACK. + * sk: listener socket + * dst: dst entry attached to the SYNACK + * req: request_sock pointer + * rvp: request_values pointer + * + * Allocate one skb and build a SYNACK packet. + * @dst is consumed : Caller should not use it again. + */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct request_values *rvp) @@ -2461,14 +2470,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC); - if (skb == NULL) + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + if (unlikely(!skb)) { + dst_release(dst); return NULL; - + } /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set(skb, dst); mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0d3426cb5c4f..8855d8268552 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt.peer = rt->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(&xdst->u.rt, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ @@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt.peer)) - inet_putpeer(xdst->u.rt.peer); + if (rt_has_peer(&xdst->u.rt)) { + struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } |