summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c115
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/ah6.c8
-rw-r--r--net/ipv6/esp6.c32
-rw-r--r--net/ipv6/inet6_connection_sock.c56
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/mcast.c77
-rw-r--r--net/ipv6/ndisc.c29
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/Makefile4
-rw-r--r--net/ipv6/netfilter/ip6_tables.c45
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c2
-rw-r--r--net/ipv6/reassembly.c36
-rw-r--r--net/ipv6/route.c156
-rw-r--r--net/ipv6/sit.c14
-rw-r--r--net/ipv6/tcp_ipv6.c151
-rw-r--r--net/ipv6/udp.c10
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c3
19 files changed, 458 insertions, 294 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 848b35591042..5b189c97c2fc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2672,7 +2672,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
/* Flush routes if device is being removed or it is not loopback */
if (how || !(dev->flags & IFF_LOOPBACK))
rt6_ifdown(net, dev);
- neigh_ifdown(&nd_tbl, dev);
idev = __in6_dev_get(dev);
if (idev == NULL)
@@ -3838,6 +3837,15 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
}
+static inline size_t inet6_ifla6_size(void)
+{
+ return nla_total_size(4) /* IFLA_INET6_FLAGS */
+ + nla_total_size(sizeof(struct ifla_cacheinfo))
+ + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+}
+
static inline size_t inet6_if_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -3845,13 +3853,7 @@ static inline size_t inet6_if_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
- + nla_total_size( /* IFLA_PROTINFO */
- nla_total_size(4) /* IFLA_INET6_FLAGS */
- + nla_total_size(sizeof(struct ifla_cacheinfo))
- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
- );
+ + nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
}
static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
@@ -3898,15 +3900,70 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+ struct ifla_cacheinfo ci;
+
+ NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
+
+ ci.max_reasm_len = IPV6_MAXPLEN;
+ ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
+ ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
+ NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+ nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+ if (nla == NULL)
+ goto nla_put_failure;
+ ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
+
+ /* XXX - MC not implemented */
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+ if (nla == NULL)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static size_t inet6_get_link_af_size(const struct net_device *dev)
+{
+ if (!__in6_dev_get(dev))
+ return 0;
+
+ return inet6_ifla6_size();
+}
+
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ if (!idev)
+ return -ENODATA;
+
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
u32 pid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
- struct nlattr *nla;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
- struct ifla_cacheinfo ci;
nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
if (nlh == NULL)
@@ -3933,30 +3990,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
if (protoinfo == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
-
- ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = cstamp_delta(idev->tstamp);
- ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
- ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time);
- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
-
- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
- if (nla == NULL)
- goto nla_put_failure;
- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
-
- /* XXX - MC not implemented */
-
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (nla == NULL)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (nla == NULL)
+ if (inet6_fill_ifla6_attrs(skb, idev) < 0)
goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
nla_nest_end(skb, protoinfo);
return nlmsg_end(skb, nlh);
@@ -4627,6 +4662,12 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_inet6addr_notifier);
+static struct rtnl_af_ops inet6_ops = {
+ .family = AF_INET6,
+ .fill_link_af = inet6_fill_link_af,
+ .get_link_af_size = inet6_get_link_af_size,
+};
+
/*
* Init / cleanup code
*/
@@ -4678,6 +4719,10 @@ int __init addrconf_init(void)
addrconf_verify(0);
+ err = rtnl_af_register(&inet6_ops);
+ if (err < 0)
+ goto errout_af;
+
err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
if (err < 0)
goto errout;
@@ -4693,6 +4738,8 @@ int __init addrconf_init(void)
return 0;
errout:
+ rtnl_af_unregister(&inet6_ops);
+errout_af:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
unregister_pernet_subsys(&addrconf_ops);
@@ -4713,6 +4760,8 @@ void addrconf_cleanup(void)
rtnl_lock();
+ __rtnl_af_unregister(&inet6_ops);
+
/* clean dev list */
for_each_netdev(&init_net, dev) {
if (__in6_dev_get(dev) == NULL)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 54e8e42f7a88..059a3de647db 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -810,7 +810,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
}
rcu_read_unlock();
- if (unlikely(IS_ERR(segs)))
+ if (IS_ERR(segs))
goto out;
for (skb = segs; skb; skb = skb->next) {
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index ee82d4ef26ce..1aba54ae53c4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -538,14 +538,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
if (!pskb_may_pull(skb, ah_hlen))
goto out;
- ip6h = ipv6_hdr(skb);
-
- skb_push(skb, hdr_len);
if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
goto out;
nfrags = err;
+ ah = (struct ip_auth_hdr *)skb->data;
+ ip6h = ipv6_hdr(skb);
+
+ skb_push(skb, hdr_len);
+
work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
if (!work_iph)
goto out;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ee9b93bdd6a2..1b5c9825743b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -49,6 +49,8 @@ struct esp_skb_cb {
#define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu);
+
/*
* Allocate an AEAD request structure with extra space for SG and IV.
*
@@ -140,6 +142,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
int blksize;
int clen;
int alen;
+ int plen;
+ int tfclen;
int nfrags;
u8 *iv;
u8 *tail;
@@ -148,18 +152,26 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
/* skb is pure payload to encrypt */
err = -ENOMEM;
- /* Round to block size */
- clen = skb->len;
-
aead = esp->aead;
alen = crypto_aead_authsize(aead);
+ tfclen = 0;
+ if (x->tfcpad) {
+ struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
+ u32 padto;
+
+ padto = min(x->tfcpad, esp6_get_mtu(x, dst->child_mtu_cached));
+ if (skb->len < padto)
+ tfclen = padto - skb->len;
+ }
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
- clen = ALIGN(clen + 2, blksize);
+ clen = ALIGN(skb->len + 2 + tfclen, blksize);
if (esp->padlen)
clen = ALIGN(clen, esp->padlen);
+ plen = clen - skb->len - tfclen;
- if ((err = skb_cow_data(skb, clen - skb->len + alen, &trailer)) < 0)
+ err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
+ if (err < 0)
goto error;
nfrags = err;
@@ -174,13 +186,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
/* Fill padding... */
tail = skb_tail_pointer(trailer);
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
do {
int i;
- for (i=0; i<clen-skb->len - 2; i++)
+ for (i = 0; i < plen - 2; i++)
tail[i] = i + 1;
} while (0);
- tail[clen-skb->len - 2] = (clen - skb->len) - 2;
- tail[clen - skb->len - 1] = *skb_mac_header(skb);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = *skb_mac_header(skb);
pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8a1628023bd1..d144e629d2b4 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -44,7 +44,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
!sk2->sk_bound_dev_if ||
sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
(!sk->sk_reuse || !sk2->sk_reuse ||
- sk2->sk_state == TCP_LISTEN) &&
+ ((1 << sk2->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) &&
ipv6_rcv_saddr_equal(sk, sk2))
break;
}
@@ -54,24 +54,54 @@ int inet6_csk_bind_conflict(const struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+struct dst_entry *inet6_csk_route_req(struct sock *sk,
+ const struct request_sock *req)
+{
+ struct inet6_request_sock *treq = inet6_rsk(req);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *final_p, final;
+ struct dst_entry *dst;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = IPPROTO_TCP;
+ ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+ final_p = fl6_update_dst(&fl, np->opt, &final);
+ ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.mark = sk->sk_mark;
+ fl.fl_ip_dport = inet_rsk(req)->rmt_port;
+ fl.fl_ip_sport = inet_rsk(req)->loc_port;
+ security_req_classify_flow(req, &fl);
+
+ if (ip6_dst_lookup(sk, &dst, &fl))
+ return NULL;
+
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+ if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ return NULL;
+
+ return dst;
+}
+
/*
* request_sock (formerly open request) hash tables.
*/
static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
const u32 rnd, const u16 synq_hsize)
{
- u32 a = (__force u32)raddr->s6_addr32[0];
- u32 b = (__force u32)raddr->s6_addr32[1];
- u32 c = (__force u32)raddr->s6_addr32[2];
-
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += rnd;
- __jhash_mix(a, b, c);
-
- a += (__force u32)raddr->s6_addr32[3];
- b += (__force u32)rport;
- __jhash_mix(a, b, c);
+ u32 c;
+
+ c = jhash_3words((__force u32)raddr->s6_addr32[0],
+ (__force u32)raddr->s6_addr32[1],
+ (__force u32)raddr->s6_addr32[2],
+ rnd);
+
+ c = jhash_2words((__force u32)raddr->s6_addr32[3],
+ (__force u32)rport,
+ c);
return c & (synq_hsize - 1);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 70e891a20fb9..4f4483e697bd 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -58,8 +58,6 @@ MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
-#define IPV6_TLV_TEL_DST_SIZE 8
-
#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
#else
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6f32ffce7022..9fab274019c0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
fl = (struct flowi) {
.oif = vif->link,
- .nl_u = { .ip6_u =
- { .daddr = ipv6h->daddr, }
- }
+ .fl6_dst = ipv6h->daddr,
};
dst = ip6_route_output(net, NULL, &fl);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d1444b95ad7e..49f986d626a0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
/* Big mc list lock for all the sockets */
-static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+static DEFINE_SPINLOCK(ipv6_sk_mc_lock);
static void igmp6_join_group(struct ifmcaddr6 *ma);
static void igmp6_leave_group(struct ifmcaddr6 *ma);
@@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
* socket join on multicast group
*/
+#define for_each_pmc_rcu(np, pmc) \
+ for (pmc = rcu_dereference(np->ipv6_mc_list); \
+ pmc != NULL; \
+ pmc = rcu_dereference(pmc->next))
+
int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
@@ -134,15 +139,15 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
- read_lock_bh(&ipv6_sk_mc_lock);
- for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(np, mc_lst) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
- read_unlock_bh(&ipv6_sk_mc_lock);
+ rcu_read_unlock();
return -EADDRINUSE;
}
}
- read_unlock_bh(&ipv6_sk_mc_lock);
+ rcu_read_unlock();
mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
@@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return err;
}
- write_lock_bh(&ipv6_sk_mc_lock);
+ spin_lock(&ipv6_sk_mc_lock);
mc_lst->next = np->ipv6_mc_list;
- np->ipv6_mc_list = mc_lst;
- write_unlock_bh(&ipv6_sk_mc_lock);
+ rcu_assign_pointer(np->ipv6_mc_list, mc_lst);
+ spin_unlock(&ipv6_sk_mc_lock);
rcu_read_unlock();
return 0;
}
+static void ipv6_mc_socklist_reclaim(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ipv6_mc_socklist, rcu));
+}
/*
* socket leave on multicast group
*/
int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6_mc_socklist *mc_lst, **lnk;
+ struct ipv6_mc_socklist *mc_lst;
+ struct ipv6_mc_socklist __rcu **lnk;
struct net *net = sock_net(sk);
- write_lock_bh(&ipv6_sk_mc_lock);
- for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+ spin_lock(&ipv6_sk_mc_lock);
+ for (lnk = &np->ipv6_mc_list;
+ (mc_lst = rcu_dereference_protected(*lnk,
+ lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ;
+ lnk = &mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
struct net_device *dev;
*lnk = mc_lst->next;
- write_unlock_bh(&ipv6_sk_mc_lock);
+ spin_unlock(&ipv6_sk_mc_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -225,11 +238,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
- sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+ atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+ call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
return 0;
}
}
- write_unlock_bh(&ipv6_sk_mc_lock);
+ spin_unlock(&ipv6_sk_mc_lock);
return -EADDRNOTAVAIL;
}
@@ -257,7 +271,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
return NULL;
idev = __in6_dev_get(dev);
if (!idev)
- return NULL;;
+ return NULL;
read_lock_bh(&idev->lock);
if (idev->dead) {
read_unlock_bh(&idev->lock);
@@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk)
struct ipv6_mc_socklist *mc_lst;
struct net *net = sock_net(sk);
- write_lock_bh(&ipv6_sk_mc_lock);
- while ((mc_lst = np->ipv6_mc_list) != NULL) {
+ spin_lock(&ipv6_sk_mc_lock);
+ while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list,
+ lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) {
struct net_device *dev;
np->ipv6_mc_list = mc_lst->next;
- write_unlock_bh(&ipv6_sk_mc_lock);
+ spin_unlock(&ipv6_sk_mc_lock);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
@@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk)
} else
(void) ip6_mc_leave_src(sk, mc_lst, NULL);
rcu_read_unlock();
- sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
- write_lock_bh(&ipv6_sk_mc_lock);
+ atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+ call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim);
+
+ spin_lock(&ipv6_sk_mc_lock);
}
- write_unlock_bh(&ipv6_sk_mc_lock);
+ spin_unlock(&ipv6_sk_mc_lock);
}
int ip6_mc_source(int add, int omode, struct sock *sk,
@@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
err = -EADDRNOTAVAIL;
- read_lock(&ipv6_sk_mc_lock);
- for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rcu(inet6, pmc) {
if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
continue;
if (ipv6_addr_equal(&pmc->addr, group))
@@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
done:
if (pmclocked)
write_unlock(&pmc->sflock);
- read_unlock(&ipv6_sk_mc_lock);
read_unlock_bh(&idev->lock);
rcu_read_unlock();
if (leavegroup)
@@ -466,14 +481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
dev = idev->dev;
err = 0;
- read_lock(&ipv6_sk_mc_lock);
if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
leavegroup = 1;
goto done;
}
- for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rcu(inet6, pmc) {
if (pmc->ifindex != gsf->gf_interface)
continue;
if (ipv6_addr_equal(&pmc->addr, group))
@@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
write_unlock(&pmc->sflock);
err = 0;
done:
- read_unlock(&ipv6_sk_mc_lock);
read_unlock_bh(&idev->lock);
rcu_read_unlock();
if (leavegroup)
@@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
* so reading the list is safe.
*/
- for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ for_each_pmc_rcu(inet6, pmc) {
if (pmc->ifindex != gsf->gf_interface)
continue;
if (ipv6_addr_equal(group, &pmc->addr))
@@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
struct ip6_sf_socklist *psl;
int rv = 1;
- read_lock(&ipv6_sk_mc_lock);
- for (mc = np->ipv6_mc_list; mc; mc = mc->next) {
+ rcu_read_lock();
+ for_each_pmc_rcu(np, mc) {
if (ipv6_addr_equal(&mc->addr, mc_addr))
break;
}
if (!mc) {
- read_unlock(&ipv6_sk_mc_lock);
+ rcu_read_unlock();
return 1;
}
read_lock(&mc->sflock);
@@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
rv = 0;
}
read_unlock(&mc->sflock);
- read_unlock(&ipv6_sk_mc_lock);
+ rcu_read_unlock();
return rv;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 998d6d27e7cf..2342545a5ee9 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -141,18 +141,18 @@ struct neigh_table nd_tbl = {
.proxy_redo = pndisc_redo,
.id = "ndisc_cache",
.parms = {
- .tbl = &nd_tbl,
- .base_reachable_time = 30 * HZ,
- .retrans_time = 1 * HZ,
- .gc_staletime = 60 * HZ,
- .reachable_time = 30 * HZ,
- .delay_probe_time = 5 * HZ,
- .queue_len = 3,
- .ucast_probes = 3,
- .mcast_probes = 3,
- .anycast_delay = 1 * HZ,
- .proxy_delay = (8 * HZ) / 10,
- .proxy_qlen = 64,
+ .tbl = &nd_tbl,
+ .base_reachable_time = ND_REACHABLE_TIME,
+ .retrans_time = ND_RETRANS_TIMER,
+ .gc_staletime = 60 * HZ,
+ .reachable_time = ND_REACHABLE_TIME,
+ .delay_probe_time = 5 * HZ,
+ .queue_len = 3,
+ .ucast_probes = 3,
+ .mcast_probes = 3,
+ .anycast_delay = 1 * HZ,
+ .proxy_delay = (8 * HZ) / 10,
+ .proxy_qlen = 64,
},
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
@@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
if (rt)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
@@ -1377,7 +1378,7 @@ skip_linkparms:
in6_dev->cnf.mtu6 = mtu;
if (rt)
- rt->dst.metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(&rt->dst, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 7155b2451d7c..35915e8617f0 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb)
struct flowi fl = {
.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
.mark = skb->mark,
- .nl_u =
- { .ip6_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr, } },
+ .fl6_dst = iph->daddr,
+ .fl6_src = iph->saddr,
};
dst = ip6_route_output(net, skb->sk, &fl);
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 0a432c9b0795..abfee91ce816 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,13 +11,13 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
# objects for l3 independent conntrack
-nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
+nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
# defrag
-nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
+nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
# matches
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 455582384ece..7d227c644f72 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -897,42 +897,25 @@ get_counters(const struct xt_table_info *t,
struct ip6t_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu = get_cpu();
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- *
- * Bottom half has to be disabled to prevent deadlock
- * if new softirq were to run and call ipt_do_table
- */
- local_bh_disable();
- i = 0;
- xt_entry_foreach(iter, t->entries[curcpu], t->size) {
- SET_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
- ++i;
- }
- local_bh_enable();
- /* Processing counters from other cpus, we can let bottom half enabled,
- * (preemption is disabled)
- */
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqlock_t *lock = &per_cpu(xt_info_locks, cpu).lock;
+
i = 0;
- local_bh_disable();
- xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
- ADD_COUNTER(counters[i], iter->counters.bcnt,
- iter->counters.pcnt);
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqbegin(lock);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqretry(lock, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
++i;
}
- xt_info_wrunlock(cpu);
- local_bh_enable();
}
- put_cpu();
}
static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -945,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc(countersize);
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
@@ -1216,7 +1199,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 2933396e0281..bf998feac14e 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -124,7 +124,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
skb_reset_network_header(nskb);
ip6h = ipv6_hdr(nskb);
ip6h->version = 6;
- ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
+ ip6h->hop_limit = ip6_dst_hoplimit(dst);
ip6h->nexthdr = IPPROTO_TCP;
ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0f2766453759..07beeb06f752 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -104,26 +104,22 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
const struct in6_addr *daddr, u32 rnd)
{
- u32 a, b, c;
-
- a = (__force u32)saddr->s6_addr32[0];
- b = (__force u32)saddr->s6_addr32[1];
- c = (__force u32)saddr->s6_addr32[2];
-
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += rnd;
- __jhash_mix(a, b, c);
-
- a += (__force u32)saddr->s6_addr32[3];
- b += (__force u32)daddr->s6_addr32[0];
- c += (__force u32)daddr->s6_addr32[1];
- __jhash_mix(a, b, c);
-
- a += (__force u32)daddr->s6_addr32[2];
- b += (__force u32)daddr->s6_addr32[3];
- c += (__force u32)id;
- __jhash_mix(a, b, c);
+ u32 c;
+
+ c = jhash_3words((__force u32)saddr->s6_addr32[0],
+ (__force u32)saddr->s6_addr32[1],
+ (__force u32)saddr->s6_addr32[2],
+ rnd);
+
+ c = jhash_3words((__force u32)saddr->s6_addr32[3],
+ (__force u32)daddr->s6_addr32[0],
+ (__force u32)daddr->s6_addr32[1],
+ c);
+
+ c = jhash_3words((__force u32)daddr->s6_addr32[2],
+ (__force u32)daddr->s6_addr32[3],
+ (__force u32)id,
+ c);
return c & (INETFRAGS_HASHSZ - 1);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7659d6f16e6b..373bd0416f69 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -76,6 +76,8 @@
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int ip6_default_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
@@ -103,6 +105,8 @@ static struct dst_ops ip6_dst_ops_template = {
.gc = ip6_dst_gc,
.gc_thresh = 1024,
.check = ip6_dst_check,
+ .default_advmss = ip6_default_advmss,
+ .default_mtu = ip6_default_mtu,
.destroy = ip6_dst_destroy,
.ifdown = ip6_dst_ifdown,
.negative_advice = ip6_negative_advice,
@@ -129,7 +133,6 @@ static struct rt6_info ip6_null_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -ENETUNREACH,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
},
@@ -150,7 +153,6 @@ static struct rt6_info ip6_prohibit_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -EACCES,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
},
@@ -166,7 +168,6 @@ static struct rt6_info ip6_blk_hole_entry_template = {
.__use = 1,
.obsolete = -1,
.error = -EINVAL,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
.output = dst_discard,
},
@@ -188,11 +189,29 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
+ struct inet_peer *peer = rt->rt6i_peer;
if (idev != NULL) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
+ if (peer) {
+ BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
+ rt->rt6i_peer = NULL;
+ inet_putpeer(peer);
+ }
+}
+
+void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+ struct inet_peer *peer;
+
+ if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
+ return;
+
+ peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+ if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+ inet_putpeer(peer);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -558,11 +577,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
{
struct flowi fl = {
.oif = oif,
- .nl_u = {
- .ip6_u = {
- .daddr = *daddr,
- },
- },
+ .fl6_dst = *daddr,
};
struct dst_entry *dst;
int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
@@ -778,13 +793,9 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
- .nl_u = {
- .ip6_u = {
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
- },
- },
+ .fl6_dst = iph->daddr,
+ .fl6_src = iph->saddr,
+ .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
.mark = skb->mark,
.proto = iph->nexthdr,
};
@@ -834,7 +845,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
new->input = dst_discard;
new->output = dst_discard;
- memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ dst_copy_metrics(new, &ort->dst);
new->dev = ort->dst.dev;
if (new->dev)
dev_hold(new->dev);
@@ -918,18 +929,22 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
+ u32 features = dst_metric(dst, RTAX_FEATURES);
mtu = IPV6_MIN_MTU;
- dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(dst, RTAX_FEATURES, features);
}
- dst->metrics[RTAX_MTU-1] = mtu;
+ dst_metric_set(dst, RTAX_MTU, mtu);
call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
-static int ipv6_get_mtu(struct net_device *dev);
-
-static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
+ struct net_device *dev = dst->dev;
+ unsigned int mtu = dst_mtu(dst);
+ struct net *net = dev_net(dev);
+
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
@@ -946,6 +961,20 @@ static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
return mtu;
}
+static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+{
+ unsigned int mtu = IPV6_MIN_MTU;
+ struct inet6_dev *idev;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dst->dev);
+ if (idev)
+ mtu = idev->cnf.mtu6;
+ rcu_read_unlock();
+
+ return mtu;
+}
+
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
@@ -979,9 +1008,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->rt6i_idev = idev;
rt->rt6i_nexthop = neigh;
atomic_set(&rt->dst.__refcnt, 1);
- rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
rt->dst.output = ip6_output;
#if 0 /* there's no chance to use these for ndisc */
@@ -1080,23 +1107,10 @@ out:
Remove it only when all the things will work!
*/
-static int ipv6_get_mtu(struct net_device *dev)
-{
- int mtu = IPV6_MIN_MTU;
- struct inet6_dev *idev;
-
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
- return mtu;
-}
-
int ip6_dst_hoplimit(struct dst_entry *dst)
{
- int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
- if (hoplimit < 0) {
+ int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+ if (hoplimit == 0) {
struct net_device *dev = dst->dev;
struct inet6_dev *idev;
@@ -1110,6 +1124,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
}
return hoplimit;
}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
/*
*
@@ -1295,17 +1310,11 @@ install_route:
goto out;
}
- rt->dst.metrics[type - 1] = nla_get_u32(nla);
+ dst_metric_set(&rt->dst, type, nla_get_u32(nla));
}
}
}
- if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
- if (!dst_mtu(&rt->dst))
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
- if (!dst_metric(&rt->dst, RTAX_ADVMSS))
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -1463,12 +1472,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
struct ip6rd_flowi rdfl = {
.fl = {
.oif = dev->ifindex,
- .nl_u = {
- .ip6_u = {
- .daddr = *dest,
- .saddr = *src,
- },
- },
+ .fl6_dst = *dest,
+ .fl6_src = *src,
},
};
@@ -1534,10 +1539,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
nrt->rt6i_nexthop = neigh_clone(neigh);
- /* Reset pmtu, it may be better */
- nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
- nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
- dst_mtu(&nrt->dst));
if (ip6_ins_rt(nrt))
goto out;
@@ -1601,9 +1602,12 @@ again:
would return automatically.
*/
if (rt->rt6i_flags & RTF_CACHE) {
- rt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&rt->dst, RTAX_FEATURES, features);
+ }
dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
goto out;
@@ -1620,9 +1624,12 @@ again:
nrt = rt6_alloc_clone(rt, daddr);
if (nrt) {
- nrt->dst.metrics[RTAX_MTU-1] = pmtu;
- if (allfrag)
- nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
+ if (allfrag) {
+ u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
+ features |= RTAX_FEATURE_ALLFRAG;
+ dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
+ }
/* According to RFC 1981, detecting PMTU increase shouldn't be
* happened within 5 mins, the recommended timer is 10 mins.
@@ -1673,7 +1680,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
rt->dst.input = ort->dst.input;
rt->dst.output = ort->dst.output;
- memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+ dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
rt->dst.dev = ort->dst.dev;
if (rt->dst.dev)
@@ -1965,9 +1972,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->dst.output = ip6_output;
rt->rt6i_dev = net->loopback_dev;
rt->rt6i_idev = idev;
- rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
- rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
rt->dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -2004,11 +2009,11 @@ struct arg_dev_net {
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
- struct net *net = ((struct arg_dev_net *)arg)->net;
+ const struct arg_dev_net *adn = arg;
+ const struct net_device *dev = adn->dev;
- if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
- rt != net->ipv6.ip6_null_entry) {
+ if ((rt->rt6i_dev == dev || dev == NULL) &&
+ rt != adn->net->ipv6.ip6_null_entry) {
RT6_TRACE("deleted by ifdown %p\n", rt);
return -1;
}
@@ -2036,7 +2041,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
- struct net *net = dev_net(arg->dev);
/* In IPv6 pmtu discovery is not optional,
so that RTAX_MTU lock cannot disable it.
@@ -2067,8 +2071,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
(dst_mtu(&rt->dst) >= arg->mtu ||
(dst_mtu(&rt->dst) < arg->mtu &&
dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
- rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
return 0;
}
@@ -2294,7 +2297,7 @@ static int rt6_fill_node(struct net *net,
NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
}
- if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+ if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
if (rt->dst.neighbour)
@@ -2470,8 +2473,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
-#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-
struct rt6_proc_arg
{
char *buffer;
@@ -2687,6 +2688,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_null_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2697,6 +2699,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_prohibit_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2706,6 +2709,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_blk_hole_entry->dst.path =
(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+ dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif
net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8c4d00c7cd2b..8ce38f10a547 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -731,10 +731,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
{
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos) } },
+ struct flowi fl = { .fl4_dst = dst,
+ .fl4_src = tiph->saddr,
+ .fl4_tos = RT_TOS(tos),
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -856,10 +855,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos) } },
+ struct flowi fl = { .fl4_dst = iph->daddr,
+ .fl4_src = iph->saddr,
+ .fl4_tos = RT_TOS(iph->tos),
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
struct rtable *rt;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7e41e2cbb85e..20aa95e37359 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -130,6 +130,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct rt6_info *rt;
struct flowi fl;
struct dst_entry *dst;
int addr_type;
@@ -280,6 +281,26 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(sk, dst, NULL, NULL);
+ rt = (struct rt6_info *) dst;
+ if (tcp_death_row.sysctl_tw_recycle &&
+ !tp->rx_opt.ts_recent_stamp &&
+ ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
+ struct inet_peer *peer = rt6_get_peer(rt);
+ /*
+ * VJ's idea. We save last timestamp seen from
+ * the destination in peer table, when entering state
+ * TIME-WAIT * and initialize rx_opt.ts_recent from it,
+ * when trying new connection.
+ */
+ if (peer) {
+ inet_peer_refcheck(peer);
+ if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+ tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+ tp->rx_opt.ts_recent = peer->tcp_ts;
+ }
+ }
+ }
+
icsk->icsk_ext_hdr_len = 0;
if (np->opt)
icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
@@ -906,12 +927,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
};
#endif
-static struct timewait_sock_ops tcp6_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
- .twsk_destructor= tcp_twsk_destructor,
-};
-
static void __tcp_v6_send_check(struct sk_buff *skb,
struct in6_addr *saddr, struct in6_addr *daddr)
{
@@ -1176,6 +1191,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u32 isn = TCP_SKB_CB(skb)->when;
+ struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
int want_cookie = 0;
#else
@@ -1273,6 +1289,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
TCP_ECN_create_request(req, tcp_hdr(skb));
if (!isn) {
+ struct inet_peer *peer = NULL;
+
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1285,13 +1303,57 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
treq->iif = inet6_iif(skb);
- if (!want_cookie) {
- isn = tcp_v6_init_sequence(skb);
- } else {
+
+ if (want_cookie) {
isn = cookie_v6_init_sequence(sk, skb, &req->mss);
req->cookie_ts = tmp_opt.tstamp_ok;
+ goto have_isn;
}
+
+ /* VJ's idea. We save last timestamp seen
+ * from the destination in peer table, when entering
+ * state TIME-WAIT, and check against it before
+ * accepting new connection request.
+ *
+ * If "isn" is not zero, this request hit alive
+ * timewait bucket, so that all the necessary checks
+ * are made in the function processing timewait state.
+ */
+ if (tmp_opt.saw_tstamp &&
+ tcp_death_row.sysctl_tw_recycle &&
+ (dst = inet6_csk_route_req(sk, req)) != NULL &&
+ (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
+ ipv6_addr_equal((struct in6_addr *)peer->daddr.a6,
+ &treq->rmt_addr)) {
+ inet_peer_refcheck(peer);
+ if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
+ (s32)(peer->tcp_ts - req->ts_recent) >
+ TCP_PAWS_WINDOW) {
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+ goto drop_and_release;
+ }
+ }
+ /* Kill the following clause, if you dislike this way. */
+ else if (!sysctl_tcp_syncookies &&
+ (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (sysctl_max_syn_backlog >> 2)) &&
+ (!peer || !peer->tcp_ts_stamp) &&
+ (!dst || !dst_metric(dst, RTAX_RTT))) {
+ /* Without syncookies last quarter of
+ * backlog is filled with destinations,
+ * proven to be alive.
+ * It means that we continue to communicate
+ * to destinations, already remembered
+ * to the moment of synflood.
+ */
+ LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
+ &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
+ goto drop_and_release;
+ }
+
+ isn = tcp_v6_init_sequence(skb);
}
+have_isn:
tcp_rsk(req)->snt_isn = isn;
security_inet_conn_request(sk, skb, req);
@@ -1304,6 +1366,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
+drop_and_release:
+ dst_release(dst);
drop_and_free:
reqsk_free(req);
drop:
@@ -1382,28 +1446,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (sk_acceptq_is_full(sk))
goto out_overflow;
- if (dst == NULL) {
- struct in6_addr *final_p, final;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
- final_p = fl6_update_dst(&fl, opt, &final);
- ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
- fl.oif = sk->sk_bound_dev_if;
- fl.mark = sk->sk_mark;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, &fl);
-
- if (ip6_dst_lookup(sk, &dst, &fl))
- goto out;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ if (!dst) {
+ dst = inet6_csk_route_req(sk, req);
+ if (!dst)
goto out;
}
@@ -1476,7 +1521,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ newtp->advmss = dst_metric_advmss(dst);
tcp_initialize_rcv_mss(newsk);
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
@@ -1818,19 +1863,51 @@ do_time_wait:
goto discard_it;
}
-static int tcp_v6_remember_stamp(struct sock *sk)
+static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
{
- /* Alas, not yet... */
- return 0;
+ struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct inet_peer *peer;
+
+ if (!rt ||
+ !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
+ peer = inet_getpeer_v6(&np->daddr, 1);
+ *release_it = true;
+ } else {
+ if (!rt->rt6i_peer)
+ rt6_bind_peer(rt, 1);
+ peer = rt->rt6i_peer;
+ *release_it = false;
+ }
+
+ return peer;
}
+static void *tcp_v6_tw_get_peer(struct sock *sk)
+{
+ struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
+ struct inet_timewait_sock *tw = inet_twsk(sk);
+
+ if (tw->tw_family == AF_INET)
+ return tcp_v4_tw_get_peer(sk);
+
+ return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+}
+
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+ .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
+ .twsk_unique = tcp_twsk_unique,
+ .twsk_destructor= tcp_twsk_destructor,
+ .twsk_getpeer = tcp_v6_tw_get_peer,
+};
+
static const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .remember_stamp = tcp_v6_remember_stamp,
+ .get_peer = tcp_v6_get_peer,
.net_header_len = sizeof(struct ipv6hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
@@ -1862,7 +1939,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.rebuild_header = inet_sk_rebuild_header,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
- .remember_stamp = tcp_v4_remember_stamp,
+ .get_peer = tcp_v4_get_peer,
.net_header_len = sizeof(struct iphdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index cd6cb7c3e563..9a009c66c8a3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
+ __be32 sk1_rcv_saddr = sk_rcv_saddr(sk);
+ __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(sk_rcv_saddr6);
@@ -227,7 +227,7 @@ begin:
if (result) {
exact_match:
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
daddr, hnum, dif) < badness)) {
@@ -294,7 +294,7 @@ begin:
goto begin;
if (result) {
- if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, saddr, sport,
daddr, dport, dif) < badness)) {
@@ -602,7 +602,7 @@ static void flush_stack(struct sock **stack, unsigned int count,
sk = stack[i];
if (skb1) {
- if (sk_rcvqueues_full(sk, skb)) {
+ if (sk_rcvqueues_full(sk, skb1)) {
kfree_skb(skb1);
goto drop;
}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index b809812c8d30..645cb968d450 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -14,6 +14,7 @@
#include <net/dsfield.h>
#include <net/dst.h>
#include <net/inet_ecn.h>
+#include <net/ip6_route.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
@@ -53,7 +54,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
+ top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
return 0;