diff options
Diffstat (limited to 'net/ipv6')
37 files changed, 2385 insertions, 1483 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 24f3aa0f2a35..ae14617e607f 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -16,6 +16,7 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o +ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e7a1882db048..87f688857ade 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -335,7 +335,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; - memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf)); + memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -349,7 +349,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (snmp6_alloc_dev(ndev) < 0) { ADBG((KERN_WARNING "%s(): cannot allocate memory for statistics; dev=%s.\n", - __FUNCTION__, dev->name)); + __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); ndev->dead = 1; in6_dev_finish_destroy(ndev); @@ -359,7 +359,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (snmp6_register_dev(ndev) < 0) { ADBG((KERN_WARNING "%s(): cannot create /proc/net/dev_snmp6/%s\n", - __FUNCTION__, dev->name)); + __func__, dev->name)); neigh_parms_release(&nd_tbl, ndev->nd_parms); ndev->dead = 1; in6_dev_finish_destroy(ndev); @@ -493,7 +493,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) dev_forward_change((struct inet6_dev *)table->extra1); if (*p) - rt6_purge_dflt_routers(); + rt6_purge_dflt_routers(net); } #endif @@ -561,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&addrconf_hash_lock); /* Ignore adding duplicate 
addresses on an interface */ - if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) { + if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { ADBG(("ipv6_add_addr: already assigned\n")); err = -EEXIST; goto out; @@ -751,9 +751,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { struct in6_addr prefix; struct rt6_info *rt; - + struct net *net = dev_net(ifp->idev->dev); ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); + rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { @@ -893,20 +893,40 @@ out: /* * Choose an appropriate source address (RFC3484) */ +enum { + IPV6_SADDR_RULE_INIT = 0, + IPV6_SADDR_RULE_LOCAL, + IPV6_SADDR_RULE_SCOPE, + IPV6_SADDR_RULE_PREFERRED, +#ifdef CONFIG_IPV6_MIP6 + IPV6_SADDR_RULE_HOA, +#endif + IPV6_SADDR_RULE_OIF, + IPV6_SADDR_RULE_LABEL, +#ifdef CONFIG_IPV6_PRIVACY + IPV6_SADDR_RULE_PRIVACY, +#endif + IPV6_SADDR_RULE_ORCHID, + IPV6_SADDR_RULE_PREFIX, + IPV6_SADDR_RULE_MAX +}; + struct ipv6_saddr_score { - int addr_type; - unsigned int attrs; - int matchlen; - int scope; - unsigned int rule; + int rule; + int addr_type; + struct inet6_ifaddr *ifa; + DECLARE_BITMAP(scorebits, IPV6_SADDR_RULE_MAX); + int scopedist; + int matchlen; }; -#define IPV6_SADDR_SCORE_LOCAL 0x0001 -#define IPV6_SADDR_SCORE_PREFERRED 0x0004 -#define IPV6_SADDR_SCORE_HOA 0x0008 -#define IPV6_SADDR_SCORE_OIF 0x0010 -#define IPV6_SADDR_SCORE_LABEL 0x0020 -#define IPV6_SADDR_SCORE_PRIVACY 0x0040 +struct ipv6_saddr_dst { + struct in6_addr *addr; + int ifindex; + int scope; + int label; + unsigned int prefs; +}; static inline int ipv6_saddr_preferred(int type) { @@ -916,27 +936,152 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -int ipv6_dev_get_saddr(struct net_device *daddr_dev, - struct in6_addr *daddr, struct in6_addr 
*saddr) +static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score, + struct ipv6_saddr_dst *dst, + int i) { - struct ipv6_saddr_score hiscore; - struct inet6_ifaddr *ifa_result = NULL; - int daddr_type = __ipv6_addr_type(daddr); - int daddr_scope = __ipv6_addr_src_scope(daddr_type); - int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0; - u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex); + int ret; + + if (i <= score->rule) { + switch (i) { + case IPV6_SADDR_RULE_SCOPE: + ret = score->scopedist; + break; + case IPV6_SADDR_RULE_PREFIX: + ret = score->matchlen; + break; + default: + ret = !!test_bit(i, score->scorebits); + } + goto out; + } + + switch (i) { + case IPV6_SADDR_RULE_INIT: + /* Rule 0: remember if hiscore is not ready yet */ + ret = !!score->ifa; + break; + case IPV6_SADDR_RULE_LOCAL: + /* Rule 1: Prefer same address */ + ret = ipv6_addr_equal(&score->ifa->addr, dst->addr); + break; + case IPV6_SADDR_RULE_SCOPE: + /* Rule 2: Prefer appropriate scope + * + * ret + * ^ + * -1 | d 15 + * ---+--+-+---> scope + * | + * | d is scope of the destination. + * B-d | \ + * | \ <- smaller scope is better + * B-15 | \ if scope is enough for destination. + * | ret = B - scope (-1 <= d <= scope <= 15). + * d-C-1 | / + * |/ <- greater is better + * -C / if scope is not enough for destination. + * /| ret = scope - C (-1 <= scope < d <= 15). + * + * d - C - 1 < B -15 (for all -1 <= d <= 15). + * C > d + 14 - B >= 15 + 14 - B = 29 - B. + * Assume B = 0 and we get C > 29. 
+ */ + ret = __ipv6_addr_src_scope(score->addr_type); + if (ret >= dst->scope) + ret = -ret; + else + ret -= 128; /* 30 is enough */ + score->scopedist = ret; + break; + case IPV6_SADDR_RULE_PREFERRED: + /* Rule 3: Avoid deprecated and optimistic addresses */ + ret = ipv6_saddr_preferred(score->addr_type) || + !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + break; +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SADDR_RULE_HOA: + { + /* Rule 4: Prefer home address */ + int prefhome = !(dst->prefs & IPV6_PREFER_SRC_COA); + ret = !(score->ifa->flags & IFA_F_HOMEADDRESS) ^ prefhome; + break; + } +#endif + case IPV6_SADDR_RULE_OIF: + /* Rule 5: Prefer outgoing interface */ + ret = (!dst->ifindex || + dst->ifindex == score->ifa->idev->dev->ifindex); + break; + case IPV6_SADDR_RULE_LABEL: + /* Rule 6: Prefer matching label */ + ret = ipv6_addr_label(&score->ifa->addr, score->addr_type, + score->ifa->idev->dev->ifindex) == dst->label; + break; +#ifdef CONFIG_IPV6_PRIVACY + case IPV6_SADDR_RULE_PRIVACY: + { + /* Rule 7: Prefer public address + * Note: prefer temporary address if use_tempaddr >= 2 + */ + int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ? 
+ !!(dst->prefs & IPV6_PREFER_SRC_TMP) : + score->ifa->idev->cnf.use_tempaddr >= 2; + ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp; + break; + } +#endif + case IPV6_SADDR_RULE_ORCHID: + /* Rule 8-: Prefer ORCHID vs ORCHID or + * non-ORCHID vs non-ORCHID + */ + ret = !(ipv6_addr_orchid(&score->ifa->addr) ^ + ipv6_addr_orchid(dst->addr)); + break; + case IPV6_SADDR_RULE_PREFIX: + /* Rule 8: Use longest matching prefix */ + score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, + dst->addr); + break; + default: + ret = 0; + } + + if (ret) + __set_bit(i, score->scorebits); + score->rule = i; +out: + return ret; +} + +int ipv6_dev_get_saddr(struct net_device *dst_dev, + struct in6_addr *daddr, unsigned int prefs, + struct in6_addr *saddr) +{ + struct ipv6_saddr_score scores[2], + *score = &scores[0], *hiscore = &scores[1]; + struct net *net = dev_net(dst_dev); + struct ipv6_saddr_dst dst; struct net_device *dev; + int dst_type; + + dst_type = __ipv6_addr_type(daddr); + dst.addr = daddr; + dst.ifindex = dst_dev ? dst_dev->ifindex : 0; + dst.scope = __ipv6_addr_src_scope(dst_type); + dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex); + dst.prefs = prefs; - memset(&hiscore, 0, sizeof(hiscore)); + hiscore->rule = -1; + hiscore->ifa = NULL; read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - /* Rule 0: Candidate Source Address (section 4) + /* Candidate Source Address (section 4) * - multicast and link-local destination address, * the set of candidate source address MUST only * include addresses assigned to interfaces @@ -948,9 +1093,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, * belonging to the same site as the outgoing * interface.) 
*/ - if ((daddr_type & IPV6_ADDR_MULTICAST || - daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && - daddr_dev && dev != daddr_dev) + if (((dst_type & IPV6_ADDR_MULTICAST) || + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && + dst.ifindex && dev->ifindex != dst.ifindex) continue; idev = __in6_dev_get(dev); @@ -958,12 +1103,10 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; read_lock_bh(&idev->lock); - for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { - struct ipv6_saddr_score score; - - score.addr_type = __ipv6_addr_type(&ifa->addr); + for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { + int i; - /* Rule 0: + /* * - Tentative Address (RFC2462 section 5.4) * - A tentative address is not considered * "assigned to an interface" in the traditional @@ -973,11 +1116,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, * addresses, and the unspecified address MUST * NOT be included in a candidate set. */ - if ((ifa->flags & IFA_F_TENTATIVE) && - (!(ifa->flags & IFA_F_OPTIMISTIC))) + if ((score->ifa->flags & IFA_F_TENTATIVE) && + (!(score->ifa->flags & IFA_F_OPTIMISTIC))) continue; - if (unlikely(score.addr_type == IPV6_ADDR_ANY || - score.addr_type & IPV6_ADDR_MULTICAST)) { + + score->addr_type = __ipv6_addr_type(&score->ifa->addr); + + if (unlikely(score->addr_type == IPV6_ADDR_ANY || + score->addr_type & IPV6_ADDR_MULTICAST)) { LIMIT_NETDEBUG(KERN_DEBUG "ADDRCONF: unspecified / multicast address " "assigned as unicast address on %s", @@ -985,207 +1131,63 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - score.attrs = 0; - score.matchlen = 0; - score.scope = 0; - score.rule = 0; - - if (ifa_result == NULL) { - /* record it if the first available entry */ - goto record_it; - } - - /* Rule 1: Prefer same address */ - if (hiscore.rule < 1) { - if (ipv6_addr_equal(&ifa_result->addr, daddr)) - hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; - hiscore.rule++; - } - if (ipv6_addr_equal(&ifa->addr, daddr)) { - 
score.attrs |= IPV6_SADDR_SCORE_LOCAL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { - score.rule = 1; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) - continue; - } - - /* Rule 2: Prefer appropriate scope */ - if (hiscore.rule < 2) { - hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); - hiscore.rule++; - } - score.scope = __ipv6_addr_src_scope(score.addr_type); - if (hiscore.scope < score.scope) { - if (hiscore.scope < daddr_scope) { - score.rule = 2; - goto record_it; - } else - continue; - } else if (score.scope < hiscore.scope) { - if (score.scope < daddr_scope) - break; /* addresses sorted by scope */ - else { - score.rule = 2; - goto record_it; - } - } + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); + + for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { + int minihiscore, miniscore; + + minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i); + miniscore = ipv6_get_saddr_eval(score, &dst, i); + + if (minihiscore > miniscore) { + if (i == IPV6_SADDR_RULE_SCOPE && + score->scopedist > 0) { + /* + * special case: + * each remaining entry + * has too small (not enough) + * scope, because ifa entries + * are sorted by their scope + * values. 
+ */ + goto try_nextdev; + } + break; + } else if (minihiscore < miniscore) { + struct ipv6_saddr_score *tmp; - /* Rule 3: Avoid deprecated and optimistic addresses */ - if (hiscore.rule < 3) { - if (ipv6_saddr_preferred(hiscore.addr_type) || - (((ifa_result->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) - hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; - hiscore.rule++; - } - if (ipv6_saddr_preferred(score.addr_type) || - (((ifa->flags & - (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { - score.attrs |= IPV6_SADDR_SCORE_PREFERRED; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { - score.rule = 3; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) - continue; - } + if (hiscore->ifa) + in6_ifa_put(hiscore->ifa); - /* Rule 4: Prefer home address */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - if (hiscore.rule < 4) { - if (ifa_result->flags & IFA_F_HOMEADDRESS) - hiscore.attrs |= IPV6_SADDR_SCORE_HOA; - hiscore.rule++; - } - if (ifa->flags & IFA_F_HOMEADDRESS) { - score.attrs |= IPV6_SADDR_SCORE_HOA; - if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { - score.rule = 4; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) - continue; - } -#else - if (hiscore.rule < 4) - hiscore.rule++; -#endif + in6_ifa_hold(score->ifa); - /* Rule 5: Prefer outgoing interface */ - if (hiscore.rule < 5) { - if (daddr_dev == NULL || - daddr_dev == ifa_result->idev->dev) - hiscore.attrs |= IPV6_SADDR_SCORE_OIF; - hiscore.rule++; - } - if (daddr_dev == NULL || - daddr_dev == ifa->idev->dev) { - score.attrs |= IPV6_SADDR_SCORE_OIF; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { - score.rule = 5; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) - continue; - } + tmp = hiscore; + hiscore = score; + score = tmp; - /* Rule 6: Prefer matching label */ - if (hiscore.rule < 6) { - if (ipv6_addr_label(&ifa_result->addr, - hiscore.addr_type, - ifa_result->idev->dev->ifindex) == 
daddr_label) - hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; - hiscore.rule++; - } - if (ipv6_addr_label(&ifa->addr, - score.addr_type, - ifa->idev->dev->ifindex) == daddr_label) { - score.attrs |= IPV6_SADDR_SCORE_LABEL; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { - score.rule = 6; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) - continue; - } + /* restore our iterator */ + score->ifa = hiscore->ifa; -#ifdef CONFIG_IPV6_PRIVACY - /* Rule 7: Prefer public address - * Note: prefer temprary address if use_tempaddr >= 2 - */ - if (hiscore.rule < 7) { - if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ - (ifa_result->idev->cnf.use_tempaddr >= 2)) - hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; - hiscore.rule++; - } - if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ - (ifa->idev->cnf.use_tempaddr >= 2)) { - score.attrs |= IPV6_SADDR_SCORE_PRIVACY; - if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { - score.rule = 7; - goto record_it; + break; } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) - continue; - } -#else - if (hiscore.rule < 7) - hiscore.rule++; -#endif - /* Rule 8: Use longest matching prefix */ - if (hiscore.rule < 8) { - hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); - hiscore.rule++; - } - score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); - if (score.matchlen > hiscore.matchlen) { - score.rule = 8; - goto record_it; } -#if 0 - else if (score.matchlen < hiscore.matchlen) - continue; -#endif - - /* Final Rule: choose first available one */ - continue; -record_it: - if (ifa_result) - in6_ifa_put(ifa_result); - in6_ifa_hold(ifa); - ifa_result = ifa; - hiscore = score; } +try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); read_unlock(&dev_base_lock); - if (!ifa_result) + if (!hiscore->ifa) return -EADDRNOTAVAIL; - ipv6_addr_copy(saddr, &ifa_result->addr); - in6_ifa_put(ifa_result); + ipv6_addr_copy(saddr, &hiscore->ifa->addr); + in6_ifa_put(hiscore->ifa); return 0; } - -int ipv6_get_saddr(struct 
dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr); -} - -EXPORT_SYMBOL(ipv6_get_saddr); +EXPORT_SYMBOL(ipv6_dev_get_saddr); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) @@ -1231,7 +1233,7 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr, read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { @@ -1253,7 +1255,7 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, u8 hash = ipv6_addr_hash(addr); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) @@ -1271,7 +1273,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr, read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || @@ -1573,7 +1575,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_expires = expires, .fc_dst_len = plen, .fc_flags = RTF_UP | flags, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, pfx); @@ -1600,7 +1602,7 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); @@ -1617,7 +1619,7 @@ static void sit_route_add(struct net_device 
*dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 96, .fc_flags = RTF_UP | RTF_NONEXTHOP, - .fc_nlinfo.nl_net = &init_net, + .fc_nlinfo.nl_net = dev_net(dev), }; /* prefix length - 96 bits "::d.d.d.d" */ @@ -1718,7 +1720,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (pinfo->onlink) { struct rt6_info *rt; - rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); + rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, + dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { @@ -1761,7 +1764,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -1887,7 +1890,7 @@ ok: * Special case for SIT interfaces where we create a new "virtual" * device. */ -int addrconf_set_dstaddr(void __user *arg) +int addrconf_set_dstaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; struct net_device *dev; @@ -1899,7 +1902,7 @@ int addrconf_set_dstaddr(void __user *arg) if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); + dev = __dev_get_by_index(net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1930,7 +1933,8 @@ int addrconf_set_dstaddr(void __user *arg) if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) + dev = __dev_get_by_name(net, p.name); + if (!dev) goto err_exit; err = dev_open(dev); } @@ -1945,8 +1949,9 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) +static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, + int plen, __u8 ifa_flags, __u32 prefered_lft, + __u32 valid_lft) { struct 
inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -1960,7 +1965,8 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) + dev = __dev_get_by_index(net, ifindex); + if (!dev) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -2005,13 +2011,15 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, return PTR_ERR(ifp); } -static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, + int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) + dev = __dev_get_by_index(net, ifindex); + if (!dev) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2039,7 +2047,7 @@ static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) } -int addrconf_add_ifaddr(void __user *arg) +int addrconf_add_ifaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; int err; @@ -2051,13 +2059,14 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + ireq.ifr6_prefixlen, IFA_F_PERMANENT, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } -int addrconf_del_ifaddr(void __user *arg) +int addrconf_del_ifaddr(struct net *net, void __user *arg) { struct in6_ifreq ireq; int err; @@ -2069,7 +2078,8 @@ int addrconf_del_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + ireq.ifr6_prefixlen); rtnl_unlock(); return err; } @@ -2080,6 +2090,7 @@ 
static void sit_add_v4_addrs(struct inet6_dev *idev) struct inet6_ifaddr * ifp; struct in6_addr addr; struct net_device *dev; + struct net *net = dev_net(idev->dev); int scope; ASSERT_RTNL(); @@ -2106,7 +2117,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2269,15 +2280,16 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) static void ip6_tnl_add_linklocal(struct inet6_dev *idev) { struct net_device *link_dev; + struct net *net = dev_net(idev->dev); /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { + (link_dev = __dev_get_by_index(net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(&init_net, link_dev) { + for_each_netdev(net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2310,9 +2322,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, int run_pending = 0; int err; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2452,6 +2461,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) { struct inet6_dev *idev; struct inet6_ifaddr *ifa, **bifa; + struct net *net = dev_net(dev); int i; ASSERT_RTNL(); @@ -2459,7 +2469,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) if (dev == init_net.loopback_dev && how == 1) how = 0; - rt6_ifdown(dev); + rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); @@ -2775,12 +2785,12 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) { struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = 
seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { ifa = inet6_addr_lst[state->bucket]; - while (ifa && ifa->idev->dev->nd_net != net) + while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) ifa = ifa->lst_next; if (ifa) break; @@ -2791,12 +2801,12 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; - struct net *net = state->p.net; + struct net *net = seq_file_net(seq); ifa = ifa->lst_next; try_again: if (ifa) { - if (ifa->idev->dev->nd_net != net) { + if (!net_eq(dev_net(ifa->idev->dev), net)) { ifa = ifa->lst_next; goto try_again; } @@ -2914,7 +2924,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { - if (ifp->idev->dev->nd_net != net) + if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && (ifp->flags & IFA_F_HOMEADDRESS)) { @@ -3063,15 +3073,12 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3081,7 +3088,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (pfx == NULL) return -EINVAL; - return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, @@ 
-3124,7 +3131,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3134,9 +3141,6 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3157,7 +3161,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(&init_net, ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3170,8 +3174,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. 
*/ - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - ifa_flags, preferred_lft, valid_lft); + return inet6_addr_add(net, ifm->ifa_index, pfx, + ifm->ifa_prefixlen, ifa_flags, + preferred_lft, valid_lft); } if (nlh->nlmsg_flags & NLM_F_EXCL || @@ -3336,12 +3341,13 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct inet6_ifaddr *ifa; struct ifmcaddr6 *ifmca; struct ifacaddr6 *ifaca; + struct net *net = sock_net(skb->sk); s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3408,42 +3414,30 @@ cont: static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; - if (net != &init_net) - return 0; - return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3452,9 +3446,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct sk_buff *skb; int err; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3467,7 +3458,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* 
nlh, ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(&init_net, ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3487,7 +3478,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3497,6 +3488,7 @@ errout: static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; + struct net *net = dev_net(ifa->idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); @@ -3510,10 +3502,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3672,18 +3664,15 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; - if (net != &init_net) - return 0; - read_lock(&dev_base_lock); idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if ((idev = in6_dev_get(dev)) == NULL) @@ -3705,6 +3694,7 @@ cont: void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; + struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); @@ -3718,10 +3708,10 @@ void inet6_ifinfo_notify(int event, 
struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3774,6 +3764,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; + struct net *net = dev_net(idev->dev); int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); @@ -3787,10 +3778,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4185,7 +4176,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, NULL); - __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name, + __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } @@ -4280,6 +4271,32 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) EXPORT_SYMBOL(unregister_inet6addr_notifier); + +static int addrconf_net_init(struct net *net) +{ + return 0; +} + +static void addrconf_net_exit(struct net *net) +{ + struct net_device *dev; + + rtnl_lock(); + /* clean dev list */ + for_each_netdev(net, dev) { + if (__in6_dev_get(dev) == NULL) + continue; + addrconf_ifdown(dev, 1); + } + addrconf_ifdown(net->loopback_dev, 2); + rtnl_unlock(); +} + +static struct pernet_operations addrconf_net_ops = { + .init = addrconf_net_init, + 
.exit = addrconf_net_exit, +}; + /* * Init / cleanup code */ @@ -4321,14 +4338,9 @@ int __init addrconf_init(void) if (err) goto errlo; - ip6_null_entry.u.dst.dev = init_net.loopback_dev; - ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.u.dst.dev = init_net.loopback_dev; - ip6_prohibit_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); - ip6_blk_hole_entry.u.dst.dev = init_net.loopback_dev; - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); -#endif + err = register_pernet_device(&addrconf_net_ops); + if (err) + return err; register_netdevice_notifier(&ipv6_dev_notf); @@ -4358,31 +4370,19 @@ errlo: void addrconf_cleanup(void) { - struct net_device *dev; struct inet6_ifaddr *ifa; int i; unregister_netdevice_notifier(&ipv6_dev_notf); + unregister_pernet_device(&addrconf_net_ops); unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); /* - * clean dev list. - */ - - for_each_netdev(&init_net, dev) { - if (__in6_dev_get(dev) == NULL) - continue; - addrconf_ifdown(dev, 1); - } - addrconf_ifdown(init_net.loopback_dev, 2); - - /* * Check hash table. */ - write_lock_bh(&addrconf_hash_lock); for (i=0; i < IN6_ADDR_HSIZE; i++) { for (ifa=inet6_addr_lst[i]; ifa; ) { @@ -4399,6 +4399,7 @@ void addrconf_cleanup(void) write_unlock_bh(&addrconf_hash_lock); del_timer(&addr_chk_timer); - rtnl_unlock(); + + unregister_pernet_subsys(&addrconf_net_ops); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index a3c5a72218fd..9bfa8846f262 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -58,6 +58,7 @@ static struct ip6addrlbl_table * ::ffff:0:0/96 V4MAPPED 4 * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) + * 2001:10::/28 N/A 7 ORCHID (RFC 4843) * * Note: 0xffffffff is used if we do not have any policies. 
*/ @@ -85,6 +86,10 @@ static const __initdata struct ip6addrlbl_init_table .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, .prefixlen = 32, .label = 6, + },{ /* 2001:10::/28 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, + .prefixlen = 28, + .label = 7, },{ /* ::ffff:0:0 */ .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, .prefixlen = 96, @@ -161,7 +166,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) rcu_read_unlock(); ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", - __FUNCTION__, + __func__, NIP6(*addr), type, ifindex, label); @@ -177,7 +182,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, int addrtype; ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", - __FUNCTION__, + __func__, NIP6(*prefix), prefixlen, ifindex, (unsigned int)label); @@ -221,7 +226,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) int ret = 0; ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", - __FUNCTION__, + __func__, newp, replace); if (hlist_empty(&ip6addrlbl_table.head)) { @@ -263,7 +268,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, int ret = 0; ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __FUNCTION__, + __func__, NIP6(*prefix), prefixlen, ifindex, (unsigned int)label, @@ -289,7 +294,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, int ret = -ESRCH; ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __FUNCTION__, + __func__, NIP6(*prefix), prefixlen, ifindex); @@ -313,7 +318,7 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, int ret; ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __FUNCTION__, + __func__, NIP6(*prefix), prefixlen, ifindex); @@ -330,7 +335,7 @@ static __init int ip6addrlbl_init(void) int err 
= 0; int i; - ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__); + ADDRLABEL(KERN_DEBUG "%s()\n", __func__); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, @@ -359,7 +364,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *pfx; @@ -447,7 +452,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; struct hlist_node *pos; int idx = 0, s_idx = cb->args[0]; @@ -485,7 +490,7 @@ static inline int ip6addrlbl_msgsize(void) static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *addr; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f0aa97738746..1731b0abf7f5 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -92,9 +92,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -248,6 +245,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; int addr_type = 0; @@ -278,7 +276,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { + if (inet_addr_type(net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } @@ -300,7 +298,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } - dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -312,7 +310,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + if (!ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); @@ -440,6 +438,7 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; + struct net *net = sock_net(sk); switch(cmd) { @@ -452,14 +451,14 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: - return(ipv6_route_ioctl(cmd,(void __user *)arg)); + return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); case SIOCSIFADDR: - return addrconf_add_ifaddr((void __user *) arg); + return addrconf_add_ifaddr(net, (void __user *) arg); case SIOCDIFADDR: - return addrconf_del_ifaddr((void __user *) arg); + return addrconf_del_ifaddr(net, (void __user *) arg); case SIOCSIFDSTADDR: - return addrconf_set_dstaddr((void __user *) arg); + return addrconf_set_dstaddr(net, (void __user *) arg); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -678,6 +677,129 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL_GPL(ipv6_opt_accepted); +static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, + int proto) +{ + struct inet6_protocol *ops = NULL; + + for (;;) { + struct ipv6_opt_hdr *opth; + int len; + + if (proto != NEXTHDR_HOP) { + ops = 
rcu_dereference(inet6_protos[proto]); + + if (unlikely(!ops)) + break; + + if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + + if (unlikely(!pskb_may_pull(skb, 8))) + break; + + opth = (void *)skb->data; + len = ipv6_optlen(opth); + + if (unlikely(!pskb_may_pull(skb, len))) + break; + + proto = opth->nexthdr; + __skb_pull(skb, len); + } + + return ops; +} + +static int ipv6_gso_send_check(struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + int err = -EINVAL; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + err = -EPROTONOSUPPORT; + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_send_check)) { + skb_reset_transport_header(skb); + err = ops->gso_send_check(skb); + } + rcu_read_unlock(); + +out: + return err; +} + +static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + + if (!(features & NETIF_F_V6_CSUM)) + features &= ~NETIF_F_SG; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + 0))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + goto out; + + ipv6h = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*ipv6h)); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (likely(ops && ops->gso_segment)) { + skb_reset_transport_header(skb); + segs = ops->gso_segment(skb, features); + } + rcu_read_unlock(); + + if (unlikely(IS_ERR(segs))) + goto out; + + for (skb = segs; skb; skb = skb->next) { + ipv6h = ipv6_hdr(skb); + ipv6h->payload_len = htons(skb->len - skb->mac_len - + sizeof(*ipv6h)); + } + +out: + return segs; +} + +static struct packet_type ipv6_packet_type = { + .type = __constant_htons(ETH_P_IPV6), + .func = ipv6_rcv, + 
.gso_send_check = ipv6_gso_send_check, + .gso_segment = ipv6_gso_segment, +}; + +static int __init ipv6_packet_init(void) +{ + dev_add_pack(&ipv6_packet_type); + return 0; +} + +static void ipv6_packet_cleanup(void) +{ + dev_remove_pack(&ipv6_packet_type); +} + static int __init init_ipv6_mibs(void) { if (snmp_mib_init((void **)ipv6_statistics, @@ -720,6 +842,8 @@ static void cleanup_ipv6_mibs(void) static int inet6_net_init(struct net *net) { + int err = 0; + net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.flush_delay = 0; net->ipv6.sysctl.ip6_rt_max_size = 4096; @@ -731,12 +855,36 @@ static int inet6_net_init(struct net *net) net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.icmpv6_time = 1*HZ; - return 0; +#ifdef CONFIG_PROC_FS + err = udp6_proc_init(net); + if (err) + goto out; + err = tcp6_proc_init(net); + if (err) + goto proc_tcp6_fail; + err = ac6_proc_init(net); + if (err) + goto proc_ac6_fail; +out: +#endif + return err; + +#ifdef CONFIG_PROC_FS +proc_ac6_fail: + tcp6_proc_exit(net); +proc_tcp6_fail: + udp6_proc_exit(net); + goto out; +#endif } static void inet6_net_exit(struct net *net) { - return; +#ifdef CONFIG_PROC_FS + udp6_proc_exit(net); + tcp6_proc_exit(net); + ac6_proc_exit(net); +#endif } static struct pernet_operations inet6_net_ops = { @@ -802,19 +950,13 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - -#ifdef CONFIG_SYSCTL - err = ipv6_sysctl_register(); - if (err) - goto sysctl_fail; -#endif - err = icmpv6_init(&inet6_family_ops); + err = icmpv6_init(); if (err) goto icmp_fail; - err = ndisc_init(&inet6_family_ops); + err = ndisc_init(); if (err) goto ndisc_fail; - err = igmp6_init(&inet6_family_ops); + err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); @@ -825,17 +967,10 @@ static int __init inet6_init(void) err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; - if (tcp6_proc_init()) - goto proc_tcp6_fail; - if 
(udp6_proc_init()) - goto proc_udp6_fail; if (udplite6_proc_init()) goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; - - if (ac6_proc_init()) - goto proc_anycast6_fail; if (if6_proc_init()) goto proc_if6_fail; #endif @@ -874,9 +1009,19 @@ static int __init inet6_init(void) err = ipv6_packet_init(); if (err) goto ipv6_packet_fail; + +#ifdef CONFIG_SYSCTL + err = ipv6_sysctl_register(); + if (err) + goto sysctl_fail; +#endif out: return err; +#ifdef CONFIG_SYSCTL +sysctl_fail: + ipv6_packet_cleanup(); +#endif ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -897,16 +1042,10 @@ ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: - ac6_proc_exit(); -proc_anycast6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: udplite6_proc_exit(); proc_udplite6_fail: - udp6_proc_exit(); -proc_udp6_fail: - tcp6_proc_exit(); -proc_tcp6_fail: raw6_proc_exit(); proc_raw6_fail: #endif @@ -918,10 +1057,6 @@ igmp_fail: ndisc_fail: icmpv6_cleanup(); icmp_fail: -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -sysctl_fail: -#endif unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: cleanup_ipv6_mibs(); @@ -949,6 +1084,9 @@ static void __exit inet6_exit(void) /* Disallow any further netlink messages */ rtnl_unregister_all(PF_INET6); +#ifdef CONFIG_SYSCTL + ipv6_sysctl_unregister(); +#endif udpv6_exit(); udplitev6_exit(); tcpv6_exit(); @@ -964,11 +1102,8 @@ static void __exit inet6_exit(void) /* Cleanup code parts. 
*/ if6_proc_exit(); - ac6_proc_exit(); ipv6_misc_proc_exit(); udplite6_proc_exit(); - udp6_proc_exit(); - tcp6_proc_exit(); raw6_proc_exit(); #endif ipv6_netfilter_fini(); @@ -976,9 +1111,7 @@ static void __exit inet6_exit(void) ndisc_cleanup(); icmpv6_cleanup(); rawv6_exit(); -#ifdef CONFIG_SYSCTL - ipv6_sysctl_unregister(); -#endif + unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 9c7f83fbc3a1..463bd95d6b13 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -82,6 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct net_device *dev = NULL; struct inet6_dev *idev; struct ipv6_ac_socklist *pac; + struct net *net = sock_net(sk); int ishost = !ipv6_devconf.forwarding; int err = 0; @@ -89,7 +90,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(&init_net, addr, NULL, 0)) + if (ipv6_chk_addr(net, addr, NULL, 0)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -101,7 +102,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); @@ -112,10 +113,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -176,6 +177,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev; 
struct ipv6_ac_socklist *pac, *prev_pac; + struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; @@ -196,7 +198,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(&init_net, pac->acl_ifindex); + dev = dev_get_by_index(net, pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -210,6 +212,7 @@ void ipv6_sock_ac_close(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; + struct net *net = sock_net(sk); int prev_index; write_lock_bh(&ipv6_sk_ac_lock); @@ -224,7 +227,7 @@ void ipv6_sock_ac_close(struct sock *sk) if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(&init_net, pac->acl_ifindex); + dev = dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -422,14 +425,15 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) /* * check if given interface (or any, if dev==0) has this anycast address */ -int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) +int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, + struct in6_addr *addr) { int found = 0; if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) + for_each_netdev(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; @@ -441,6 +445,7 @@ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) #ifdef CONFIG_PROC_FS struct ac6_iter_state { + struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; }; @@ -451,9 +456,10 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); + struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, 
state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -551,8 +557,8 @@ static const struct seq_operations ac6_seq_ops = { static int ac6_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ac6_seq_ops, - sizeof(struct ac6_iter_state)); + return seq_open_net(inode, file, &ac6_seq_ops, + sizeof(struct ac6_iter_state)); } static const struct file_operations ac6_seq_fops = { @@ -560,20 +566,20 @@ static const struct file_operations ac6_seq_fops = { .open = ac6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init ac6_proc_init(void) +int ac6_proc_init(struct net *net) { - if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; } -void ac6_proc_exit(void) +void ac6_proc_exit(struct net *net) { - proc_net_remove(&init_net, "anycast6"); + proc_net_remove(net, "anycast6"); } #endif diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 695c0ca8a417..cac580749ebe 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,24 +29,22 @@ struct fib6_rule u8 tclass; }; -static struct fib_rules_ops fib6_rules_ops; - -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, + int flags, pol_lookup_t lookup) { struct fib_lookup_arg arg = { .lookup_ptr = lookup, }; - fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); if (arg.rule) fib_rule_put(arg.rule); if (arg.result) return arg.result; - dst_hold(&ip6_null_entry.u.dst); - return &ip6_null_entry.u.dst; + dst_hold(&net->ipv6.ip6_null_entry->u.dst); + return &net->ipv6.ip6_null_entry->u.dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -54,28 +52,29 @@ static int 
fib6_rule_action(struct fib_rule *rule, struct flowi *flp, { struct rt6_info *rt = NULL; struct fib6_table *table; + struct net *net = rule->fr_net; pol_lookup_t lookup = arg->lookup_ptr; switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: - rt = &ip6_null_entry; + rt = net->ipv6.ip6_null_entry; goto discard_pkt; default: case FR_ACT_BLACKHOLE: - rt = &ip6_blk_hole_entry; + rt = net->ipv6.ip6_blk_hole_entry; goto discard_pkt; case FR_ACT_PROHIBIT: - rt = &ip6_prohibit_entry; + rt = net->ipv6.ip6_prohibit_entry; goto discard_pkt; } - table = fib6_get_table(rule->table); + table = fib6_get_table(net, rule->table); if (table) - rt = lookup(table, flp, flags); + rt = lookup(net, table, flp, flags); - if (rt != &ip6_null_entry) { + if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; /* @@ -85,8 +84,18 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if ((rule->flags & FIB_RULE_FIND_SADDR) && r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) { struct in6_addr saddr; - if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst, - &saddr)) + unsigned int srcprefs = 0; + + if (flags & RT6_LOOKUP_F_SRCPREF_TMP) + srcprefs |= IPV6_PREFER_SRC_TMP; + if (flags & RT6_LOOKUP_F_SRCPREF_PUBLIC) + srcprefs |= IPV6_PREFER_SRC_PUBLIC; + if (flags & RT6_LOOKUP_F_SRCPREF_COA) + srcprefs |= IPV6_PREFER_SRC_COA; + + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + &flp->fl6_dst, srcprefs, + &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) @@ -145,13 +154,14 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb) { int err = -EINVAL; + struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (rule->action == FR_ACT_TO_TBL) { if (rule->table == RT6_TABLE_UNSPEC) goto errout; - if (fib6_new_table(rule->table) == NULL) { + if (fib6_new_table(net, rule->table) == NULL) { err = -ENOBUFS; goto errout; } @@ -234,7 
+244,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static struct fib_rules_ops fib6_rules_ops = { +static struct fib_rules_ops fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), @@ -247,45 +257,64 @@ static struct fib_rules_ops fib6_rules_ops = { .nlmsg_payload = fib6_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV6_RULE, .policy = fib6_rule_policy, - .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, .fro_net = &init_net, }; -static int __init fib6_default_rules_init(void) +static int fib6_rules_net_init(struct net *net) { - int err; + int err = -ENOMEM; - err = fib_default_rule_add(&fib6_rules_ops, 0, - RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); - if (err < 0) - return err; - err = fib_default_rule_add(&fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); - if (err < 0) - return err; - return 0; -} + net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template, + sizeof(*net->ipv6.fib6_rules_ops), + GFP_KERNEL); + if (!net->ipv6.fib6_rules_ops) + goto out; -int __init fib6_rules_init(void) -{ - int ret; + net->ipv6.fib6_rules_ops->fro_net = net; + INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list); - ret = fib6_default_rules_init(); - if (ret) - goto out; + err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, + RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); + if (err) + goto out_fib6_rules_ops; - ret = fib_rules_register(&fib6_rules_ops); - if (ret) - goto out_default_rules_init; + err = fib_default_rule_add(net->ipv6.fib6_rules_ops, + 0x7FFE, RT6_TABLE_MAIN, 0); + if (err) + goto out_fib6_default_rule_add; + + err = fib_rules_register(net->ipv6.fib6_rules_ops); + if (err) + goto out_fib6_default_rule_add; out: - return ret; + return err; -out_default_rules_init: - fib_rules_cleanup_ops(&fib6_rules_ops); +out_fib6_default_rule_add: + fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops); +out_fib6_rules_ops: + 
kfree(net->ipv6.fib6_rules_ops); goto out; } +static void fib6_rules_net_exit(struct net *net) +{ + fib_rules_unregister(net->ipv6.fib6_rules_ops); + kfree(net->ipv6.fib6_rules_ops); +} + +static struct pernet_operations fib6_rules_net_ops = { + .init = fib6_rules_net_init, + .exit = fib6_rules_net_exit, +}; + +int __init fib6_rules_init(void) +{ + return register_pernet_subsys(&fib6_rules_net_ops); +} + + void fib6_rules_cleanup(void) { - fib_rules_unregister(&fib6_rules_ops); + return unregister_pernet_subsys(&fib6_rules_net_ops); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f204a7275a0d..227ce3d2339d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -80,8 +80,10 @@ EXPORT_SYMBOL(icmpv6msg_statistics); * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL; -#define icmpv6_socket __get_cpu_var(__icmpv6_socket) +static inline struct sock *icmpv6_sk(struct net *net) +{ + return net->ipv6.icmp_sk[smp_processor_id()]; +} static int icmpv6_rcv(struct sk_buff *skb); @@ -90,11 +92,11 @@ static struct inet6_protocol icmpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static __inline__ int icmpv6_xmit_lock(void) +static __inline__ int icmpv6_xmit_lock(struct sock *sk) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. 
@@ -105,9 +107,9 @@ static __inline__ int icmpv6_xmit_lock(void) return 0; } -static __inline__ void icmpv6_xmit_unlock(void) +static __inline__ void icmpv6_xmit_unlock(struct sock *sk) { - spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); } /* @@ -161,6 +163,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, struct flowi *fl) { struct dst_entry *dst; + struct net *net = sock_net(sk); int res = 0; /* Informational messages are not limited. */ @@ -176,7 +179,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ - dst = ip6_route_output(sk, fl); + dst = ip6_route_output(net, sk, fl); if (dst->error) { IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -184,7 +187,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, res = 1; } else { struct rt6_info *rt = (struct rt6_info *)dst; - int tmo = init_net.ipv6.sysctl.icmpv6_time; + int tmo = net->ipv6.sysctl.icmpv6_time; /* Give more bandwidth to wider prefixes. 
*/ if (rt->rt6i_dst.plen < 128) @@ -303,6 +306,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct net_device *dev) { + struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; @@ -332,7 +336,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* @@ -389,12 +393,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -460,9 +464,7 @@ route_done: else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = np->tclass; if (tclass < 0) @@ -498,13 +500,14 @@ out_put: out_dst_release: dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } EXPORT_SYMBOL(icmpv6_send); static void icmpv6_echo_reply(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; @@ -535,12 +538,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); - if (icmpv6_xmit_lock()) - return; - - sk = icmpv6_socket->sk; + sk = icmpv6_sk(net); np = inet6_sk(sk); + if (icmpv6_xmit_lock(sk)) + return; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -555,9 +558,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) else hlimit = np->hop_limit; if (hlimit < 0) - hlimit 
= dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = np->tclass; if (tclass < 0) @@ -584,7 +585,7 @@ out_put: in6_dev_put(idev); dst_release(dst); out: - icmpv6_xmit_unlock(); + icmpv6_xmit_unlock(sk); } static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) @@ -775,19 +776,41 @@ drop_no_count: return 0; } +void icmpv6_flow_init(struct sock *sk, struct flowi *fl, + u8 type, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + int oif) +{ + memset(fl, 0, sizeof(*fl)); + ipv6_addr_copy(&fl->fl6_src, saddr); + ipv6_addr_copy(&fl->fl6_dst, daddr); + fl->proto = IPPROTO_ICMPV6; + fl->fl_icmp_type = type; + fl->fl_icmp_code = 0; + fl->oif = oif; + security_sk_classify_flow(sk, fl); +} + /* - * Special lock-class for __icmpv6_socket: + * Special lock-class for __icmpv6_sk: */ static struct lock_class_key icmpv6_socket_sk_dst_lock_key; -int __init icmpv6_init(struct net_proto_family *ops) +static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; int err, i, j; + net->ipv6.icmp_sk = + kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); + if (net->ipv6.icmp_sk == NULL) + return -ENOMEM; + for_each_possible_cpu(i) { + struct socket *sock; err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, - &per_cpu(__icmpv6_socket, i)); + &sock); if (err < 0) { printk(KERN_ERR "Failed to initialize the ICMP6 control socket " @@ -796,12 +819,14 @@ int __init icmpv6_init(struct net_proto_family *ops) goto fail; } - sk = per_cpu(__icmpv6_socket, i)->sk; + net->ipv6.icmp_sk[i] = sk = sock->sk; + sk_change_net(sk, net); + sk->sk_allocation = GFP_ATOMIC; /* * Split off their lock-class, because sk->sk_dst_lock * gets used from softirqs, which is safe for - * __icmpv6_socket (because those never get directly used + * __icmpv6_sk (because those never get directly used * via userspace syscalls), but unsafe for normal sockets. 
*/ lockdep_set_class(&sk->sk_dst_lock, @@ -815,36 +840,56 @@ int __init icmpv6_init(struct net_proto_family *ops) sk->sk_prot->unhash(sk); } - - - if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) { - printk(KERN_ERR "Failed to register ICMP6 protocol\n"); - err = -EAGAIN; - goto fail; - } - return 0; fail: - for (j = 0; j < i; j++) { - if (!cpu_possible(j)) - continue; - sock_release(per_cpu(__icmpv6_socket, j)); - } - + for (j = 0; j < i; j++) + sk_release_kernel(net->ipv6.icmp_sk[j]); + kfree(net->ipv6.icmp_sk); return err; } -void icmpv6_cleanup(void) +static void __net_exit icmpv6_sk_exit(struct net *net) { int i; for_each_possible_cpu(i) { - sock_release(per_cpu(__icmpv6_socket, i)); + sk_release_kernel(net->ipv6.icmp_sk[i]); } + kfree(net->ipv6.icmp_sk); +} + +static struct pernet_operations icmpv6_sk_ops = { + .init = icmpv6_sk_init, + .exit = icmpv6_sk_exit, +}; + +int __init icmpv6_init(void) +{ + int err; + + err = register_pernet_subsys(&icmpv6_sk_ops); + if (err < 0) + return err; + + err = -EAGAIN; + if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) + goto fail; + return 0; + +fail: + printk(KERN_ERR "Failed to register ICMP6 protocol\n"); + unregister_pernet_subsys(&icmpv6_sk_ops); + return err; +} + +void icmpv6_cleanup(void) +{ + unregister_pernet_subsys(&icmpv6_sk_ops); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } + static const struct icmp6_err { int err; int fatal; @@ -925,6 +970,10 @@ struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) table = kmemdup(ipv6_icmp_table_template, sizeof(ipv6_icmp_table_template), GFP_KERNEL); + + if (table) + table[0].data = &net->ipv6.sysctl.icmpv6_time; + return table; } #endif diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 99fd25f7f005..580014aea4d6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -24,7 +24,7 @@ void __inet6_hash(struct sock *sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; + struct 
inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_head *list; rwlock_t *lock; @@ -43,7 +43,7 @@ void __inet6_hash(struct sock *sk) } __sk_add_node(sk, list); - sock_prot_inuse_add(sk->sk_prot, 1); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -105,7 +105,7 @@ struct sock *inet6_lookup_listener(struct net *net, read_lock(&hashinfo->lhash_lock); sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { - if (sk->sk_net == net && inet_sk(sk)->num == hnum && + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -172,7 +172,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; - struct net *net = sk->sk_net; + struct net *net = sock_net(sk); prefetch(head->chain.first); write_lock(lock); @@ -204,7 +204,7 @@ unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; - sock_prot_inuse_add(sk->sk_prot, 1); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); if (twp != NULL) { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bab72b6f1444..b3f6e03c454c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -48,8 +48,6 @@ #define RT6_TRACE(x...) 
do { ; } while (0) #endif -struct rt6_statistics rt6_stats; - static struct kmem_cache * fib6_node_kmem __read_mostly; enum fib_walk_state_t @@ -66,6 +64,7 @@ enum fib_walk_state_t struct fib6_cleaner_t { struct fib6_walker_t w; + struct net *net; int (*func)(struct rt6_info *, void *arg); void *arg; }; @@ -78,9 +77,10 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); -static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); -static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn, + struct rt6_info *rt); +static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); +static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); static int fib6_walk_continue(struct fib6_walker_t *w); @@ -93,7 +93,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w); static __u32 rt_sernum; -static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); +static void fib6_gc_timer_cb(unsigned long arg); static struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, @@ -166,22 +166,13 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -static struct fib6_table fib6_main_tbl = { - .tb6_id = RT6_TABLE_MAIN, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; - #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 1 #endif -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -static void fib6_link_table(struct fib6_table *tb) +static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -197,52 +188,46 @@ static void fib6_link_table(struct fib6_table *tb) * No protection necessary, this is the only list mutatation * operation, tables never disappear once 
they exist. */ - hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]); } #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static struct fib6_table fib6_local_tbl = { - .tb6_id = RT6_TABLE_LOCAL, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; -static struct fib6_table *fib6_alloc_table(u32 id) +static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) { struct fib6_table *table; table = kzalloc(sizeof(*table), GFP_ATOMIC); if (table != NULL) { table->tb6_id = id; - table->tb6_root.leaf = &ip6_null_entry; + table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; } return table; } -struct fib6_table *fib6_new_table(u32 id) +struct fib6_table *fib6_new_table(struct net *net, u32 id) { struct fib6_table *tb; if (id == 0) id = RT6_TABLE_MAIN; - tb = fib6_get_table(id); + tb = fib6_get_table(net, id); if (tb) return tb; - tb = fib6_alloc_table(id); + tb = fib6_alloc_table(net, id); if (tb != NULL) - fib6_link_table(tb); + fib6_link_table(net, tb); return tb; } -struct fib6_table *fib6_get_table(u32 id) +struct fib6_table *fib6_get_table(struct net *net, u32 id) { struct fib6_table *tb; + struct hlist_head *head; struct hlist_node *node; unsigned int h; @@ -250,7 +235,8 @@ struct fib6_table *fib6_get_table(u32 id) id = RT6_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { if (tb->tb6_id == id) { rcu_read_unlock(); return tb; @@ -261,33 +247,32 @@ struct fib6_table *fib6_get_table(u32 id) return NULL; } -static void __init fib6_tables_init(void) +static void fib6_tables_init(struct net *net) { - fib6_link_table(&fib6_main_tbl); - fib6_link_table(&fib6_local_tbl); + fib6_link_table(net, net->ipv6.fib6_main_tbl); + 
fib6_link_table(net, net->ipv6.fib6_local_tbl); } - #else -struct fib6_table *fib6_new_table(u32 id) +struct fib6_table *fib6_new_table(struct net *net, u32 id) { - return fib6_get_table(id); + return fib6_get_table(net, id); } -struct fib6_table *fib6_get_table(u32 id) +struct fib6_table *fib6_get_table(struct net *net, u32 id) { - return &fib6_main_tbl; + return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, + int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl, flags); } -static void __init fib6_tables_init(void) +static void fib6_tables_init(struct net *net) { - fib6_link_table(&fib6_main_tbl); + fib6_link_table(net, net->ipv6.fib6_main_tbl); } #endif @@ -361,18 +346,16 @@ end: static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; struct fib6_table *tb; struct hlist_node *node; + struct hlist_head *head; int res = 0; - if (net != &init_net) - return 0; - s_h = cb->args[0]; s_e = cb->args[1]; @@ -401,7 +384,8 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb6_hlist) { if (e < s_e) goto next; res = fib6_dump_table(tb, skb, cb); @@ -667,29 +651,29 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); inet6_rt_notify(RTM_NEWROUTE, rt, info); - rt6_stats.fib_rt_entries++; + info->nl_net->ipv6.rt6_stats->fib_rt_entries++; 
if ((fn->fn_flags & RTN_RTINFO) == 0) { - rt6_stats.fib_route_nodes++; + info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } return 0; } -static __inline__ void fib6_start_gc(struct rt6_info *rt) +static __inline__ void fib6_start_gc(struct net *net, struct rt6_info *rt) { - if (ip6_fib_timer.expires == 0 && + if (net->ipv6.ip6_fib_timer->expires == 0 && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); } -void fib6_force_start_gc(void) +void fib6_force_start_gc(struct net *net) { - if (ip6_fib_timer.expires == 0) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + if (net->ipv6.ip6_fib_timer->expires == 0) + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -733,8 +717,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (sfn == NULL) goto st_failure; - sfn->leaf = &ip6_null_entry; - atomic_inc(&ip6_null_entry.rt6i_ref); + sfn->leaf = info->nl_net->ipv6.ip6_null_entry; + atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); sfn->fn_flags = RTN_ROOT; sfn->fn_sernum = fib6_new_sernum(); @@ -776,9 +760,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) err = fib6_add_rt2node(fn, rt, info); if (err == 0) { - fib6_start_gc(rt); + fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(pn, rt); + fib6_prune_clones(info->nl_net, pn, rt); } out: @@ -789,11 +773,11 @@ out: * super-tree leaf node we have to find a new one for it. 
*/ if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn->leaf = fib6_find_prefix(pn); + pn->leaf = fib6_find_prefix(info->nl_net, pn); #if RT6_DEBUG >= 2 if (!pn->leaf) { BUG_TRAP(pn->leaf != NULL); - pn->leaf = &ip6_null_entry; + pn->leaf = info->nl_net->ipv6.ip6_null_entry; } #endif atomic_inc(&pn->leaf->rt6i_ref); @@ -809,7 +793,7 @@ out: */ st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) - fib6_repair_tree(fn); + fib6_repair_tree(info->nl_net, fn); dst_free(&rt->u.dst); return err; #endif @@ -975,10 +959,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root, * */ -static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) +static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) { if (fn->fn_flags&RTN_ROOT) - return &ip6_null_entry; + return net->ipv6.ip6_null_entry; while(fn) { if(fn->left) @@ -997,7 +981,8 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) * is the node we want to try and remove. */ -static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) +static struct fib6_node *fib6_repair_tree(struct net *net, + struct fib6_node *fn) { int children; int nstate; @@ -1024,11 +1009,11 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) || (children && fn->fn_flags&RTN_ROOT) #endif ) { - fn->leaf = fib6_find_prefix(fn); + fn->leaf = fib6_find_prefix(net, fn); #if RT6_DEBUG >= 2 if (fn->leaf==NULL) { BUG_TRAP(fn->leaf); - fn->leaf = &ip6_null_entry; + fn->leaf = net->ipv6.ip6_null_entry; } #endif atomic_inc(&fn->leaf->rt6i_ref); @@ -1101,14 +1086,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; + struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); /* Unlink it */ *rtp = rt->u.dst.rt6_next; rt->rt6i_node = NULL; - rt6_stats.fib_rt_entries--; - rt6_stats.fib_discarded_routes++; + net->ipv6.rt6_stats->fib_rt_entries--; + net->ipv6.rt6_stats->fib_discarded_routes++; /* 
Reset round-robin state, if necessary */ if (fn->rr_ptr == rt) @@ -1131,8 +1117,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { fn->fn_flags &= ~RTN_RTINFO; - rt6_stats.fib_route_nodes--; - fn = fib6_repair_tree(fn); + net->ipv6.rt6_stats->fib_route_nodes--; + fn = fib6_repair_tree(net, fn); } if (atomic_read(&rt->rt6i_ref) != 1) { @@ -1144,7 +1130,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, */ while (fn) { if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(fn); + fn->leaf = fib6_find_prefix(net, fn); atomic_inc(&fn->leaf->rt6i_ref); rt6_release(rt); } @@ -1160,6 +1146,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct net *net = info->nl_net; struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -1169,7 +1156,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) return -ENOENT; } #endif - if (fn == NULL || rt == &ip6_null_entry) + if (fn == NULL || rt == net->ipv6.ip6_null_entry) return -ENOENT; BUG_TRAP(fn->fn_flags&RTN_RTINFO); @@ -1184,7 +1171,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) pn = pn->parent; } #endif - fib6_prune_clones(pn, rt); + fib6_prune_clones(info->nl_net, pn, rt); } /* @@ -1314,12 +1301,12 @@ static int fib6_walk(struct fib6_walker_t *w) static int fib6_clean_node(struct fib6_walker_t *w) { - struct nl_info info = { - .nl_net = &init_net, - }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); + struct nl_info info = { + .nl_net = c->net, + }; for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { res = c->func(rt, c->arg); @@ -1351,7 +1338,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. 
*/ -static void fib6_clean_tree(struct fib6_node *root, +static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1362,23 +1349,26 @@ static void fib6_clean_tree(struct fib6_node *root, c.w.prune = prune; c.func = func; c.arg = arg; + c.net = net; fib6_walk(&c.w); } -void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { struct fib6_table *table; struct hlist_node *node; + struct hlist_head *head; unsigned int h; rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], - tb6_hlist) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); - fib6_clean_tree(&table->tb6_root, func, prune, arg); + fib6_clean_tree(net, &table->tb6_root, + func, prune, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1395,9 +1385,10 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn, + struct rt6_info *rt) { - fib6_clean_tree(fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); } /* @@ -1447,54 +1438,145 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long dummy) +void fib6_run_gc(unsigned long expires, struct net *net) { - if (dummy != ~0UL) { + if (expires != ~0UL) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = dummy ? (int)dummy : - init_net.ipv6.sysctl.ip6_rt_gc_interval; + gc_args.timeout = expires ? 
(int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; } else { local_bh_disable(); if (!spin_trylock(&fib6_gc_lock)) { - mod_timer(&ip6_fib_timer, jiffies + HZ); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + HZ); local_bh_enable(); return; } - gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval; + gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; } gc_args.more = 0; - ndisc_dst_gc(&gc_args.more); - fib6_clean_all(fib6_age, 0, NULL); + icmp6_dst_gc(&gc_args.more); + + fib6_clean_all(net, fib6_age, 0, NULL); if (gc_args.more) - mod_timer(&ip6_fib_timer, jiffies + - init_net.ipv6.sysctl.ip6_rt_gc_interval); + mod_timer(net->ipv6.ip6_fib_timer, jiffies + + net->ipv6.sysctl.ip6_rt_gc_interval); else { - del_timer(&ip6_fib_timer); - ip6_fib_timer.expires = 0; + del_timer(net->ipv6.ip6_fib_timer); + net->ipv6.ip6_fib_timer->expires = 0; } spin_unlock_bh(&fib6_gc_lock); } -int __init fib6_init(void) +static void fib6_gc_timer_cb(unsigned long arg) +{ + fib6_run_gc(0, (struct net *)arg); +} + +static int fib6_net_init(struct net *net) { int ret; + struct timer_list *timer; + + ret = -ENOMEM; + timer = kzalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) + goto out; + + setup_timer(timer, fib6_gc_timer_cb, (unsigned long)net); + net->ipv6.ip6_fib_timer = timer; + + net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL); + if (!net->ipv6.rt6_stats) + goto out_timer; + + net->ipv6.fib_table_hash = + kzalloc(sizeof(*net->ipv6.fib_table_hash)*FIB_TABLE_HASHSZ, + GFP_KERNEL); + if (!net->ipv6.fib_table_hash) + goto out_rt6_stats; + + net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl), + GFP_KERNEL); + if (!net->ipv6.fib6_main_tbl) + goto out_fib_table_hash; + + net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; + net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + net->ipv6.fib6_main_tbl->tb6_root.fn_flags = + RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_local_tbl = 
kzalloc(sizeof(*net->ipv6.fib6_local_tbl), + GFP_KERNEL); + if (!net->ipv6.fib6_local_tbl) + goto out_fib6_main_tbl; + net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; + net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + net->ipv6.fib6_local_tbl->tb6_root.fn_flags = + RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; +#endif + fib6_tables_init(net); + + ret = 0; +out: + return ret; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_fib6_main_tbl: + kfree(net->ipv6.fib6_main_tbl); +#endif +out_fib_table_hash: + kfree(net->ipv6.fib_table_hash); +out_rt6_stats: + kfree(net->ipv6.rt6_stats); +out_timer: + kfree(timer); + goto out; + } + +static void fib6_net_exit(struct net *net) +{ + rt6_ifdown(net, NULL); + del_timer(net->ipv6.ip6_fib_timer); + kfree(net->ipv6.ip6_fib_timer); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.fib6_local_tbl); +#endif + kfree(net->ipv6.fib6_main_tbl); + kfree(net->ipv6.fib_table_hash); + kfree(net->ipv6.rt6_stats); +} + +static struct pernet_operations fib6_net_ops = { + .init = fib6_net_init, + .exit = fib6_net_exit, +}; + +int __init fib6_init(void) +{ + int ret = -ENOMEM; + fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), 0, SLAB_HWCACHE_ALIGN, NULL); if (!fib6_node_kmem) - return -ENOMEM; + goto out; - fib6_tables_init(); + ret = register_pernet_subsys(&fib6_net_ops); + if (ret) + goto out_kmem_cache_create; ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); if (ret) - goto out_kmem_cache_create; + goto out_unregister_subsys; out: return ret; +out_unregister_subsys: + unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; @@ -1502,6 +1584,6 @@ out_kmem_cache_create: void fib6_gc_cleanup(void) { - del_timer(&ip6_fib_timer); + unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2b7d9ee98832..eb7a940310f4 100644 --- 
a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -62,23 +62,23 @@ static DEFINE_RWLOCK(ip6_fl_lock); static DEFINE_RWLOCK(ip6_sk_fl_lock); -static __inline__ struct ip6_flowlabel * __fl_lookup(__be32 label) +static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { - if (fl->label == label) + if (fl->label == label && fl->fl_net == net) return fl; } return NULL; } -static struct ip6_flowlabel * fl_lookup(__be32 label) +static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) { struct ip6_flowlabel *fl; read_lock_bh(&ip6_fl_lock); - fl = __fl_lookup(label); + fl = __fl_lookup(net, label); if (fl) atomic_inc(&fl->users); read_unlock_bh(&ip6_fl_lock); @@ -88,8 +88,10 @@ static struct ip6_flowlabel * fl_lookup(__be32 label) static void fl_free(struct ip6_flowlabel *fl) { - if (fl) + if (fl) { + release_net(fl->fl_net); kfree(fl->opt); + } kfree(fl); } @@ -112,7 +114,6 @@ static void fl_release(struct ip6_flowlabel *fl) time_after(ip6_fl_gc_timer.expires, ttd)) mod_timer(&ip6_fl_gc_timer, ttd); } - write_unlock_bh(&ip6_fl_lock); } @@ -148,13 +149,34 @@ static void ip6_fl_gc(unsigned long dummy) if (!sched && atomic_read(&fl_size)) sched = now + FL_MAX_LINGER; if (sched) { - ip6_fl_gc_timer.expires = sched; - add_timer(&ip6_fl_gc_timer); + mod_timer(&ip6_fl_gc_timer, sched); + } + write_unlock(&ip6_fl_lock); +} + +static void ip6_fl_purge(struct net *net) +{ + int i; + + write_lock(&ip6_fl_lock); + for (i = 0; i <= FL_HASH_MASK; i++) { + struct ip6_flowlabel *fl, **flp; + flp = &fl_ht[i]; + while ((fl = *flp) != NULL) { + if (fl->fl_net == net && atomic_read(&fl->users) == 0) { + *flp = fl->next; + fl_free(fl); + atomic_dec(&fl_size); + continue; + } + flp = &fl->next; + } } write_unlock(&ip6_fl_lock); } -static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) +static struct ip6_flowlabel *fl_intern(struct net *net, + struct 
ip6_flowlabel *fl, __be32 label) { struct ip6_flowlabel *lfl; @@ -165,7 +187,7 @@ static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) for (;;) { fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; if (fl->label) { - lfl = __fl_lookup(fl->label); + lfl = __fl_lookup(net, fl->label); if (lfl == NULL) break; } @@ -179,7 +201,7 @@ static struct ip6_flowlabel *fl_intern(struct ip6_flowlabel *fl, __be32 label) * done in ipv6_flowlabel_opt - sock is locked, so new entry * with the same label can only appear on another sock */ - lfl = __fl_lookup(fl->label); + lfl = __fl_lookup(net, fl->label); if (lfl != NULL) { atomic_inc(&lfl->users); write_unlock_bh(&ip6_fl_lock); @@ -298,7 +320,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo } static struct ip6_flowlabel * -fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p) +fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, + int optlen, int *err_p) { struct ip6_flowlabel *fl; int olen; @@ -343,6 +366,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * } } + fl->fl_net = hold_net(net); fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) @@ -441,6 +465,7 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) { int err; + struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; @@ -483,7 +508,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) read_unlock_bh(&ip6_sk_fl_lock); if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) { - fl = fl_lookup(freq.flr_label); + fl = fl_lookup(net, freq.flr_label); if (fl) { err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); fl_release(fl); @@ -496,7 +521,7 @@ int 
ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; - fl = fl_create(&freq, optval, optlen, &err); + fl = fl_create(net, &freq, optval, optlen, &err); if (fl == NULL) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); @@ -518,7 +543,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) read_unlock_bh(&ip6_sk_fl_lock); if (fl1 == NULL) - fl1 = fl_lookup(freq.flr_label); + fl1 = fl_lookup(net, freq.flr_label); if (fl1) { recheck: err = -EEXIST; @@ -559,7 +584,7 @@ release: if (sfl1 == NULL || (err = mem_check(sk)) != 0) goto done; - fl1 = fl_intern(fl, freq.flr_label); + fl1 = fl_intern(net, fl, freq.flr_label); if (fl1 != NULL) goto recheck; @@ -586,6 +611,7 @@ done: #ifdef CONFIG_PROC_FS struct ip6fl_iter_state { + struct seq_net_private p; int bucket; }; @@ -595,12 +621,15 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) { struct ip6_flowlabel *fl = NULL; struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + struct net *net = seq_file_net(seq); for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { - if (fl_ht[state->bucket]) { - fl = fl_ht[state->bucket]; + fl = fl_ht[state->bucket]; + + while (fl && fl->fl_net != net) + fl = fl->next; + if (fl) break; - } } return fl; } @@ -608,12 +637,18 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + struct net *net = seq_file_net(seq); fl = fl->next; +try_again: + while (fl && fl->fl_net != net) + fl = fl->next; + while (!fl) { - if (++state->bucket <= FL_HASH_MASK) + if (++state->bucket <= FL_HASH_MASK) { fl = fl_ht[state->bucket]; - else + goto try_again; + } else break; } return fl; @@ -683,8 +718,8 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, 
struct file *file) { - return seq_open_private(file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); + return seq_open_net(inode, file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); } static const struct file_operations ip6fl_seq_fops = { @@ -692,12 +727,13 @@ static const struct file_operations ip6fl_seq_fops = { .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static int ip6_flowlabel_proc_init(struct net *net) { - if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops)) + if (!proc_net_fops_create(net, "ip6_flowlabel", + S_IRUGO, &ip6fl_seq_fops)) return -ENOMEM; return 0; } @@ -717,13 +753,24 @@ static inline void ip6_flowlabel_proc_fini(struct net *net) } #endif +static inline void ip6_flowlabel_net_exit(struct net *net) +{ + ip6_fl_purge(net); + ip6_flowlabel_proc_fini(net); +} + +static struct pernet_operations ip6_flowlabel_net_ops = { + .init = ip6_flowlabel_proc_init, + .exit = ip6_flowlabel_net_exit, +}; + int ip6_flowlabel_init(void) { - return ip6_flowlabel_proc_init(&init_net); + return register_pernet_subsys(&ip6_flowlabel_net_ops); } void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); - ip6_flowlabel_proc_fini(&init_net); + unregister_pernet_subsys(&ip6_flowlabel_net_ops); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 98ab4f459905..43a617e2268b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -61,11 +61,6 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt u32 pkt_len; struct inet6_dev *idev; - if (dev->nd_net != &init_net) { - kfree_skb(skb); - return 0; - } - if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8b67ca07467d..a8b4da25b0a7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -237,9 +237,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 
if (np) hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); tclass = -1; if (np) @@ -404,6 +402,7 @@ int ip6_forward(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(dst->dev); if (ipv6_devconf.forwarding == 0) goto error; @@ -450,7 +449,7 @@ int ip6_forward(struct sk_buff *skb) /* XXX: idev->cnf.proxy_ndp? */ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); @@ -596,7 +595,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -EXPORT_SYMBOL_GPL(ip6_find_1stfragopt); static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { @@ -912,15 +910,19 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi *fl) { int err; + struct net *net = sock_net(sk); if (*dst == NULL) - *dst = ip6_route_output(sk, fl); + *dst = ip6_route_output(net, sk, fl); if ((err = (*dst)->error)) goto out_err_release; if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); + err = ipv6_dev_get_saddr(ip6_dst_idev(*dst)->dev, + &fl->fl6_dst, + sk ? 
inet6_sk(sk)->srcprefs : 0, + &fl->fl6_src); if (err) goto out_err_release; } @@ -939,7 +941,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi fl_gw; int redirect; - ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl->fl6_src, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -954,7 +956,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, dst_release(*dst); memcpy(&fl_gw, fl, sizeof(struct flowi)); memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(sk, &fl_gw); + *dst = ip6_route_output(net, sk, &fl_gw); if ((err = (*dst)->error)) goto out_err_release; } @@ -1113,7 +1115,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); - np->cork.rt = rt; + inet->cork.dst = &rt->u.dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; @@ -1134,7 +1136,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, length += exthdrlen; transhdrlen += exthdrlen; } else { - rt = np->cork.rt; + rt = (struct rt6_info *)inet->cork.dst; fl = &inet->cork.fl; if (inet->cork.flags & IPCORK_OPT) opt = np->cork.opt; @@ -1379,9 +1381,9 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) inet->cork.flags &= ~IPCORK_OPT; kfree(np->cork.opt); np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; + if (inet->cork.dst) { + dst_release(inet->cork.dst); + inet->cork.dst = NULL; inet->cork.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); @@ -1396,7 +1398,7 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = np->cork.rt; + struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct flowi *fl = &inet->cork.fl; unsigned char proto = fl->proto; int err = 0; diff 
--git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 78f438880923..61517fe0c57c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -60,7 +60,7 @@ MODULE_LICENSE("GPL"); #define IPV6_TLV_TEL_DST_SIZE 8 #ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__) +#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__) #else #define IP6_TNL_TRACE(x...) do {;} while(0) #endif @@ -602,7 +602,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_reset_network_header(skb2); /* Try to guess incoming interface */ - rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0); + rt = rt6_lookup(&init_net, &ipv6_hdr(skb2)->saddr, NULL, 0, 0); if (rt && rt->rt6i_dev) skb2->dev = rt->rt6i_dev; @@ -847,7 +847,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(NULL, fl); + dst = ip6_route_output(&init_net, NULL, fl); if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) goto tx_err_link_failure; @@ -1112,7 +1112,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr, + struct rt6_info *rt = rt6_lookup(&init_net, &p->raddr, &p->laddr, p->link, strict); if (rt == NULL) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bf2a686aa13d..4195ac92345e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -57,118 +57,6 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; -static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, - int proto) -{ - struct inet6_protocol *ops = NULL; - - for (;;) { - struct ipv6_opt_hdr *opth; - int len; - - if (proto != NEXTHDR_HOP) { - ops = rcu_dereference(inet6_protos[proto]); - - if (unlikely(!ops)) - break; - - if (!(ops->flags & INET6_PROTO_GSO_EXTHDR)) - break; - } - - if 
(unlikely(!pskb_may_pull(skb, 8))) - break; - - opth = (void *)skb->data; - len = opth->hdrlen * 8 + 8; - - if (unlikely(!pskb_may_pull(skb, len))) - break; - - proto = opth->nexthdr; - __skb_pull(skb, len); - } - - return ops; -} - -static int ipv6_gso_send_check(struct sk_buff *skb) -{ - struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - err = -EPROTONOSUPPORT; - - rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - if (likely(ops && ops->gso_send_check)) { - skb_reset_transport_header(skb); - err = ops->gso_send_check(skb); - } - rcu_read_unlock(); - -out: - return err; -} - -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct ipv6hdr *ipv6h; - struct inet6_protocol *ops; - - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) - goto out; - - ipv6h = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*ipv6h)); - segs = ERR_PTR(-EPROTONOSUPPORT); - - rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); - if (likely(ops && ops->gso_segment)) { - skb_reset_transport_header(skb); - segs = ops->gso_segment(skb, features); - } - rcu_read_unlock(); - - if (unlikely(IS_ERR(segs))) - goto out; - - for (skb = segs; skb; skb = skb->next) { - ipv6h = ipv6_hdr(skb); - ipv6h->payload_len = htons(skb->len - skb->mac_len - - sizeof(*ipv6h)); - } - -out: - return segs; -} - -static struct packet_type ipv6_packet_type = { - .type = __constant_htons(ETH_P_IPV6), - .func = ipv6_rcv, - .gso_send_check = ipv6_gso_send_check, - .gso_segment = ipv6_gso_segment, -}; - struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); @@ 
-219,6 +107,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; @@ -266,10 +155,11 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); local_bh_disable(); - sock_prot_inuse_add(sk->sk_prot, -1); - sock_prot_inuse_add(&tcp_prot, 1); + sock_prot_inuse_add(net, sk->sk_prot, -1); + sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; icsk->icsk_af_ops = &ipv4_specific; @@ -278,12 +168,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct proto *prot = &udp_prot; + struct net *net = sock_net(sk); if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); - sock_prot_inuse_add(sk->sk_prot, -1); - sock_prot_inuse_add(prot, 1); + sock_prot_inuse_add(net, sk->sk_prot, -1); + sock_prot_inuse_add(net, prot, 1); local_bh_enable(); sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; @@ -544,7 +435,7 @@ done: if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(&init_net, val) == NULL) { + if (__dev_get_by_index(net, val) == NULL) { retv = -ENODEV; break; } @@ -728,7 +619,67 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; + case IPV6_ADDR_PREFERENCES: + { + unsigned int pref = 0; + unsigned int prefmask = ~0; + + retv = -EINVAL; + + /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ + switch (val & (IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP| + IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { + case IPV6_PREFER_SRC_PUBLIC: + pref |= IPV6_PREFER_SRC_PUBLIC; + break; + case IPV6_PREFER_SRC_TMP: + pref |= IPV6_PREFER_SRC_TMP; + break; + case IPV6_PREFER_SRC_PUBTMP_DEFAULT: + break; + 
case 0: + goto pref_skip_pubtmp; + default: + goto e_inval; + } + + prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| + IPV6_PREFER_SRC_TMP); +pref_skip_pubtmp: + + /* check HOME/COA conflicts */ + switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { + case IPV6_PREFER_SRC_HOME: + break; + case IPV6_PREFER_SRC_COA: + pref |= IPV6_PREFER_SRC_COA; + case 0: + goto pref_skip_coa; + default: + goto e_inval; + } + + prefmask &= ~IPV6_PREFER_SRC_COA; +pref_skip_coa: + + /* check CGA/NONCGA conflicts */ + switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { + case IPV6_PREFER_SRC_CGA: + case IPV6_PREFER_SRC_NONCGA: + case 0: + break; + default: + goto e_inval; + } + + np->srcprefs = (np->srcprefs & prefmask) | pref; + retv = 0; + + break; + } } + release_sock(sk); return retv; @@ -1015,9 +966,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, dst = sk_dst_get(sk); if (dst) { if (val < 0) - val = dst_metric(dst, RTAX_HOPLIMIT); - if (val < 0) - val = ipv6_get_hoplimit(dst->dev); + val = ip6_dst_hoplimit(dst); dst_release(dst); } if (val < 0) @@ -1045,6 +994,24 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->sndflow; break; + case IPV6_ADDR_PREFERENCES: + val = 0; + + if (np->srcprefs & IPV6_PREFER_SRC_TMP) + val |= IPV6_PREFER_SRC_TMP; + else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + val |= IPV6_PREFER_SRC_PUBLIC; + else { + /* XXX: should we return system default? 
*/ + val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; + } + + if (np->srcprefs & IPV6_PREFER_SRC_COA) + val |= IPV6_PREFER_SRC_COA; + else + val |= IPV6_PREFER_SRC_HOME; + break; + default: return -ENOPROTOOPT; } @@ -1128,13 +1095,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif -int __init ipv6_packet_init(void) -{ - dev_add_pack(&ipv6_packet_type); - return 0; -} - -void ipv6_packet_cleanup(void) -{ - dev_remove_pack(&ipv6_packet_type); -} diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab228d1ea114..d810cff818cf 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -126,8 +126,6 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ static DEFINE_RWLOCK(ipv6_sk_mc_lock); -static struct socket *igmp6_socket; - int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr); static void igmp6_join_group(struct ifmcaddr6 *ma); @@ -183,6 +181,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); int err; if (!ipv6_addr_is_multicast(addr)) @@ -208,14 +207,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -256,6 +255,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst, **lnk; + struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { 
@@ -266,7 +266,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { + dev = dev_get_by_index(net, mc_lst->ifindex); + if (dev != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -286,7 +287,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) +static struct inet6_dev *ip6_mc_find_dev(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -294,14 +297,14 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(group, NULL, 0, 0); + rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) return NULL; @@ -324,6 +327,7 @@ void ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; + struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_mc_lock); while ((mc_lst = np->ipv6_mc_list) != NULL) { @@ -332,7 +336,7 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(&init_net, mc_lst->ifindex); + dev = dev_get_by_index(net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -361,6 +365,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; + struct net *net = sock_net(sk); int i, j, rv; int leavegroup = 0; int pmclocked = 0; @@ -376,7 +381,7 @@ int ip6_mc_source(int add, int omode, 
struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(group, pgsr->gsr_interface); + idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; dev = idev->dev; @@ -500,6 +505,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; + struct net *net = sock_net(sk); int leavegroup = 0; int i, err; @@ -511,7 +517,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; @@ -592,13 +598,14 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct net_device *dev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; + struct net *net = sock_net(sk); group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; @@ -1393,7 +1400,8 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) static struct sk_buff *mld_newpack(struct net_device *dev, int size) { - struct sock *sk = igmp6_socket->sk; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; struct mld2_report *pmr; struct in6_addr addr_buf; @@ -1433,25 +1441,6 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) return skb; } -static inline int mld_dev_queue_xmit2(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned char ha[MAX_ADDR_LEN]; - - ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1); - if (dev_hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len) < 0) { - kfree_skb(skb); - return -EINVAL; - } - return dev_queue_xmit(skb); -} - -static 
inline int mld_dev_queue_xmit(struct sk_buff *skb) -{ - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - mld_dev_queue_xmit2); -} - static void mld_sendpack(struct sk_buff *skb) { struct ipv6hdr *pip6 = ipv6_hdr(skb); @@ -1459,7 +1448,9 @@ static void mld_sendpack(struct sk_buff *skb) (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; struct inet6_dev *idev = in6_dev_get(skb->dev); + struct net *net = dev_net(skb->dev); int err; + struct flowi fl; IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1469,8 +1460,25 @@ static void mld_sendpack(struct sk_buff *skb) pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), mldlen, 0)); + + skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + + if (!skb->dst) { + err = -ENOMEM; + goto err_out; + } + + icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->dev->ifindex); + + err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + if (err) + goto err_out; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - mld_dev_queue_xmit); + dst_output); +out: if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); @@ -1480,6 +1488,11 @@ static void mld_sendpack(struct sk_buff *skb) if (likely(idev != NULL)) in6_dev_put(idev); + return; + +err_out: + kfree_skb(skb); + goto out; } static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel) @@ -1749,7 +1762,8 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct sock *sk = igmp6_socket->sk; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; struct icmp6hdr *hdr; @@ -1761,6 +1775,7 @@ static void igmp6_send(struct in6_addr 
*addr, struct net_device *dev, int type) u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; + struct flowi fl; rcu_read_lock(); IP6_INC_STATS(__in6_dev_get(dev), @@ -1813,8 +1828,23 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) idev = in6_dev_get(skb->dev); + skb->dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); + if (!skb->dst) { + err = -ENOMEM; + goto err_out; + } + + icmpv6_flow_init(sk, &fl, type, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + skb->dev->ifindex); + + err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + if (err) + goto err_out; + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - mld_dev_queue_xmit); + dst_output); +out: if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); @@ -1825,6 +1855,10 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (likely(idev != NULL)) in6_dev_put(idev); return; + +err_out: + kfree_skb(skb); + goto out; } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2310,6 +2344,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { + struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; }; @@ -2320,9 +2355,10 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) { struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2424,8 +2460,8 @@ static const struct seq_operations igmp6_mc_seq_ops = { static int igmp6_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp6_mc_seq_ops, - sizeof(struct igmp6_mc_iter_state)); + return seq_open_net(inode, file, &igmp6_mc_seq_ops, + sizeof(struct 
igmp6_mc_iter_state)); } static const struct file_operations igmp6_mc_seq_fops = { @@ -2433,10 +2469,11 @@ static const struct file_operations igmp6_mc_seq_fops = { .open = igmp6_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct igmp6_mcf_iter_state { + struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; struct ifmcaddr6 *im; @@ -2449,10 +2486,11 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ip6_sf_list *psf = NULL; struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); + struct net *net = seq_file_net(seq); state->idev = NULL; state->im = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2584,8 +2622,8 @@ static const struct seq_operations igmp6_mcf_seq_ops = { static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp6_mcf_seq_ops, - sizeof(struct igmp6_mcf_iter_state)); + return seq_open_net(inode, file, &igmp6_mcf_seq_ops, + sizeof(struct igmp6_mcf_iter_state)); } static const struct file_operations igmp6_mcf_seq_fops = { @@ -2593,47 +2631,96 @@ static const struct file_operations igmp6_mcf_seq_fops = { .open = igmp6_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; + +static int igmp6_proc_init(struct net *net) +{ + int err; + + err = -ENOMEM; + if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + goto out; + if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, + &igmp6_mcf_seq_fops)) + goto out_proc_net_igmp6; + + err = 0; +out: + return err; + +out_proc_net_igmp6: + proc_net_remove(net, "igmp6"); + goto out; +} + +static void igmp6_proc_exit(struct net *net) +{ + proc_net_remove(net, "mcfilter6"); + proc_net_remove(net, 
"igmp6"); +} +#else +static int igmp6_proc_init(struct net *net) +{ + return 0; +} +static void igmp6_proc_exit(struct net *net) +{ + ; +} #endif -int __init igmp6_init(struct net_proto_family *ops) +static int igmp6_net_init(struct net *net) { struct ipv6_pinfo *np; + struct socket *sock; struct sock *sk; int err; - err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket); + err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock); if (err < 0) { printk(KERN_ERR "Failed to initialize the IGMP6 control socket (err %d).\n", err); - igmp6_socket = NULL; /* For safety. */ - return err; + goto out; } - sk = igmp6_socket->sk; + net->ipv6.igmp_sk = sk = sock->sk; + sk_change_net(sk, net); sk->sk_allocation = GFP_ATOMIC; sk->sk_prot->unhash(sk); np = inet6_sk(sk); np->hop_limit = 1; -#ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); -#endif + err = igmp6_proc_init(net); + if (err) + goto out_sock_create; +out: + return err; - return 0; +out_sock_create: + sk_release_kernel(net->ipv6.igmp_sk); + goto out; } -void igmp6_cleanup(void) +static void igmp6_net_exit(struct net *net) { - sock_release(igmp6_socket); - igmp6_socket = NULL; /* for safety */ + sk_release_kernel(net->ipv6.igmp_sk); + igmp6_proc_exit(net); +} -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "mcfilter6"); - proc_net_remove(&init_net, "igmp6"); -#endif +static struct pernet_operations igmp6_net_ops = { + .init = igmp6_net_init, + .exit = igmp6_net_exit, +}; + +int __init igmp6_init(void) +{ + return register_pernet_subsys(&igmp6_net_ops); +} + +void igmp6_cleanup(void) +{ + unregister_pernet_subsys(&igmp6_net_ops); } diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index cd8a5bda13cd..42403c626c27 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -304,13 +304,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, static int 
mip6_destopt_init_state(struct xfrm_state *x) { if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + printk(KERN_INFO "%s: spi is not 0: %u\n", __func__, x->id.spi); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); return -EINVAL; } @@ -439,13 +439,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, static int mip6_rthdr_init_state(struct xfrm_state *x) { if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + printk(KERN_INFO "%s: spi is not 0: %u\n", __func__, x->id.spi); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); return -EINVAL; } @@ -480,15 +480,15 @@ static int __init mip6_init(void) printk(KERN_INFO "Mobile IPv6\n"); if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); + printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __func__); goto mip6_destopt_xfrm_fail; } if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); + printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __func__); goto mip6_rthdr_xfrm_fail; } if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { - printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__); + printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __func__); goto mip6_rawv6_mh_fail; } @@ -506,11 +506,11 @@ static int __init mip6_init(void) static void __exit mip6_fini(void) { if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) - printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__); + printk(KERN_INFO 
"%s: can't remove rawv6 mh filter\n", __func__); if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); + printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __func__); if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); + printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __func__); } module_init(mip6_init); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 452a2ac4eec8..510aa747a404 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -89,8 +89,6 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> -static struct socket *ndisc_socket; - static u32 ndisc_hash(const void *pkey, const struct net_device *dev); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); @@ -270,7 +268,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK2(KERN_WARNING "%s(): duplicated ND6 option found: type=%d\n", - __FUNCTION__, + __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; @@ -301,7 +299,7 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, */ ND_PRINTK2(KERN_NOTICE "%s(): ignored unsupported option; type=%d, len=%d\n", - __FUNCTION__, + __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } } @@ -441,21 +439,6 @@ static void pndisc_destructor(struct pneigh_entry *n) /* * Send a Neighbour Advertisement */ - -static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr, - int oif) -{ - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(ndisc_socket->sk, 
fl); -} - static void __ndisc_send(struct net_device *dev, struct neighbour *neigh, struct in6_addr *daddr, struct in6_addr *saddr, @@ -464,7 +447,8 @@ static void __ndisc_send(struct net_device *dev, { struct flowi fl; struct dst_entry *dst; - struct sock *sk = ndisc_socket->sk; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; struct inet6_dev *idev; @@ -474,10 +458,9 @@ static void __ndisc_send(struct net_device *dev, type = icmp6h->icmp6_type; - ndisc_flow_init(&fl, type, saddr, daddr, - dev->ifindex); + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); - dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); + dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) return; @@ -499,7 +482,7 @@ static void __ndisc_send(struct net_device *dev, if (!skb) { ND_PRINTK0(KERN_ERR "ICMPv6 ND: %s() failed to allocate an skb.\n", - __FUNCTION__); + __func__); dst_release(dst); return; } @@ -556,14 +539,16 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, }; /* for anycast or proxy, solicited_addr != src_addr */ - ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1); + ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = 0; in6_ifa_put(ifp); } else { - if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr)) + if (ipv6_dev_get_saddr(dev, daddr, + inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, + &tmpaddr)) return; src_addr = &tmpaddr; } @@ -616,7 +601,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, * suppress the inclusion of the sllao. 
*/ if (send_sllao) { - struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr, + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { @@ -654,7 +639,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { @@ -662,7 +647,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: " NIP6_FMT "\n", - __FUNCTION__, + __func__, NIP6(*target)); } ndisc_send_ns(dev, neigh, target, target, saddr); @@ -676,18 +661,19 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) } } -static struct pneigh_entry *pndisc_check_router(struct net_device *dev, - struct in6_addr *addr, int *is_router) +static int pndisc_is_router(const void *pkey, + struct net_device *dev) { struct pneigh_entry *n; + int ret = -1; read_lock_bh(&nd_tbl.lock); - n = __pneigh_lookup(&nd_tbl, &init_net, addr, dev); - if (n != NULL) - *is_router = (n->flags & NTF_ROUTER); + n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); + if (n) + ret = !!(n->flags & NTF_ROUTER); read_unlock_bh(&nd_tbl.lock); - return n; + return ret; } static void ndisc_recv_ns(struct sk_buff *skb) @@ -703,10 +689,9 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; - struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; - int is_router = 0; + int is_router = -1; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -756,7 +741,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) inc = 
ipv6_addr_is_multicast(daddr); - if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) { + ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); + if (ifp) { if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { @@ -801,11 +787,10 @@ static void ndisc_recv_ns(struct sk_buff *skb) return; } - if (ipv6_chk_acast_addr(dev, &msg->target) || + if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pndisc_check_router(dev, &msg->target, - &is_router)) != NULL)) { + (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -826,7 +811,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - is_router = !!(pneigh ? is_router : idev->cnf.forwarding); + if (is_router < 0) + is_router = !!idev->cnf.forwarding; if (dad) { struct in6_addr maddr; @@ -914,7 +900,8 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } } - if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) { + ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); + if (ifp) { if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); return; @@ -945,7 +932,7 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1035,6 +1022,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; + struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); @@ -1064,7 +1052,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct 
nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL, + err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); if (err < 0) goto errout; @@ -1075,7 +1063,7 @@ nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: - rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err); + rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static void ndisc_router_discovery(struct sk_buff *skb) @@ -1178,7 +1166,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() failed to add default route.\n", - __FUNCTION__); + __func__); in6_dev_put(in6_dev); return; } @@ -1187,7 +1175,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", - __FUNCTION__); + __func__); dst_release(&rt->u.dst); in6_dev_put(in6_dev); return; @@ -1420,13 +1408,14 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct in6_addr *target) { - struct sock *sk = ndisc_socket->sk; + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); struct sk_buff *buff; struct icmp6hdr *icmph; struct in6_addr saddr_buf; struct in6_addr *addrp; - struct net_device *dev; struct rt6_info *rt; struct dst_entry *dst; struct inet6_dev *idev; @@ -1436,8 +1425,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, int err; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; - dev = skb->dev; - if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: no link-local address on %s\n", @@ -1452,10 +1439,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, 
&ipv6_hdr(skb)->saddr, - dev->ifindex); + icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; @@ -1499,12 +1486,11 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (buff == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 Redirect: %s() failed to allocate an skb.\n", - __FUNCTION__); + __func__); dst_release(dst); return; } - skb_reserve(buff, LL_RESERVED_SPACE(dev)); ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr, IPPROTO_ICMPV6, len); @@ -1625,18 +1611,16 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - - if (dev->nd_net != &init_net) - return NOTIFY_DONE; + struct net *net = dev_net(dev); switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL); + fib6_run_gc(~0UL, net); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL); + fib6_run_gc(~0UL, net); break; default: break; @@ -1745,22 +1729,24 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, #endif -int __init ndisc_init(struct net_proto_family *ops) +static int ndisc_net_init(struct net *net) { + struct socket *sock; struct ipv6_pinfo *np; struct sock *sk; int err; - err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket); + err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock); if (err < 0) { ND_PRINTK0(KERN_ERR "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n", err); - ndisc_socket = NULL; /* For safety. 
*/ return err; } - sk = ndisc_socket->sk; + net->ipv6.ndisc_sk = sk = sock->sk; + sk_change_net(sk, net); + np = inet6_sk(sk); sk->sk_allocation = GFP_ATOMIC; np->hop_limit = 255; @@ -1768,21 +1754,52 @@ int __init ndisc_init(struct net_proto_family *ops) np->mc_loop = 0; sk->sk_prot->unhash(sk); + return 0; +} + +static void ndisc_net_exit(struct net *net) +{ + sk_release_kernel(net->ipv6.ndisc_sk); +} + +static struct pernet_operations ndisc_net_ops = { + .init = ndisc_net_init, + .exit = ndisc_net_exit, +}; + +int __init ndisc_init(void) +{ + int err; + + err = register_pernet_subsys(&ndisc_net_ops); + if (err) + return err; /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, - "ipv6", - &ndisc_ifinfo_sysctl_change, - &ndisc_ifinfo_sysctl_strategy); + err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, + NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + &ndisc_ifinfo_sysctl_strategy); + if (err) + goto out_unregister_pernet; #endif + err = register_netdevice_notifier(&ndisc_netdev_notifier); + if (err) + goto out_unregister_sysctl; +out: + return err; - register_netdevice_notifier(&ndisc_netdev_notifier); - return 0; +out_unregister_sysctl: +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(&nd_tbl.parms); +out_unregister_pernet: +#endif + unregister_pernet_subsys(&ndisc_net_ops); + goto out; } void ndisc_cleanup(void) @@ -1792,6 +1809,5 @@ void ndisc_cleanup(void) neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(&nd_tbl); - sock_release(ndisc_socket); - ndisc_socket = NULL; /* For safety. 
*/ + unregister_pernet_subsys(&ndisc_net_ops); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 2e06724dc348..aed51bcc66b4 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,7 +23,7 @@ int ip6_route_me_harder(struct sk_buff *skb) .saddr = iph->saddr, } }, }; - dst = ip6_route_output(skb->sk, &fl); + dst = ip6_route_output(&init_net, skb->sk, &fl); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -86,7 +86,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) { - *dst = ip6_route_output(NULL, fl); + *dst = ip6_route_output(&init_net, NULL, fl); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 8d366f7f2a9a..92a36c9e5402 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -484,7 +484,7 @@ ipq_rcv_dev_event(struct notifier_block *this, { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) + if (dev_net(dev) != &init_net) return NOTIFY_DONE; /* Drop any packets associated with the downed device */ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bf9bb6e55bb5..70ef0d276cc0 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -55,7 +55,7 @@ MODULE_DESCRIPTION("IPv6 packet filter"); do { \ if (!(x)) \ printk("IP_NF_ASSERT: %s:%s:%u\n", \ - __FUNCTION__, __FILE__, __LINE__); \ + __func__, __FILE__, __LINE__); \ } while(0) #else #define IP_NF_ASSERT(x) @@ -1879,11 +1879,11 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = compat_do_replace(sk->sk_net, user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 1); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1990,10 +1990,10 @@ 
compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 1); + ret = get_info(sock_net(sk), user, len, 1); break; case IP6T_SO_GET_ENTRIES: - ret = compat_get_entries(sk->sk_net, user, len); + ret = compat_get_entries(sock_net(sk), user, len); break; default: ret = do_ip6t_get_ctl(sk, cmd, user, len); @@ -2012,11 +2012,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = do_replace(sk->sk_net, user, len); + ret = do_replace(sock_net(sk), user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(sk->sk_net, user, len, 0); + ret = do_add_counters(sock_net(sk), user, len, 0); break; default: @@ -2037,11 +2037,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) switch (cmd) { case IP6T_SO_GET_INFO: - ret = get_info(sk->sk_net, user, len, 0); + ret = get_info(sock_net(sk), user, len, 0); break; case IP6T_SO_GET_ENTRIES: - ret = get_entries(sk->sk_net, user, len); + ret = get_entries(sock_net(sk), user, len); break; case IP6T_SO_GET_REVISION_MATCH: diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index b23baa635fe0..baf829075f6f 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -93,7 +93,7 @@ static void send_reset(struct sk_buff *oldskb) fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); if (dst == NULL) return; if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) @@ -177,7 +177,7 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in, { const struct ip6t_reject_info *reject = targinfo; - pr_debug("%s: medium point\n", __FUNCTION__); + pr_debug("%s: medium point\n", __func__); /* WARNING: This code causes reentry within ip6tables. 
This means that the ip6tables jump stack is now crap. We must return an absolute verdict. --RR */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 24c0d03095bf..9e5f305b2022 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -185,7 +185,7 @@ static void nf_ct_frag6_expire(unsigned long data) spin_lock(&fq->q.lock); - if (fq->q.last_in & COMPLETE) + if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; fq_kill(fq); @@ -227,7 +227,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; int offset, end; - if (fq->q.last_in & COMPLETE) { + if (fq->q.last_in & INET_FRAG_COMPLETE) { pr_debug("Allready completed\n"); goto err; } @@ -254,11 +254,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & LAST_IN) && end != fq->q.len)) { + ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { pr_debug("already received last fragment\n"); goto err; } - fq->q.last_in |= LAST_IN; + fq->q.last_in |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -273,7 +273,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. 
*/ - if (fq->q.last_in & LAST_IN) { + if (fq->q.last_in & INET_FRAG_LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } @@ -385,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= FIRST_IN; + fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); @@ -647,7 +647,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) { + if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 199ef379e501..ca8b82f96fe5 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -35,16 +35,18 @@ static struct proc_dir_entry *proc_net_devsnmp6; static int sockstat6_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; + seq_printf(seq, "TCP6: inuse %d\n", - sock_prot_inuse_get(&tcpv6_prot)); + sock_prot_inuse_get(net, &tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - sock_prot_inuse_get(&udpv6_prot)); + sock_prot_inuse_get(net, &udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - sock_prot_inuse_get(&udplitev6_prot)); + sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - sock_prot_inuse_get(&rawv6_prot)); + sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net)); + ip6_frag_nqueues(net), ip6_frag_mem(net)); return 0; } @@ -183,7 +185,32 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) static int sockstat6_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sockstat6_seq_show, NULL); + int err; + struct net *net; + + err = -ENXIO; + net = 
get_proc_net(inode); + if (net == NULL) + goto err_net; + + err = single_open(file, sockstat6_seq_show, net); + if (err < 0) + goto err_open; + + return 0; + +err_open: + put_net(net); +err_net: + return err; +} + +static int sockstat6_seq_release(struct inode *inode, struct file *file) +{ + struct net *net = ((struct seq_file *)file->private_data)->private; + + put_net(net); + return single_release(inode, file); } static const struct file_operations sockstat6_seq_fops = { @@ -191,7 +218,7 @@ static const struct file_operations sockstat6_seq_fops = { .open = sockstat6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = sockstat6_seq_release, }; static int snmp6_seq_open(struct inode *inode, struct file *file) @@ -214,6 +241,9 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!idev || !idev->dev) return -EINVAL; + if (dev_net(idev->dev) != &init_net) + return 0; + if (!proc_net_devsnmp6) return -ENOENT; @@ -240,27 +270,45 @@ int snmp6_unregister_dev(struct inet6_dev *idev) return 0; } +static int ipv6_proc_init_net(struct net *net) +{ + if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, + &sockstat6_seq_fops)) + return -ENOMEM; + return 0; +} + +static void ipv6_proc_exit_net(struct net *net) +{ + proc_net_remove(net, "sockstat6"); +} + +static struct pernet_operations ipv6_proc_ops = { + .init = ipv6_proc_init_net, + .exit = ipv6_proc_exit_net, +}; + int __init ipv6_misc_proc_init(void) { int rc = 0; + if (register_pernet_subsys(&ipv6_proc_ops)) + goto proc_net_fail; + if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - - if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) - goto proc_sockstat6_fail; out: return rc; -proc_sockstat6_fail: - proc_net_remove(&init_net, "dev_snmp6"); proc_dev_snmp6_fail: proc_net_remove(&init_net, "snmp6"); 
proc_snmp6_fail: + unregister_pernet_subsys(&ipv6_proc_ops); +proc_net_fail: rc = -ENOMEM; goto out; } @@ -270,5 +318,6 @@ void ipv6_misc_proc_exit(void) proc_net_remove(&init_net, "sockstat6"); proc_net_remove(&init_net, "dev_snmp6"); proc_net_remove(&init_net, "snmp6"); + unregister_pernet_subsys(&ipv6_proc_ops); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8897ccf8086a..aae6cedf1709 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -62,20 +62,9 @@ #include <linux/seq_file.h> static struct raw_hashinfo raw_v6_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(), + .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), }; -static void raw_v6_hash(struct sock *sk) -{ - raw_hash_sk(sk, &raw_v6_hashinfo); -} - -static void raw_v6_unhash(struct sock *sk) -{ - raw_unhash_sk(sk, &raw_v6_hashinfo); -} - - static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr, int dif) @@ -87,7 +76,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, if (inet_sk(sk)->num == num) { struct ipv6_pinfo *np = inet6_sk(sk); - if (sk->sk_net != net) + if (!net_eq(sock_net(sk), net)) continue; if (!ipv6_addr_any(&np->daddr) && @@ -179,15 +168,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - /* - * The first socket found will be delivered after - * delivery to transport protocols. 
- */ - if (sk == NULL) goto out; - net = skb->dev->nd_net; + net = dev_net(skb->dev); sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { @@ -291,7 +275,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -304,7 +288,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr, + if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { if (dev) dev_put(dev); @@ -374,7 +358,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, if (sk != NULL) { saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; - net = skb->dev->nd_net; + net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, IP6CB(skb)->iif))) { @@ -896,9 +880,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); } if (tclass < 0) { @@ -1184,8 +1166,6 @@ static int rawv6_init_sk(struct sock *sk) return(0); } -DEFINE_PROTO_INUSE(rawv6) - struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, @@ -1201,14 +1181,14 @@ struct proto rawv6_prot = { .recvmsg = rawv6_recvmsg, .bind = rawv6_bind, .backlog_rcv = rawv6_rcv_skb, - .hash = raw_v6_hash, - .unhash = raw_v6_unhash, + .hash = raw_hash_sk, + .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), + .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, #endif - REF_PROTO_INUSE(rawv6) }; #ifdef CONFIG_PROC_FS 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f936d045a39d..7b247e3a16fe 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -202,7 +202,7 @@ static void ip6_frag_expire(unsigned long data) spin_lock(&fq->q.lock); - if (fq->q.last_in & COMPLETE) + if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; fq_kill(fq); @@ -217,7 +217,7 @@ static void ip6_frag_expire(unsigned long data) rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments) + if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out; /* @@ -265,7 +265,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; - if (fq->q.last_in & COMPLETE) + if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -294,9 +294,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * or have different end, the segment is corrupted. */ if (end < fq->q.len || - ((fq->q.last_in & LAST_IN) && end != fq->q.len)) + ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->q.last_in |= LAST_IN; + fq->q.last_in |= INET_FRAG_LAST_IN; fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. @@ -314,7 +314,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } if (end > fq->q.len) { /* Some bits beyond end -> corruption. 
*/ - if (fq->q.last_in & LAST_IN) + if (fq->q.last_in & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } @@ -417,10 +417,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= FIRST_IN; + fq->q.last_in |= INET_FRAG_FIRST_IN; } - if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) + if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) return ip6_frag_reasm(fq, prev, dev); write_lock(&ip6_frags.lock); @@ -600,7 +601,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - net = skb->dev->nd_net; + net = dev_net(skb->dev); if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8b241cb60bc..cd82b6db35ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -40,6 +40,7 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> @@ -87,14 +88,16 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex); #endif -static struct dst_ops ip6_dst_ops = { +static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = __constant_htons(ETH_P_IPV6), .gc = ip6_dst_gc, @@ -124,7 +127,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { 
.entries = ATOMIC_INIT(0), }; -struct rt6_info ip6_null_entry = { +static struct rt6_info ip6_null_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -134,8 +137,6 @@ struct rt6_info ip6_null_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_null_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -148,7 +149,7 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -struct rt6_info ip6_prohibit_entry = { +struct rt6_info ip6_prohibit_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -158,8 +159,6 @@ struct rt6_info ip6_prohibit_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_prohibit_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -167,7 +166,7 @@ struct rt6_info ip6_prohibit_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct rt6_info ip6_blk_hole_entry = { +static struct rt6_info ip6_blk_hole_entry_template = { .u = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -177,8 +176,6 @@ struct rt6_info ip6_blk_hole_entry = { .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_blk_hole_entry, } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), @@ -189,9 +186,9 @@ struct rt6_info ip6_blk_hole_entry = { #endif /* allocate dst with ip6_dst_ops */ -static __inline__ struct rt6_info *ip6_dst_alloc(void) +static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(&ip6_dst_ops); + return (struct rt6_info *)dst_alloc(ops); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -211,7 +208,7 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct rt6_info *rt 
= (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; struct net_device *loopback_dev = - dev->nd_net->loopback_dev; + dev_net(dev)->loopback_dev; if (dev != loopback_dev && idev != NULL && idev->dev == dev) { struct inet6_dev *loopback_idev = @@ -239,7 +236,8 @@ static inline int rt6_need_strict(struct in6_addr *daddr) * Route lookup. Any table->tb6_lock is implied. */ -static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, +static inline struct rt6_info *rt6_device_match(struct net *net, + struct rt6_info *rt, int oif, int strict) { @@ -268,7 +266,7 @@ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, return local; if (strict) - return &ip6_null_entry; + return net->ipv6.ip6_null_entry; } return rt; } @@ -409,9 +407,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; + struct net *net; RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", - __FUNCTION__, fn->leaf, oif); + __func__, fn->leaf, oif); rt0 = fn->rr_ptr; if (!rt0) @@ -432,15 +431,17 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) } RT6_TRACE("%s() => %p\n", - __FUNCTION__, match); + __func__, match); - return (match ? match : &ip6_null_entry); + net = dev_net(rt0->rt6i_dev); + return (match ? 
match : net->ipv6.ip6_null_entry); } #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr *gwaddr) { + struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -488,7 +489,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -496,7 +498,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | @@ -515,9 +517,9 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(saddr) \ +#define BACKTRACK(__net, saddr) \ do { \ - if (rt == &ip6_null_entry) { \ + if (rt == __net->ipv6.ip6_null_entry) { \ struct fib6_node *pn; \ while (1) { \ if (fn->fn_flags & RTN_TL_ROOT) \ @@ -533,7 +535,8 @@ do { \ } \ } while(0) -static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -543,8 +546,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags); - BACKTRACK(&fl->fl6_src); + rt = rt6_device_match(net, rt, fl->oif, flags); + BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -552,8 +555,8 @@ out: } -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int 
strict) +struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr, + struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { .oif = oif, @@ -571,7 +574,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -604,7 +607,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_ins_rt(rt, &info); } @@ -660,8 +663,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, - struct flowi *fl, int flags) +static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -680,8 +683,9 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(&fl->fl6_src); - if (rt == &ip6_null_entry || + + BACKTRACK(net, &fl->fl6_src); + if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -699,7 +703,7 @@ restart: } dst_release(&rt->u.dst); - rt = nrt ? : &ip6_null_entry; + rt = nrt ? 
: net->ipv6.ip6_null_entry; dst_hold(&rt->u.dst); if (nrt) { @@ -732,15 +736,16 @@ out2: return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, @@ -758,16 +763,17 @@ void ip6_route_input(struct sk_buff *skb) if (rt6_need_strict(&iph->daddr)) flags |= RT6_LOOKUP_F_IFACE; - skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); + skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); } -static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, +static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->oif, fl, flags); } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, + struct flowi *fl) { int flags = 0; @@ -776,8 +782,17 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) if (!ipv6_addr_any(&fl->fl6_src)) flags |= RT6_LOOKUP_F_HAS_SADDR; + else if (sk) { + unsigned int prefs = inet6_sk(sk)->srcprefs; + if (prefs & IPV6_PREFER_SRC_TMP) + flags |= RT6_LOOKUP_F_SRCPREF_TMP; + if (prefs & IPV6_PREFER_SRC_PUBLIC) + flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; + if (prefs & IPV6_PREFER_SRC_COA) + flags |= RT6_LOOKUP_F_SRCPREF_COA; + } - return fib6_rule_lookup(fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); @@ -886,12 +901,12 @@ static void 
ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(unsigned int mtu) +static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) - mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; + if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) + mtu = net->ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -904,21 +919,21 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } -static struct dst_entry *ndisc_dst_gc_list; -static DEFINE_SPINLOCK(ndisc_lock); +static struct dst_entry *icmp6_dst_gc_list; +static DEFINE_SPINLOCK(icmp6_dst_lock); -struct dst_entry *ndisc_dst_alloc(struct net_device *dev, +struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct neighbour *neigh, - struct in6_addr *addr, - int (*output)(struct sk_buff *)) + struct in6_addr *addr) { struct rt6_info *rt; struct inet6_dev *idev = in6_dev_get(dev); + struct net *net = dev_net(dev); if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -936,8 +951,8 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); - rt->u.dst.output = output; + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); + rt->u.dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST @@ -947,18 +962,18 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - spin_lock_bh(&ndisc_lock); - rt->u.dst.next = 
ndisc_dst_gc_list; - ndisc_dst_gc_list = &rt->u.dst; - spin_unlock_bh(&ndisc_lock); + spin_lock_bh(&icmp6_dst_lock); + rt->u.dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->u.dst; + spin_unlock_bh(&icmp6_dst_lock); - fib6_force_start_gc(); + fib6_force_start_gc(net); out: return &rt->u.dst; } -int ndisc_dst_gc(int *more) +int icmp6_dst_gc(int *more) { struct dst_entry *dst, *next, **pprev; int freed; @@ -966,8 +981,8 @@ int ndisc_dst_gc(int *more) next = NULL; freed = 0; - spin_lock_bh(&ndisc_lock); - pprev = &ndisc_dst_gc_list; + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { @@ -980,30 +995,33 @@ int ndisc_dst_gc(int *more) } } - spin_unlock_bh(&ndisc_lock); + spin_unlock_bh(&icmp6_dst_lock); return freed; } static int ip6_dst_gc(struct dst_ops *ops) { - static unsigned expire = 30*HZ; - static unsigned long last_gc; unsigned long now = jiffies; - - if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size) + struct net *net = ops->dst_net; + int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; + int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; + int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; + int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; + unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + + if (time_after(rt_last_gc + rt_min_interval, now) && + atomic_read(&ops->entries) <= rt_max_size) goto out; - expire++; - fib6_run_gc(expire); - last_gc = now; - if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1; - + net->ipv6.ip6_rt_gc_expire++; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + net->ipv6.ip6_rt_last_gc = now; + if (atomic_read(&ops->entries) < ops->gc_thresh) + net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: - expire -= 
expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size); + net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; + return (atomic_read(&ops->entries) > rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1025,15 +1043,17 @@ static int ipv6_get_mtu(struct net_device *dev) return mtu; } -int ipv6_get_hoplimit(struct net_device *dev) +int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = ipv6_devconf.hop_limit; - struct inet6_dev *idev; - - idev = in6_dev_get(dev); - if (idev) { - hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); + int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + if (hoplimit < 0) { + struct net_device *dev = dst->dev; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev) { + hoplimit = idev->cnf.hop_limit; + in6_dev_put(idev); + } else + hoplimit = ipv6_devconf.hop_limit; } return hoplimit; } @@ -1045,6 +1065,7 @@ int ipv6_get_hoplimit(struct net_device *dev) int ip6_route_add(struct fib6_config *cfg) { int err; + struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -1059,7 +1080,7 @@ int ip6_route_add(struct fib6_config *cfg) #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(&init_net, cfg->fc_ifindex); + dev = dev_get_by_index(net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1070,13 +1091,13 @@ int ip6_route_add(struct fib6_config *cfg) if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(cfg->fc_table); + table = fib6_new_table(net, cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; } - rt = ip6_dst_alloc(); + rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1117,12 +1138,12 @@ int ip6_route_add(struct fib6_config *cfg) if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && 
!(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ - if (dev != init_net.loopback_dev) { + if (dev != net->loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = init_net.loopback_dev; + dev = net->loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1159,7 +1180,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1226,10 +1247,13 @@ install_route: if (!rt->u.dst.metrics[RTAX_MTU-1]) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; + + cfg->fc_nlinfo.nl_net = dev_net(dev); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: @@ -1246,8 +1270,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; + struct net *net = dev_net(rt->rt6i_dev); - if (rt == &ip6_null_entry) + if (rt == net->ipv6.ip6_null_entry) return -ENOENT; table = rt->rt6i_table; @@ -1264,7 +1289,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { struct nl_info info = { - .nl_net = &init_net, + .nl_net = dev_net(rt->rt6i_dev), }; return __ip6_del_rt(rt, &info); } @@ -1276,7 +1301,7 @@ static int ip6_route_del(struct fib6_config *cfg) struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(cfg->fc_table); + table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); if (table == NULL) return err; @@ -1316,7 +1341,8 @@ struct ip6rd_flowi { struct in6_addr gateway; }; -static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, +static struct rt6_info 
*__ip6_route_redirect(struct net *net, + struct fib6_table *table, struct flowi *fl, int flags) { @@ -1359,8 +1385,8 @@ restart: } if (!rt) - rt = &ip6_null_entry; - BACKTRACK(&fl->fl6_src); + rt = net->ipv6.ip6_null_entry; + BACKTRACK(net, &fl->fl6_src); out: dst_hold(&rt->u.dst); @@ -1375,6 +1401,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net_device *dev) { int flags = RT6_LOOKUP_F_HAS_SADDR; + struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, @@ -1391,7 +1418,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); + return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + flags, __ip6_route_redirect); } void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, @@ -1400,10 +1428,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, { struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; + struct net *net = dev_net(neigh->dev); rt = ip6_route_redirect(dest, src, saddr, neigh->dev); - if (rt == &ip6_null_entry) { + if (rt == net->ipv6.ip6_null_entry) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); @@ -1448,7 +1477,8 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->u.dst)); if (ip6_ins_rt(nrt)) goto out; @@ -1476,9 +1506,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, struct net_device *dev, u32 pmtu) { struct rt6_info *rt, *nrt; + struct net *net = dev_net(dev); int allfrag = 0; 
- rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); + rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); if (rt == NULL) return; @@ -1511,7 +1542,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1537,7 +1568,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -1552,7 +1583,8 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(ort->rt6i_dev); + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1583,14 +1615,15 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_get_route_info(struct net *net, + struct in6_addr *prefix, int prefixlen, struct in6_addr *gwaddr, int ifindex) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_INFO); + table = fib6_get_table(net, RT6_TABLE_INFO); if (table == NULL) return NULL; @@ -1614,7 +1647,8 @@ out: return rt; } -static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, +static struct rt6_info *rt6_add_route_info(struct net *net, + struct in6_addr *prefix, int 
prefixlen, struct in6_addr *gwaddr, int ifindex, unsigned pref) { @@ -1625,6 +1659,9 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = net, }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1636,7 +1673,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle ip6_route_add(&cfg); - return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); } #endif @@ -1645,7 +1682,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); if (table == NULL) return NULL; @@ -1674,6 +1711,9 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_nlinfo.pid = 0, + .fc_nlinfo.nlh = NULL, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_gateway, gwaddr); @@ -1683,13 +1723,13 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(void) +void rt6_purge_dflt_routers(struct net *net) { struct rt6_info *rt; struct fib6_table *table; /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(RT6_TABLE_DFLT); + table = fib6_get_table(net, RT6_TABLE_DFLT); if (table == NULL) return; @@ -1706,7 +1746,8 @@ restart: read_unlock_bh(&table->tb6_lock); } -static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, +static void rtmsg_to_fib6_config(struct net *net, + struct in6_rtmsg *rtmsg, struct fib6_config *cfg) { memset(cfg, 0, sizeof(*cfg)); @@ -1719,14 +1760,14 @@ static void 
rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; - cfg->fc_nlinfo.nl_net = &init_net; + cfg->fc_nlinfo.nl_net = net; ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); } -int ipv6_route_ioctl(unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib6_config cfg; struct in6_rtmsg rtmsg; @@ -1742,7 +1783,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) if (err) return -EFAULT; - rtmsg_to_fib6_config(&rtmsg, &cfg); + rtmsg_to_fib6_config(net, &rtmsg, &cfg); rtnl_lock(); switch (cmd) { @@ -1821,21 +1862,22 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, int anycast) { - struct rt6_info *rt = ip6_dst_alloc(); + struct net *net = dev_net(idev->dev); + struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(init_net.loopback_dev); + dev_hold(net->loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = init_net.loopback_dev; + rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; rt->u.dst.obsolete = -1; @@ -1852,26 +1894,39 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); return rt; } +struct arg_dev_net { + struct net_device *dev; + struct net *net; +}; + static int 
fib6_ifdown(struct rt6_info *rt, void *arg) { - if (((void*)rt->rt6i_dev == arg || arg == NULL) && - rt != &ip6_null_entry) { + struct net_device *dev = ((struct arg_dev_net *)arg)->dev; + struct net *net = ((struct arg_dev_net *)arg)->net; + + if (((void *)rt->rt6i_dev == dev || dev == NULL) && + rt != net->ipv6.ip6_null_entry) { RT6_TRACE("deleted by ifdown %p\n", rt); return -1; } return 0; } -void rt6_ifdown(struct net_device *dev) +void rt6_ifdown(struct net *net, struct net_device *dev) { - fib6_clean_all(fib6_ifdown, 0, dev); + struct arg_dev_net adn = { + .dev = dev, + .net = net, + }; + + fib6_clean_all(net, fib6_ifdown, 0, &adn); } struct rt6_mtu_change_arg @@ -1884,6 +1939,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; + struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -1915,7 +1971,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->u.dst) < arg->mtu && dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -1927,7 +1983,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) .mtu = mtu, }; - fib6_clean_all(rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -1964,7 +2020,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; - cfg->fc_nlinfo.nl_net = skb->sk->sk_net; + cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2010,13 +2066,9 @@ errout: static int inet6_rtm_delroute(struct 
sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2026,13 +2078,9 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; - if (net != &init_net) - return -EINVAL; - err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2122,7 +2170,8 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_IIF, iif); else if (dst) { struct in6_addr saddr_buf; - if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) + if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev, + dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } @@ -2167,7 +2216,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct net *net = in_skb->sk->sk_net; + struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2175,9 +2224,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct flowi fl; int err, iif = 0; - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; @@ -2207,7 +2253,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (iif) { struct net_device *dev; - dev = __dev_get_by_index(&init_net, iif); + dev = __dev_get_by_index(net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2226,7 +2272,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = 
(struct rt6_info*) ip6_route_output(NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); skb->dst = &rt->u.dst; err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, @@ -2237,7 +2283,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2245,6 +2291,7 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; + struct net *net = info->nl_net; u32 seq; int err; @@ -2263,11 +2310,31 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, info->pid, - RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); + err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); +} + +static int ip6_route_dev_notify(struct notifier_block *this, + unsigned long event, void *data) +{ + struct net_device *dev = (struct net_device *)data; + struct net *net = dev_net(dev); + + if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { + net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); + net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); +#endif + } + + return NOTIFY_OK; } /* @@ -2316,13 +2383,33 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int ipv6_route_show(struct seq_file *m, void *v) { - fib6_clean_all(rt6_info_route, 0, m); + struct net *net = (struct net *)m->private; + fib6_clean_all(net, rt6_info_route, 0, m); 
return 0; } static int ipv6_route_open(struct inode *inode, struct file *file) { - return single_open(file, ipv6_route_show, NULL); + int err; + struct net *net = get_proc_net(inode); + if (!net) + return -ENXIO; + + err = single_open(file, ipv6_route_show, net); + if (err < 0) { + put_net(net); + return err; + } + + return 0; +} + +static int ipv6_route_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return single_release(inode, file); } static const struct file_operations ipv6_route_proc_fops = { @@ -2330,24 +2417,46 @@ static const struct file_operations ipv6_route_proc_fops = { .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = ipv6_route_release, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) { + struct net *net = (struct net *)seq->private; seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", - rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, - rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, - rt6_stats.fib_rt_cache, - atomic_read(&ip6_dst_ops.entries), - rt6_stats.fib_discarded_routes); + net->ipv6.rt6_stats->fib_nodes, + net->ipv6.rt6_stats->fib_route_nodes, + net->ipv6.rt6_stats->fib_rt_alloc, + net->ipv6.rt6_stats->fib_rt_entries, + net->ipv6.rt6_stats->fib_rt_cache, + atomic_read(&net->ipv6.ip6_dst_ops->entries), + net->ipv6.rt6_stats->fib_discarded_routes); return 0; } static int rt6_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, rt6_stats_seq_show, NULL); + int err; + struct net *net = get_proc_net(inode); + if (!net) + return -ENXIO; + + err = single_open(file, rt6_stats_seq_show, net); + if (err < 0) { + put_net(net); + return err; + } + + return 0; +} + +static int rt6_stats_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = (struct net *)seq->private; + put_net(net); + 
return single_release(inode, file); } static const struct file_operations rt6_stats_seq_fops = { @@ -2355,42 +2464,8 @@ static const struct file_operations rt6_stats_seq_fops = { .open = rt6_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = rt6_stats_seq_release, }; - -static int ipv6_route_proc_init(struct net *net) -{ - int ret = -ENOMEM; - if (!proc_net_fops_create(net, "ipv6_route", - 0, &ipv6_route_proc_fops)) - goto out; - - if (!proc_net_fops_create(net, "rt6_stats", - S_IRUGO, &rt6_stats_seq_fops)) - goto out_ipv6_route; - - ret = 0; -out: - return ret; -out_ipv6_route: - proc_net_remove(net, "ipv6_route"); - goto out; -} - -static void ipv6_route_proc_fini(struct net *net) -{ - proc_net_remove(net, "ipv6_route"); - proc_net_remove(net, "rt6_stats"); -} -#else -static inline int ipv6_route_proc_init(struct net *net) -{ - return 0; -} -static inline void ipv6_route_proc_fini(struct net *net) -{ - return ; -} #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL @@ -2399,10 +2474,11 @@ static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int delay = init_net.ipv6.sysctl.flush_delay; + struct net *net = current->nsproxy->net_ns; + int delay = net->ipv6.sysctl.flush_delay; if (write) { proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay); + fib6_run_gc(delay <= 0 ? 
~0UL : (unsigned long)delay, net); return 0; } else return -EINVAL; @@ -2419,7 +2495,7 @@ ctl_table ipv6_route_table_template[] = { { .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", - .data = &ip6_dst_ops.gc_thresh, + .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2505,33 +2581,141 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) table = kmemdup(ipv6_route_table_template, sizeof(ipv6_route_table_template), GFP_KERNEL); + + if (table) { + table[0].data = &net->ipv6.sysctl.flush_delay; + table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; + table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + } + return table; } #endif +static int ip6_route_net_init(struct net *net) +{ + int ret = 0; + + ret = -ENOMEM; + net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, + sizeof(*net->ipv6.ip6_dst_ops), + GFP_KERNEL); + if (!net->ipv6.ip6_dst_ops) + goto out; + net->ipv6.ip6_dst_ops->dst_net = net; + + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, + sizeof(*net->ipv6.ip6_null_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_null_entry) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_null_entry; + net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, + sizeof(*net->ipv6.ip6_prohibit_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_prohibit_entry) { + kfree(net->ipv6.ip6_null_entry); + goto out; + } + net->ipv6.ip6_prohibit_entry->u.dst.path = + (struct dst_entry 
*)net->ipv6.ip6_prohibit_entry; + net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + + net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, + sizeof(*net->ipv6.ip6_blk_hole_entry), + GFP_KERNEL); + if (!net->ipv6.ip6_blk_hole_entry) { + kfree(net->ipv6.ip6_null_entry); + kfree(net->ipv6.ip6_prohibit_entry); + goto out; + } + net->ipv6.ip6_blk_hole_entry->u.dst.path = + (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; +#endif + +#ifdef CONFIG_PROC_FS + proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); + proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); +#endif + net->ipv6.ip6_rt_gc_expire = 30*HZ; + + ret = 0; +out: + return ret; + +out_ip6_dst_ops: + kfree(net->ipv6.ip6_dst_ops); + goto out; +} + +static void ip6_route_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +#endif + kfree(net->ipv6.ip6_null_entry); +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + kfree(net->ipv6.ip6_prohibit_entry); + kfree(net->ipv6.ip6_blk_hole_entry); +#endif + kfree(net->ipv6.ip6_dst_ops); +} + +static struct pernet_operations ip6_route_net_ops = { + .init = ip6_route_net_init, + .exit = ip6_route_net_exit, +}; + +static struct notifier_block ip6_route_dev_notifier = { + .notifier_call = ip6_route_dev_notify, + .priority = 0, +}; + int __init ip6_route_init(void) { int ret; - ip6_dst_ops.kmem_cachep = + ret = -ENOMEM; + ip6_dst_ops_template.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ip6_dst_ops.kmem_cachep) - return -ENOMEM; + if (!ip6_dst_ops_template.kmem_cachep) + goto out;; - ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; - - ret = fib6_init(); + ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) goto out_kmem_cache; - ret = ipv6_route_proc_init(&init_net); + /* Registering of the loopback 
is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif + ret = fib6_init(); if (ret) - goto out_fib6_init; + goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_proc_init; + goto out_fib6_init; ret = fib6_rules_init(); if (ret) @@ -2543,7 +2727,10 @@ int __init ip6_route_init(void) __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) goto fib6_rules_init; - ret = 0; + ret = register_netdevice_notifier(&ip6_route_dev_notifier); + if (ret) + goto fib6_rules_init; + out: return ret; @@ -2551,22 +2738,21 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_proc_init: - ipv6_route_proc_fini(&init_net); out_fib6_init: - rt6_ifdown(NULL); fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); out_kmem_cache: - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; } void ip6_route_cleanup(void) { + unregister_netdevice_notifier(&ip6_route_dev_notifier); fib6_rules_cleanup(); - ipv6_route_proc_fini(&init_net); xfrm6_fini(); - rt6_ifdown(NULL); fib6_gc_cleanup(); - kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + unregister_pernet_subsys(&ip6_route_net_ops); + kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1656c003b989..1b8196c8d145 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -339,11 +339,11 @@ out: 
skb_reset_network_header(skb2); /* Try to guess incoming interface */ - rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0); + rt6i = rt6_lookup(&init_net, &iph6->saddr, NULL, NULL, 0); if (rt6i && rt6i->rt6i_dev) { skb2->dev = rt6i->rt6i_dev; - rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0); + rt6i = rt6_lookup(&init_net, &iph6->daddr, &iph6->saddr, NULL, 0); if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); @@ -393,7 +393,7 @@ isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev) fl.oif = dev->ifindex; security_skb_classify_flow(skb, &fl); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) { addr6 = (struct in6_addr*)&neigh->primary_key; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c new file mode 100644 index 000000000000..3a622e7abc02 --- /dev/null +++ b/net/ipv6/syncookies.c @@ -0,0 +1,267 @@ +/* + * IPv6 Syncookies implementation for the Linux kernel + * + * Authors: + * Glenn Griffin <ggriffin.kernel@gmail.com> + * + * Based on IPv4 implementation by Andi Kleen + * linux/net/ipv4/syncookies.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/tcp.h> +#include <linux/random.h> +#include <linux/cryptohash.h> +#include <linux/kernel.h> +#include <net/ipv6.h> +#include <net/tcp.h> + +extern int sysctl_tcp_syncookies; +extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; + +#define COOKIEBITS 24 /* Upper bits store count */ +#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) + +/* + * This table has to be sorted and terminated with (__u16)-1. + * XXX generate a better table. 
+ * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * + * Taken directly from ipv4 implementation. + * Should this list be modified for ipv6 use or is it close enough? + * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart + */ +static __u16 const msstab[] = { + 64 - 1, + 256 - 1, + 512 - 1, + 536 - 1, + 1024 - 1, + 1440 - 1, + 1460 - 1, + 4312 - 1, + (__u16)-1 +}; +/* The number doesn't include the -1 terminator */ +#define NUM_MSS (ARRAY_SIZE(msstab) - 1) + +/* + * This (misnamed) value is the age of syncookie which is permitted. + * Its ideal value should be dependent on TCP_TIMEOUT_INIT and + * sysctl_tcp_retries1. It's a rather complicated formula (exponential + * backoff) to compute at runtime so it's currently hardcoded here. + */ +#define COUNTER_TRIES 4 + +static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct sock *child; + + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); + if (child) + inet_csk_reqsk_queue_add(sk, req, child); + else + reqsk_free(req); + + return child; +} + +static DEFINE_PER_CPU(__u32, cookie_scratch)[16 + 5 + SHA_WORKSPACE_WORDS]; + +static u32 cookie_hash(struct in6_addr *saddr, struct in6_addr *daddr, + __be16 sport, __be16 dport, u32 count, int c) +{ + __u32 *tmp = __get_cpu_var(cookie_scratch); + + /* + * we have 320 bits of information to hash, copy in the remaining + * 192 bits required for sha_transform, from the syncookie_secret + * and overwrite the digest with the secret + */ + memcpy(tmp + 10, syncookie_secret[c], 44); + memcpy(tmp, saddr, 16); + memcpy(tmp + 4, daddr, 16); + tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; + tmp[9] = count; + sha_transform(tmp + 16, (__u8 *)tmp, tmp + 16 + 5); + + return tmp[17]; +} + +static __u32 secure_tcp_syn_cookie(struct in6_addr *saddr, struct in6_addr *daddr, + __be16 sport, __be16 
dport, __u32 sseq, + __u32 count, __u32 data) +{ + return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + + sseq + (count << COOKIEBITS) + + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) + & COOKIEMASK)); +} + +static __u32 check_tcp_syn_cookie(__u32 cookie, struct in6_addr *saddr, + struct in6_addr *daddr, __be16 sport, + __be16 dport, __u32 sseq, __u32 count, + __u32 maxdiff) +{ + __u32 diff; + + cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; + + diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); + if (diff >= maxdiff) + return (__u32)-1; + + return (cookie - + cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) + & COOKIEMASK; +} + +__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + int mssind; + const __u16 mss = *mssp; + + tcp_sk(sk)->last_synq_overflow = jiffies; + + for (mssind = 0; mss > msstab[mssind + 1]; mssind++) + ; + *mssp = msstab[mssind] + 1; + + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); + + return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, + th->dest, ntohl(th->seq), + jiffies / (HZ * 60), mssind); +} + +static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + __u32 seq = ntohl(th->seq) - 1; + __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, + th->source, th->dest, seq, + jiffies / (HZ * 60), COUNTER_TRIES); + + return mssind < NUM_MSS ? 
msstab[mssind] + 1 : 0; +} + +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct inet6_request_sock *ireq6; + struct tcp_request_sock *treq; + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; + struct sock *ret = sk; + struct request_sock *req; + int mss; + struct dst_entry *dst; + __u8 rcv_wscale; + + if (!sysctl_tcp_syncookies || !th->ack) + goto out; + + if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || + (mss = cookie_check(skb, cookie)) == 0) { + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED); + goto out; + } + + NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV); + + ret = NULL; + req = inet6_reqsk_alloc(&tcp6_request_sock_ops); + if (!req) + goto out; + + ireq = inet_rsk(req); + ireq6 = inet6_rsk(req); + treq = tcp_rsk(req); + ireq6->pktopts = NULL; + + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } + + req->mss = mss; + ireq->rmt_port = th->source; + ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); + if (ipv6_opt_accepted(sk, skb) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + atomic_inc(&skb->users); + ireq6->pktopts = skb; + } + + ireq6->iif = sk->sk_bound_dev_if; + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq6->iif = inet6_iif(skb); + + req->expires = 0UL; + req->retrans = 0; + ireq->snd_wscale = ireq->rcv_wscale = ireq->tstamp_ok = 0; + ireq->wscale_ok = ireq->sack_ok = 0; + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; + + /* + * We need to lookup the dst_entry to get the correct window size. + * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten + * me if there is a preferred way. 
+ */ + { + struct in6_addr *final_p = NULL, final; + struct flowi fl; + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + if (np->opt && np->opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; + } + ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_sk(sk)->sport; + security_req_classify_flow(req, &fl); + if (ip6_dst_lookup(sk, &dst, &fl)) { + reqsk_free(req); + goto out; + } + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto out; + } + + req->window_clamp = dst_metric(dst, RTAX_WINDOW); + tcp_select_initial_window(tcp_full_space(sk), req->mss, + &req->rcv_wnd, &req->window_clamp, + 0, &rcv_wscale); + + ireq->rcv_wscale = rcv_wscale; + + ret = get_cookie_sock(sk, skb, req, dst); + +out: return ret; +} + diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index d6d3e68086f8..3804dcbbfab0 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -71,24 +71,11 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) goto out_ipv6_table; + ipv6_table[0].child = ipv6_route_table; ipv6_icmp_table = ipv6_icmp_sysctl_init(net); if (!ipv6_icmp_table) goto out_ipv6_route_table; - - ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay; - /* ipv6_route_table[1].data will be handled when we have - routes per namespace */ - ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; - ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; - ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; - ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; - ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; - 
ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; - ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; - ipv6_table[0].child = ipv6_route_table; - - ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time; ipv6_table[1].child = ipv6_icmp_table; ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 12750f2b05ab..6d851c3c3db9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -69,9 +69,6 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> -/* Socket used for sending RSTs and ACKs */ -static struct socket *tcp6_socket; - static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); static void tcp_v6_send_check(struct sock *sk, int len, @@ -324,7 +321,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq; - sk = inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, &hdr->daddr, + sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { @@ -455,8 +452,7 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -464,6 +460,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p = NULL, final; struct flowi fl; + struct dst_entry *dst; int err = -1; memset(&fl, 0, sizeof(fl)); @@ -476,24 +473,22 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.fl_ip_sport = inet_sk(sk)->sport; security_req_classify_flow(req, &fl); - if (dst == NULL) { - opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; 
- ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) - goto done; + opt = np->opt; + if (opt && opt->srcrt) { + struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + final_p = &final; } + err = ip6_dst_lookup(sk, &dst, &fl); + if (err) + goto done; + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + goto done; + skb = tcp_make_synack(sk, dst, req); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -514,6 +509,20 @@ done: return err; } +static inline void syn_flood_warning(struct sk_buff *skb) +{ +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + printk(KERN_INFO + "TCPv6: Possible SYN flooding on port %d. " + "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest)); + else +#endif + printk(KERN_INFO + "TCPv6: Possible SYN flooding on port %d. 
" + "Dropping request.\n", ntohs(tcp_hdr(skb)->dest)); +} + static void tcp_v6_reqsk_destructor(struct request_sock *req) { if (inet6_rsk(req)->pktopts) @@ -741,7 +750,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, hp = tcp_get_md5sig_pool(); if (!hp) { - printk(KERN_WARNING "%s(): hash pool not found...\n", __FUNCTION__); + printk(KERN_WARNING "%s(): hash pool not found...\n", __func__); goto clear_hash_noput; } bp = &hp->md5_blk.ip6; @@ -781,17 +790,17 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, /* Now store the hash into the packet */ err = crypto_hash_init(desc); if (err) { - printk(KERN_WARNING "%s(): hash_init failed\n", __FUNCTION__); + printk(KERN_WARNING "%s(): hash_init failed\n", __func__); goto clear_hash; } err = crypto_hash_update(desc, sg, nbytes); if (err) { - printk(KERN_WARNING "%s(): hash_update failed\n", __FUNCTION__); + printk(KERN_WARNING "%s(): hash_update failed\n", __func__); goto clear_hash; } err = crypto_hash_final(desc, md5_hash); if (err) { - printk(KERN_WARNING "%s(): hash_final failed\n", __FUNCTION__); + printk(KERN_WARNING "%s(): hash_final failed\n", __func__); goto clear_hash; } @@ -917,7 +926,7 @@ done_opts: } #endif -static struct request_sock_ops tcp6_request_sock_ops __read_mostly = { +struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .rtx_syn_ack = tcp_v6_send_synack, @@ -979,6 +988,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; + struct net *net = dev_net(skb->dst->dev); + struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(*th); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; @@ -1059,11 +1070,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); - /* sk = 
NULL, but it is safe for now. RST socket required. */ - if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { + /* Pass a socket to ip6_dst_lookup either it is for RST + * Underlying function will use this to retrieve the network + * namespace + */ + if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); return; @@ -1079,6 +1093,8 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; + struct net *net = dev_net(skb->dev); + struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; #ifdef CONFIG_TCP_MD5SIG @@ -1160,9 +1176,9 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); - if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { + if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; } @@ -1202,7 +1218,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet6_lookup_established(sk->sk_net, &tcp_hashinfo, + nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); @@ -1215,9 +1231,9 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) return NULL; } -#if 0 /*def CONFIG_SYN_COOKIES*/ +#ifdef CONFIG_SYN_COOKIES if (!th->rst && !th->syn && th->ack) - sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt)); + sk = cookie_v6_check(sk, skb); #endif return sk; } @@ -1233,6 +1249,11 @@ static int tcp_v6_conn_request(struct 
sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct request_sock *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; +#ifdef CONFIG_SYN_COOKIES + int want_cookie = 0; +#else +#define want_cookie 0 +#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1240,12 +1261,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; - /* - * There are no SYN attacks on IPv6, yet... - */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) - printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); + syn_flood_warning(skb); +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + want_cookie = 1; + else +#endif goto drop; } @@ -1266,39 +1289,51 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_parse_options(skb, &tmp_opt, 0); + if (want_cookie) { + tcp_clear_options(&tmp_opt); + tmp_opt.saw_tstamp = 0; + } + tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - TCP_ECN_create_request(req, tcp_hdr(skb)); treq->pktopts = NULL; - if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { - atomic_inc(&skb->users); - treq->pktopts = skb; - } - treq->iif = sk->sk_bound_dev_if; + if (!want_cookie) + TCP_ECN_create_request(req, tcp_hdr(skb)); + + if (want_cookie) { + isn = cookie_v6_init_sequence(sk, skb, &req->mss); + } else if (!isn) { + if (ipv6_opt_accepted(sk, skb) || + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + atomic_inc(&skb->users); + treq->pktopts = skb; + } + treq->iif = sk->sk_bound_dev_if; - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & 
IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); - if (isn == 0) isn = tcp_v6_init_sequence(skb); + } tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL)) + if (tcp_v6_send_synack(sk, req)) goto drop; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; + if (!want_cookie) { + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + return 0; + } drop: if (req) @@ -1704,7 +1739,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(skb->dev->nd_net, &tcp_hashinfo, + sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); @@ -1787,7 +1822,7 @@ do_time_wait: { struct sock *sk2; - sk2 = inet6_lookup_listener(skb->dev->nd_net, &tcp_hashinfo, + sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { @@ -2094,19 +2129,17 @@ static struct tcp_seq_afinfo tcp6_seq_afinfo = { .seq_fops = &tcp6_seq_fops, }; -int __init tcp6_proc_init(void) +int tcp6_proc_init(struct net *net) { - return tcp_proc_register(&tcp6_seq_afinfo); + return tcp_proc_register(net, &tcp6_seq_afinfo); } -void tcp6_proc_exit(void) +void tcp6_proc_exit(struct net *net) { - tcp_proc_unregister(&tcp6_seq_afinfo); + tcp_proc_unregister(net, &tcp6_seq_afinfo); } #endif -DEFINE_PROTO_INUSE(tcpv6) - struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -2137,12 +2170,11 @@ struct proto tcpv6_prot = { .obj_size = sizeof(struct tcp6_sock), .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, - .hashinfo = &tcp_hashinfo, + .h.hashinfo = &tcp_hashinfo, #ifdef CONFIG_COMPAT 
.compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif - REF_PROTO_INUSE(tcpv6) }; static struct inet6_protocol tcpv6_protocol = { @@ -2164,6 +2196,31 @@ static struct inet_protosw tcpv6_protosw = { INET_PROTOSW_ICSK, }; +static int tcpv6_net_init(struct net *net) +{ + int err; + struct socket *sock; + struct sock *sk; + + err = inet_csk_ctl_sock_create(&sock, PF_INET6, SOCK_RAW, IPPROTO_TCP); + if (err) + return err; + + net->ipv6.tcp_sk = sk = sock->sk; + sk_change_net(sk, net); + return err; +} + +static void tcpv6_net_exit(struct net *net) +{ + sk_release_kernel(net->ipv6.tcp_sk); +} + +static struct pernet_operations tcpv6_net_ops = { + .init = tcpv6_net_init, + .exit = tcpv6_net_exit, +}; + int __init tcpv6_init(void) { int ret; @@ -2177,8 +2234,7 @@ int __init tcpv6_init(void) if (ret) goto out_tcpv6_protocol; - ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, - SOCK_RAW, IPPROTO_TCP); + ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; out: @@ -2193,7 +2249,7 @@ out_tcpv6_protosw: void tcpv6_exit(void) { - sock_release(tcp6_socket); + unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 53739de829db..30ef7dc5d403 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,9 +51,9 @@ #include <linux/seq_file.h> #include "udp_impl.h" -static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) +int udp_v6_get_port(struct sock *sk, unsigned short snum) { - return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } static struct sock *__udp6_lib_lookup(struct net *net, @@ -70,7 +70,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_net == net && sk->sk_hash == hnum && + 
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); int score = 0; @@ -235,7 +235,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct sock *sk; int err; - sk = __udp6_lib_lookup(skb->dev->nd_net, daddr, uh->dest, + sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); if (sk == NULL) return; @@ -323,6 +323,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); + if (sock_net(s) != sock_net(sk)) + continue; + if (s->sk_hash == num && s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); if (inet->dport) { @@ -480,7 +483,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = __udp6_lib_lookup(skb->dev->nd_net, saddr, uh->source, + sk = __udp6_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, uh->dest, inet6_iif(skb), udptable); if (sk == NULL) { @@ -789,9 +792,7 @@ do_udp_sendmsg: else hlimit = np->hop_limit; if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - if (hlimit < 0) - hlimit = ipv6_get_hoplimit(dst->dev); + hlimit = ip6_dst_hoplimit(dst); } if (tclass < 0) { @@ -976,30 +977,30 @@ int udp6_seq_show(struct seq_file *seq, void *v) return 0; } -static struct file_operations udp6_seq_fops; static struct udp_seq_afinfo udp6_seq_afinfo = { - .owner = THIS_MODULE, .name = "udp6", .family = AF_INET6, .hashtable = udp_hash, - .seq_show = udp6_seq_show, - .seq_fops = &udp6_seq_fops, + .seq_fops = { + .owner = THIS_MODULE, + }, + .seq_ops = { + .show = udp6_seq_show, + }, }; -int __init udp6_proc_init(void) +int udp6_proc_init(struct net *net) { - return udp_proc_register(&udp6_seq_afinfo); + return udp_proc_register(net, &udp6_seq_afinfo); } -void udp6_proc_exit(void) { - 
udp_proc_unregister(&udp6_seq_afinfo); +void udp6_proc_exit(struct net *net) { + udp_proc_unregister(net, &udp6_seq_afinfo); } #endif /* CONFIG_PROC_FS */ /* ------------------------------------------------------------------------ */ -DEFINE_PROTO_INUSE(udpv6) - struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, @@ -1021,11 +1022,11 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), + .h.udp_hash = udp_hash, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - REF_PROTO_INUSE(udpv6) }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 21be3a83e7bc..321b81a4d418 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -11,6 +11,8 @@ extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); +extern int udp_v6_get_port(struct sock *sk, unsigned short snum); + extern int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int udpv6_setsockopt(struct sock *sk, int level, int optname, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 87d4202522ee..491efd00a866 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -35,13 +35,6 @@ static struct inet6_protocol udplitev6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static int udplite_v6_get_port(struct sock *sk, unsigned short snum) -{ - return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal); -} - -DEFINE_PROTO_INUSE(udplitev6) - struct proto udplitev6_prot = { .name = "UDPLITEv6", .owner = THIS_MODULE, @@ -58,13 +51,13 @@ struct proto udplitev6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, - .get_port = 
udplite_v6_get_port, + .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), + .h.udp_hash = udplite_hash, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - REF_PROTO_INUSE(udplitev6) }; static struct inet_protosw udplite6_protosw = { @@ -103,23 +96,40 @@ void udplitev6_exit(void) } #ifdef CONFIG_PROC_FS -static struct file_operations udplite6_seq_fops; static struct udp_seq_afinfo udplite6_seq_afinfo = { - .owner = THIS_MODULE, .name = "udplite6", .family = AF_INET6, .hashtable = udplite_hash, - .seq_show = udp6_seq_show, - .seq_fops = &udplite6_seq_fops, + .seq_fops = { + .owner = THIS_MODULE, + }, + .seq_ops = { + .show = udp6_seq_show, + }, +}; + +static int udplite6_proc_init_net(struct net *net) +{ + return udp_proc_register(net, &udplite6_seq_afinfo); +} + +static void udplite6_proc_exit_net(struct net *net) +{ + udp_proc_unregister(net, &udplite6_seq_afinfo); +} + +static struct pernet_operations udplite6_net_ops = { + .init = udplite6_proc_init_net, + .exit = udplite6_proc_exit_net, }; int __init udplite6_proc_init(void) { - return udp_proc_register(&udplite6_seq_afinfo); + return register_pernet_subsys(&udplite6_net_ops); } void udplite6_proc_exit(void) { - udp_proc_unregister(&udplite6_seq_afinfo); + unregister_pernet_subsys(&udplite6_net_ops); } #endif diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a4714d76ae6b..a71c7ddcb41e 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,9 +59,6 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { struct xfrm_state *x = NULL; - int wildcard = 0; - xfrm_address_t *xany; - int nh = 0; int i = 0; /* Allocate new secpath or COW existing one. 
*/ @@ -83,10 +80,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, goto drop; } - xany = (xfrm_address_t *)&in6addr_any; - for (i = 0; i < 3; i++) { xfrm_address_t *dst, *src; + switch (i) { case 0: dst = daddr; @@ -94,16 +90,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; case 1: /* lookup state with wild-card source address */ - wildcard = 1; dst = daddr; - src = xany; + src = (xfrm_address_t *)&in6addr_any; break; - case 2: default: /* lookup state with wild-card addresses */ - wildcard = 1; /* XXX */ - dst = xany; - src = xany; + dst = (xfrm_address_t *)&in6addr_any; + src = (xfrm_address_t *)&in6addr_any; break; } @@ -113,39 +106,19 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, spin_lock(&x->lock); - if (wildcard) { - if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - } - - if (unlikely(x->km.state != XFRM_STATE_VALID)) { + if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) && + likely(x->km.state == XFRM_STATE_VALID) && + !xfrm_state_check_expire(x)) { spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - if (xfrm_state_check_expire(x)) { + if (x->type->input(x, skb) > 0) { + /* found a valid state */ + break; + } + } else spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - - spin_unlock(&x->lock); - - nh = x->type->input(x, skb); - if (nh <= 0) { - xfrm_state_put(x); - x = NULL; - continue; - } - /* Found a state */ - break; + xfrm_state_put(x); + x = NULL; } if (!x) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7d20199ee1f3..8f1e0543b3c4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -38,7 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl); err = 
dst->error; if (dst->error) { @@ -57,8 +57,9 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) if (IS_ERR(dst)) return -EHOSTUNREACH; - ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(ip6_dst_idev(dst)->dev, + (struct in6_addr *)&daddr->a6, 0, + (struct in6_addr *)&saddr->a6); dst_release(dst); return 0; } @@ -246,7 +247,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { struct inet6_dev *loopback_idev = - in6_dev_get(dev->nd_net->loopback_dev); + in6_dev_get(dev_net(dev)->loopback_dev); BUG_ON(!loopback_idev); do { diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index ff1e1db8e236..89884a4f23aa 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -49,125 +49,102 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +/* distribution counting sort function for xfrm_state and xfrm_tmpl */ static int -__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +__xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass) { int i; - int j = 0; + int class[XFRM_MAX_DEPTH]; + int count[maxclass]; - /* Rule 1: select IPsec transport except AH */ - for (i = 0; i < n; i++) { - if (src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto != IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + memset(count, 0, sizeof(count)); - /* Rule 2: select MIPv6 RO or inbound trigger */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } + int c; + class[i] = c = cmp(src[i]); + count[c]++; } - if (j == n) - goto end; -#endif - /* Rule 3: select IPsec transport AH 
*/ - for (i = 0; i < n; i++) { - if (src[i] && - src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto == IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + for (i = 2; i < maxclass; i++) + count[i] += count[i - 1]; - /* Rule 4: select IPsec tunnel */ for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->props.mode == XFRM_MODE_TUNNEL || - src[i]->props.mode == XFRM_MODE_BEET)) { - dst[j++] = src[i]; - src[i] = NULL; - } + dst[count[class[i] - 1]++] = src[i]; + src[i] = 0; } - if (likely(j == n)) - goto end; - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - - end: return 0; } -static int -__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +/* + * Rule for xfrm_state: + * + * rule 1: select IPsec transport except AH + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec transport AH + * rule 4: select IPsec tunnel + * rule 5: others + */ +static int __xfrm6_state_sort_cmp(void *p) { - int i; - int j = 0; - - /* Rule 1: select IPsec transport */ - for (i = 0; i < n; i++) { - if (src[i]->mode == XFRM_MODE_TRANSPORT) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; - - /* Rule 2: select MIPv6 RO or inbound trigger */ + struct xfrm_state *v = p; + + switch (v->props.mode) { + case XFRM_MODE_TRANSPORT: + if (v->id.proto != IPPROTO_AH) + return 1; + else + return 3; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; #endif - - /* Rule 3: select IPsec tunnel */ - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->mode == XFRM_MODE_TUNNEL || - src[i]->mode == XFRM_MODE_BEET)) { - dst[j++] = src[i]; - src[i] = 
NULL; - } + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 4; } - if (likely(j == n)) - goto end; + return 5; +} - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } +static int +__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +{ + return __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_state_sort_cmp, 6); +} + +/* + * Rule for xfrm_tmpl: + * + * rule 1: select IPsec transport + * rule 2: select MIPv6 RO or inbound trigger + * rule 3: select IPsec tunnel + * rule 4: others + */ +static int __xfrm6_tmpl_sort_cmp(void *p) +{ + struct xfrm_tmpl *v = p; + switch (v->mode) { + case XFRM_MODE_TRANSPORT: + return 1; +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_IN_TRIGGER: + return 2; +#endif + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + return 3; } + return 4; +} - end: - return 0; +static int +__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +{ + return __xfrm6_sort((void **)dst, (void **)src, n, + __xfrm6_tmpl_sort_cmp, 5); } int xfrm6_extract_header(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 639fe8a6ff1e..c2b278138604 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -140,12 +140,26 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); +static int __xfrm6_tunnel_spi_check(u32 spi) +{ + struct xfrm6_tunnel_spi *x6spi; + int index = xfrm6_tunnel_spi_hash_byspi(spi); + struct hlist_node *pos; + + hlist_for_each_entry(x6spi, pos, + &xfrm6_tunnel_spi_byspi[index], + list_byspi) { + if (x6spi->spi == spi) + return -1; + } + return index; +} + static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) { u32 spi; struct xfrm6_tunnel_spi *x6spi; - struct hlist_node *pos; - unsigned index; + int index; if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || xfrm6_tunnel_spi >= 
XFRM6_TUNNEL_SPI_MAX) @@ -154,32 +168,19 @@ static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) xfrm6_tunnel_spi++; for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { - index = xfrm6_tunnel_spi_hash_byspi(spi); - hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], - list_byspi) { - if (x6spi->spi == spi) - goto try_next_1; - } - xfrm6_tunnel_spi = spi; - goto alloc_spi; -try_next_1:; + index = __xfrm6_tunnel_spi_check(spi); + if (index >= 0) + goto alloc_spi; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) { - index = xfrm6_tunnel_spi_hash_byspi(spi); - hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], - list_byspi) { - if (x6spi->spi == spi) - goto try_next_2; - } - xfrm6_tunnel_spi = spi; - goto alloc_spi; -try_next_2:; + index = __xfrm6_tunnel_spi_check(spi); + if (index >= 0) + goto alloc_spi; } spi = 0; goto out; alloc_spi: + xfrm6_tunnel_spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; |