diff options
Diffstat (limited to 'net/ipv6')
62 files changed, 3801 insertions, 2215 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 87c23a73d284..24f3aa0f2a35 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -5,11 +5,12 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ + addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ - exthdrs.o sysctl_net_ipv6.o datagram.o \ - ip6_flowlabel.o inet6_connection_sock.o + exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o +ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e8c347579da9..e40213db9e4c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -101,8 +101,16 @@ #define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) #ifdef CONFIG_SYSCTL -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); -static void addrconf_sysctl_unregister(struct ipv6_devconf *p); +static void addrconf_sysctl_register(struct inet6_dev *idev); +static void addrconf_sysctl_unregister(struct inet6_dev *idev); +#else +static inline void addrconf_sysctl_register(struct inet6_dev *idev) +{ +} + +static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) +{ +} #endif #ifdef CONFIG_IPV6_PRIVACY @@ -141,7 +149,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); +static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); @@ -256,16 +265,13 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { if (snmp_mib_init((void **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib)) < 0) goto err_ip; if (snmp_mib_init((void **)idev->stats.icmpv6, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib)) < 0) goto err_icmp; if (snmp_mib_init((void **)idev->stats.icmpv6msg, - sizeof(struct icmpv6msg_mib), - __alignof__(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; @@ -329,7 +335,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; - memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); + memcpy(&ndev->cnf, dev->nd_net->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); @@ -366,9 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) in6_dev_hold(ndev); #ifdef CONFIG_IPV6_PRIVACY - init_timer(&ndev->regen_timer); - ndev->regen_timer.function = ipv6_regen_rndid; - ndev->regen_timer.data = (unsigned long) ndev; + setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) @@ -379,6 +383,13 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) "%s: Disabled Privacy Extensions\n", dev->name); ndev->cnf.use_tempaddr = -1; + + if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { + printk(KERN_INFO + "%s: Disabled Multicast RS\n", + dev->name); + ndev->cnf.rtr_solicits = 0; + } } else { in6_dev_hold(ndev); ipv6_regen_rndid((unsigned long) ndev); @@ -390,13 +401,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, - NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); - addrconf_sysctl_register(ndev, &ndev->cnf); -#endif + addrconf_sysctl_register(ndev); /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); @@ -452,18 +457,18 @@ static void dev_forward_change(struct inet6_dev *idev) } -static void addrconf_forward_change(void) +static void addrconf_forward_change(struct net *net, __s32 newf) { struct net_device *dev; struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); - idev->cnf.forwarding = ipv6_devconf.forwarding; + int changed = (!idev->cnf.forwarding) ^ (!newf); + idev->cnf.forwarding = newf; if (changed) dev_forward_change(idev); } @@ -471,6 +476,25 @@ static void addrconf_forward_change(void) } read_unlock(&dev_base_lock); } + +static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +{ + struct net *net; + + net = (struct net *)table->extra2; + if (p == &net->ipv6.devconf_dflt->forwarding) + return; + + if (p == &net->ipv6.devconf_all->forwarding) { + __s32 newf = net->ipv6.devconf_all->forwarding; + net->ipv6.devconf_dflt->forwarding = newf; + addrconf_forward_change(net, newf); + } else if ((!*p) ^ (!old)) + dev_forward_change((struct inet6_dev *)table->extra1); + + if (*p) + rt6_purge_dflt_routers(); +} #endif /* Nobody refers to this ifaddr, destroy it */ @@ -537,7 +561,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(addr, idev->dev)) { + if (ipv6_chk_same_addr(&init_net, addr, idev->dev)) { ADBG(("ipv6_add_addr: already assigned\n")); err = -EEXIST; goto out; @@ -876,35 +900,6 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -/* static matching label */ -static inline int ipv6_saddr_label(const struct in6_addr *addr, int type) -{ - /* - * prefix (longest match) label - * ----------------------------- - * ::1/128 0 - * ::/0 1 - * 2002::/16 2 - * ::/96 3 - * ::ffff:0:0/96 4 - * fc00::/7 5 - * 2001::/32 6 - */ - if (type & IPV6_ADDR_LOOPBACK) - return 0; - else if (type & IPV6_ADDR_COMPATv4) - return 3; - else if (type & IPV6_ADDR_MAPPED) - return 4; - else if (addr->s6_addr32[0] == htonl(0x20010000)) - return 6; - else if (addr->s6_addr16[0] == htons(0x2002)) - return 2; - else if ((addr->s6_addr[0] & 0xfe) == 0xfc) - return 5; - return 1; -} - int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -912,7 +907,8 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, struct inet6_ifaddr *ifa_result = NULL; int daddr_type = __ipv6_addr_type(daddr); int daddr_scope = __ipv6_addr_src_scope(daddr_type); - u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); + int daddr_ifindex = daddr_dev ? daddr_dev->ifindex : 0; + u32 daddr_label = ipv6_addr_label(daddr, daddr_type, daddr_ifindex); struct net_device *dev; memset(&hiscore, 0, sizeof(hiscore)); @@ -1085,11 +1081,15 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, /* Rule 6: Prefer matching label */ if (hiscore.rule < 6) { - if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) + if (ipv6_addr_label(&ifa_result->addr, + hiscore.addr_type, + ifa_result->idev->dev->ifindex) == daddr_label) hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; hiscore.rule++; } - if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { + if (ipv6_addr_label(&ifa->addr, + score.addr_type, + ifa->idev->dev->ifindex) == daddr_label) { score.attrs |= IPV6_SADDR_SCORE_LABEL; if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { score.rule = 6; @@ -1207,13 +1207,16 @@ static int ipv6_count_addresses(struct inet6_dev *idev) return cnt; } -int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) +int ipv6_chk_addr(struct net *net, struct in6_addr *addr, + struct net_device *dev, int strict) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp->flags&IFA_F_TENTATIVE)) { if (dev == NULL || ifp->idev->dev == dev || @@ -1224,16 +1227,18 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) read_unlock_bh(&addrconf_hash_lock); return ifp != NULL; } - EXPORT_SYMBOL(ipv6_chk_addr); static -int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) +int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev) break; @@ -1242,13 +1247,16 @@ int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) return ifp != NULL; } -struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) +struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, struct in6_addr *addr, + struct net_device *dev, int strict) { struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_equal(&ifp->addr, addr)) { if (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { @@ -1435,6 +1443,9 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_arcnet(eui, dev); case ARPHRD_INFINIBAND: return addrconf_ifid_infiniband(eui, dev); + case ARPHRD_SIT: + if (dev->priv_flags & IFF_ISATAP) + return ipv6_isatap_eui64(eui, *(__be32 *)dev->dev_addr); } return -1; } @@ -1470,7 +1481,7 @@ regen: * * - Reserved subnet anycast (RFC 2526) * 11111101 11....11 1xxxxxxx - * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 + * - ISATAP (RFC4214) 6.1 * 00-00-5E-FE-xx-xx-xx-xx * - value 0 * - XXX: already assigned to an address on the device @@ -1731,7 +1742,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) ok: - ifp = ipv6_get_ifaddr(&addr, dev, 1); + ifp = ipv6_get_ifaddr(&init_net, &addr, dev, 1); if (ifp == NULL && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -1889,7 +1900,7 @@ int addrconf_set_dstaddr(void __user *arg) p.iph.ihl = 5; p.iph.protocol = IPPROTO_IPV6; p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (void __user *)&p; + ifr.ifr_ifru.ifru_data = (__force void __user *)&p; oldfs = get_fs(); set_fs(KERNEL_DS); err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); @@ -2201,6 +2212,16 @@ static void addrconf_sit_config(struct net_device *dev) return; } + if (dev->priv_flags & IFF_ISATAP) { + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 64, dev, 0, 0); + if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) + addrconf_add_linklocal(idev, &addr); + return; + } + sit_add_v4_addrs(idev); if (dev->flags&IFF_POINTOPOINT) { @@ -2385,15 +2406,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: if (idev) { snmp6_unregister_dev(idev); -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); - neigh_sysctl_register(dev, idev->nd_parms, - NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - NULL); - addrconf_sysctl_register(idev, &idev->cnf); -#endif + addrconf_sysctl_unregister(idev); + addrconf_sysctl_register(idev); err = snmp6_register_dev(idev); if (err) return notifier_from_errno(err); @@ -2517,10 +2531,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* Shot the device (if unregistered) */ if (how == 1) { -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&idev->cnf); - neigh_sysctl_unregister(idev->nd_parms); -#endif + addrconf_sysctl_unregister(idev); neigh_parms_release(&nd_tbl, idev->nd_parms); neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); @@ -2734,6 +2745,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) { #ifdef CONFIG_PROC_FS struct if6_iter_state { + struct seq_net_private p; int bucket; }; @@ -2741,9 +2753,13 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) { struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; + struct net *net = state->p.net; for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { ifa = inet6_addr_lst[state->bucket]; + + while (ifa && ifa->idev->dev->nd_net != net) + ifa = ifa->lst_next; if (ifa) break; } @@ -2753,13 +2769,22 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) { struct if6_iter_state *state = seq->private; + struct net *net = state->p.net; ifa = ifa->lst_next; try_again: + if (ifa) { + if (ifa->idev->dev->nd_net != net) { + ifa = ifa->lst_next; + goto try_again; + } + } + if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { ifa = inet6_addr_lst[state->bucket]; goto try_again; } + return ifa; } @@ -2774,6 +2799,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(addrconf_hash_lock) { read_lock_bh(&addrconf_hash_lock); return if6_get_idx(seq, *pos); @@ -2789,6 +2815,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) + __releases(addrconf_hash_lock) { read_unlock_bh(&addrconf_hash_lock); } @@ -2816,8 +2843,8 @@ static const struct seq_operations if6_seq_ops = { static int if6_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &if6_seq_ops, - sizeof(struct if6_iter_state)); + return seq_open_net(inode, file, &if6_seq_ops, + sizeof(struct if6_iter_state)); } static const struct file_operations if6_fops = { @@ -2825,31 +2852,48 @@ static const struct file_operations if6_fops = { .open = if6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init if6_proc_init(void) +static int if6_proc_net_init(struct net *net) { - if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } +static void if6_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "if_inet6"); +} + +static struct pernet_operations if6_proc_net_ops = { + .init = if6_proc_net_init, + .exit = if6_proc_net_exit, +}; + +int __init if6_proc_init(void) +{ + return register_pernet_subsys(&if6_proc_net_ops); +} + void if6_proc_exit(void) { - proc_net_remove(&init_net, "if_inet6"); + unregister_pernet_subsys(&if6_proc_net_ops); } #endif /* CONFIG_PROC_FS */ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) /* Check if address is a home address configured on any interface. */ -int ipv6_chk_home_addr(struct in6_addr *addr) +int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) { int ret = 0; struct inet6_ifaddr * ifp; u8 hash = ipv6_addr_hash(addr); read_lock_bh(&addrconf_hash_lock); for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ifp->idev->dev->nd_net != net) + continue; if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && (ifp->flags & IFA_F_HOMEADDRESS)) { ret = 1; @@ -2997,11 +3041,15 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3054,6 +3102,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3063,6 +3112,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 ifa_flags; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3090,7 +3142,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) /* We ignore other flags so far. */ ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); - ifa = ipv6_get_ifaddr(pfx, dev, 1); + ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { /* * It would be best to check for !NLM_F_CREATE here but @@ -3283,11 +3335,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifa = ifa->if_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDR, + NLM_F_MULTI); } break; case MULTICAST_ADDR: @@ -3296,11 +3348,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifmca = ifmca->next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETMULTICAST, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETMULTICAST, + NLM_F_MULTI); } break; case ANYCAST_ADDR: @@ -3309,11 +3361,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, ifaca = ifaca->aca_next, ip_idx++) { if (ip_idx < s_ip_idx) continue; - if ((err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_GETANYCAST, - NLM_F_MULTI)) <= 0) - goto done; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETANYCAST, + NLM_F_MULTI); } break; default: @@ -3321,14 +3373,12 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } read_unlock_bh(&idev->lock); in6_dev_put(idev); + + if (err <= 0) + break; cont: idx++; } -done: - if (err <= 0) { - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - } cb->args[0] = idx; cb->args[1] = ip_idx; return skb->len; @@ -3336,26 +3386,42 @@ done: static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3364,6 +3430,9 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, struct sk_buff *skb; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3378,7 +3447,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (ifm->ifa_index) dev = __dev_get_by_index(&init_net, ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; goto errout; } @@ -3396,7 +3465,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3419,10 +3488,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3581,11 +3650,15 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; + if (net != &init_net) + return 0; + read_lock(&dev_base_lock); idx = 0; for_each_netdev(&init_net, dev) { @@ -3623,10 +3696,10 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3692,10 +3765,10 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -3746,22 +3819,8 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - if ((!*valp) ^ (!val)) { - struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; - if (idev == NULL) - return ret; - dev_forward_change(idev); - } - } else { - ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - addrconf_forward_change(); - } - if (*valp) - rt6_purge_dflt_routers(); - } - + if (write) + addrconf_fixup_forwarding(ctl, valp, val); return ret; } @@ -3772,6 +3831,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, void __user *newval, size_t newlen) { int *valp = table->data; + int val = *valp; int new; if (!newval || !newlen) @@ -3796,26 +3856,8 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } } - if (valp != &ipv6_devconf_dflt.forwarding) { - if (valp != &ipv6_devconf.forwarding) { - struct inet6_dev *idev = (struct inet6_dev *)table->extra1; - int changed; - if (unlikely(idev == NULL)) - return -ENODEV; - changed = (!*valp) ^ (!new); - *valp = new; - if (changed) - dev_forward_change(idev); - } else { - *valp = new; - addrconf_forward_change(); - } - - if (*valp) - rt6_purge_dflt_routers(); - } else - *valp = new; - + *valp = new; + addrconf_fixup_forwarding(table, valp, val); return 1; } @@ -3823,10 +3865,7 @@ static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table addrconf_vars[__NET_IPV6_MAX]; - ctl_table addrconf_dev[2]; - ctl_table addrconf_conf_dir[2]; - ctl_table addrconf_proto_dir[2]; - ctl_table addrconf_root_dir[2]; + char *dev_name; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { @@ -4047,72 +4086,33 @@ static struct addrconf_sysctl_table .ctl_name = 0, /* sentinel */ } }, - .addrconf_dev = { - { - .ctl_name = NET_PROTO_CONF_ALL, - .procname = "all", - .mode = 0555, - .child = addrconf_sysctl.addrconf_vars, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_conf_dir = { - { - .ctl_name = NET_IPV6_CONF, - .procname = "conf", - .mode = 0555, - .child = addrconf_sysctl.addrconf_dev, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_proto_dir = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = addrconf_sysctl.addrconf_conf_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, - .addrconf_root_dir = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = addrconf_sysctl.addrconf_proto_dir, - }, - { - .ctl_name = 0, /* sentinel */ - } - }, }; -static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) +static int __addrconf_sysctl_register(struct net *net, char *dev_name, + int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p) { int i; - struct net_device *dev = idev ? idev->dev : NULL; struct addrconf_sysctl_table *t; - char *dev_name = NULL; + +#define ADDRCONF_CTL_PATH_DEV 3 + + struct ctl_path addrconf_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { .procname = "conf", .ctl_name = NET_IPV6_CONF, }, + { /* to be set */ }, + { }, + }; + t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); if (t == NULL) - return; + goto out; + for (i=0; t->addrconf_vars[i].data; i++) { t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ - } - if (dev) { - dev_name = dev->name; - t->addrconf_dev[0].ctl_name = dev->ifindex; - } else { - dev_name = "default"; - t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; + t->addrconf_vars[i].extra2 = net; } /* @@ -4120,47 +4120,126 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = kstrdup(dev_name, GFP_KERNEL); - if (!dev_name) - goto free; - - t->addrconf_dev[0].procname = dev_name; + t->dev_name = kstrdup(dev_name, GFP_KERNEL); + if (!t->dev_name) + goto free; - t->addrconf_dev[0].child = t->addrconf_vars; - t->addrconf_conf_dir[0].child = t->addrconf_dev; - t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; - t->addrconf_root_dir[0].child = t->addrconf_proto_dir; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; + addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; - t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); + t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, + t->addrconf_vars); if (t->sysctl_header == NULL) goto free_procname; - else - p->sysctl = t; - return; - /* error path */ - free_procname: - kfree(dev_name); - free: + p->sysctl = t; + return 0; + +free_procname: + kfree(t->dev_name); +free: kfree(t); +out: + return -ENOBUFS; +} - return; +static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) +{ + struct addrconf_sysctl_table *t; + + if (p->sysctl == NULL) + return; + + t = p->sysctl; + p->sysctl = NULL; + unregister_sysctl_table(t->sysctl_header); + kfree(t->dev_name); + kfree(t); } -static void addrconf_sysctl_unregister(struct ipv6_devconf *p) +static void addrconf_sysctl_register(struct inet6_dev *idev) { - if (p->sysctl) { - struct addrconf_sysctl_table *t = p->sysctl; - p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); - kfree(t->addrconf_dev[0].procname); - kfree(t); - } + neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, + NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + NULL); + __addrconf_sysctl_register(idev->dev->nd_net, idev->dev->name, + idev->dev->ifindex, idev, &idev->cnf); +} + +static void addrconf_sysctl_unregister(struct inet6_dev *idev) +{ + __addrconf_sysctl_unregister(&idev->cnf); + neigh_sysctl_unregister(idev->nd_parms); } #endif +static int addrconf_init_net(struct net *net) +{ + int err; + struct ipv6_devconf *all, *dflt; + + err = -ENOMEM; + all = &ipv6_devconf; + dflt = &ipv6_devconf_dflt; + + if (net != &init_net) { + all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; + + dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; + } + + net->ipv6.devconf_all = all; + net->ipv6.devconf_dflt = dflt; + +#ifdef CONFIG_SYSCTL + err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL, + NULL, all); + if (err < 0) + goto err_reg_all; + + err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT, + NULL, dflt); + if (err < 0) + goto err_reg_dflt; +#endif + return 0; + +#ifdef CONFIG_SYSCTL +err_reg_dflt: + __addrconf_sysctl_unregister(all); +err_reg_all: + kfree(dflt); +#endif +err_alloc_dflt: + kfree(all); +err_alloc_all: + return err; +} + +static void addrconf_exit_net(struct net *net) +{ +#ifdef CONFIG_SYSCTL + __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); + __addrconf_sysctl_unregister(net->ipv6.devconf_all); +#endif + if (net != &init_net) { + kfree(net->ipv6.devconf_dflt); + kfree(net->ipv6.devconf_all); + } +} + +static struct pernet_operations addrconf_ops = { + .init = addrconf_init_net, + .exit = addrconf_exit_net, +}; + /* * Device notifier */ @@ -4185,7 +4264,15 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier); int __init addrconf_init(void) { - int err = 0; + int err; + + if ((err = ipv6_addr_label_init()) < 0) { + printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", + err); + return err; + } + + register_pernet_subsys(&addrconf_ops); /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup @@ -4210,7 +4297,7 @@ int __init addrconf_init(void) err = -ENOMEM; rtnl_unlock(); if (err) - return err; + goto errlo; ip6_null_entry.u.dst.dev = init_net.loopback_dev; ip6_null_entry.rt6i_idev = in6_dev_get(init_net.loopback_dev); @@ -4236,20 +4323,18 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); -#ifdef CONFIG_SYSCTL - addrconf_sysctl.sysctl_header = - register_sysctl_table(addrconf_sysctl.addrconf_root_dir); - addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); -#endif + ipv6_addr_label_rtnl_register(); return 0; errout: unregister_netdevice_notifier(&ipv6_dev_notf); +errlo: + unregister_pernet_subsys(&addrconf_ops); return err; } -void __exit addrconf_cleanup(void) +void addrconf_cleanup(void) { struct net_device *dev; struct inet6_ifaddr *ifa; @@ -4257,10 +4342,7 @@ void __exit addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); -#ifdef CONFIG_SYSCTL - addrconf_sysctl_unregister(&ipv6_devconf_dflt); - addrconf_sysctl_unregister(&ipv6_devconf); -#endif + unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c new file mode 100644 index 000000000000..a3c5a72218fd --- /dev/null +++ b/net/ipv6/addrlabel.c @@ -0,0 +1,561 @@ +/* + * IPv6 Address Label subsystem + * for the IPv6 "Default" Source Address Selection + * + * Copyright (C)2007 USAGI/WIDE Project + */ +/* + * Author: + * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <linux/in6.h> +#include <net/addrconf.h> +#include <linux/if_addrlabel.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#if 0 +#define ADDRLABEL(x...) printk(x) +#else +#define ADDRLABEL(x...) do { ; } while(0) +#endif + +/* + * Policy Table + */ +struct ip6addrlbl_entry +{ + struct in6_addr prefix; + int prefixlen; + int ifindex; + int addrtype; + u32 label; + struct hlist_node list; + atomic_t refcnt; + struct rcu_head rcu; +}; + +static struct ip6addrlbl_table +{ + struct hlist_head head; + spinlock_t lock; + u32 seq; +} ip6addrlbl_table; + +/* + * Default policy table (RFC3484 + extensions) + * + * prefix addr_type label + * ------------------------------------------------------------------------- + * ::1/128 LOOPBACK 0 + * ::/0 N/A 1 + * 2002::/16 N/A 2 + * ::/96 COMPATv4 3 + * ::ffff:0:0/96 V4MAPPED 4 + * fc00::/7 N/A 5 ULA (RFC 4193) + * 2001::/32 N/A 6 Teredo (RFC 4380) + * + * Note: 0xffffffff is used if we do not have any policies. + */ + +#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL + +static const __initdata struct ip6addrlbl_init_table +{ + const struct in6_addr *prefix; + int prefixlen; + u32 label; +} ip6addrlbl_init_table[] = { + { /* ::/0 */ + .prefix = &in6addr_any, + .label = 1, + },{ /* fc00::/7 */ + .prefix = &(struct in6_addr){{{ 0xfc }}}, + .prefixlen = 7, + .label = 5, + },{ /* 2002::/16 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, + .prefixlen = 16, + .label = 2, + },{ /* 2001::/32 */ + .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, + .prefixlen = 32, + .label = 6, + },{ /* ::ffff:0:0 */ + .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, + .prefixlen = 96, + .label = 4, + },{ /* ::/96 */ + .prefix = &in6addr_any, + .prefixlen = 96, + .label = 3, + },{ /* ::1/128 */ + .prefix = &in6addr_loopback, + .prefixlen = 128, + .label = 0, + } +}; + +/* Object management */ +static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) +{ + kfree(p); +} + +static void ip6addrlbl_free_rcu(struct rcu_head *h) +{ + ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); +} + +static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p) +{ + return atomic_inc_not_zero(&p->refcnt); +} + +static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) +{ + if (atomic_dec_and_test(&p->refcnt)) + call_rcu(&p->rcu, ip6addrlbl_free_rcu); +} + +/* Find label */ +static int __ip6addrlbl_match(struct ip6addrlbl_entry *p, + const struct in6_addr *addr, + int addrtype, int ifindex) +{ + if (p->ifindex && p->ifindex != ifindex) + return 0; + if (p->addrtype && p->addrtype != addrtype) + return 0; + if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) + return 0; + return 1; +} + +static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr, + int type, int ifindex) +{ + struct hlist_node *pos; + struct ip6addrlbl_entry *p; + hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + if (__ip6addrlbl_match(p, addr, type, ifindex)) + return p; + } + return NULL; +} + +u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex) +{ + u32 label; + struct ip6addrlbl_entry *p; + + type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; + + rcu_read_lock(); + p = __ipv6_addr_label(addr, type, ifindex); + label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; + rcu_read_unlock(); + + ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", + __FUNCTION__, + NIP6(*addr), type, ifindex, + label); + + return label; +} + +/* allocate one entry */ +static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, + int prefixlen, int ifindex, + u32 label) +{ + struct ip6addrlbl_entry *newp; + int addrtype; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex, + (unsigned int)label); + + addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); + + switch (addrtype) { + case IPV6_ADDR_MAPPED: + if (prefixlen > 96) + return ERR_PTR(-EINVAL); + if (prefixlen < 96) + addrtype = 0; + break; + case IPV6_ADDR_COMPATv4: + if (prefixlen != 96) + addrtype = 0; + break; + case IPV6_ADDR_LOOPBACK: + if (prefixlen != 128) + addrtype = 0; + break; + } + + newp = kmalloc(sizeof(*newp), GFP_KERNEL); + if (!newp) + return ERR_PTR(-ENOMEM); + + ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); + newp->prefixlen = prefixlen; + newp->ifindex = ifindex; + newp->addrtype = addrtype; + newp->label = label; + INIT_HLIST_NODE(&newp->list); + atomic_set(&newp->refcnt, 1); + return newp; +} + +/* add a label */ +static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +{ + int ret = 0; + + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", + __FUNCTION__, + newp, replace); + + if (hlist_empty(&ip6addrlbl_table.head)) { + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); + } else { + struct hlist_node *pos, *n; + struct ip6addrlbl_entry *p = NULL; + hlist_for_each_entry_safe(p, pos, n, + &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; + goto out; + } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; + } + } + hlist_add_after_rcu(&p->list, &newp->list); + } +out: + if (!ret) + ip6addrlbl_table.seq++; + return ret; +} + +/* add a label */ +static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen, + int ifindex, u32 label, int replace) +{ + struct ip6addrlbl_entry *newp; + int ret = 0; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex, + (unsigned int)label, + replace); + + newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); + if (IS_ERR(newp)) + return PTR_ERR(newp); + spin_lock(&ip6addrlbl_table.lock); + ret = __ip6addrlbl_add(newp, replace); + spin_unlock(&ip6addrlbl_table.lock); + if (ret) + ip6addrlbl_free(newp); + return ret; +} + +/* remove a label */ +static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) +{ + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *pos, *n; + int ret = -ESRCH; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex); + + hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == prefixlen && + p->ifindex == ifindex && + ipv6_addr_equal(&p->prefix, prefix)) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); + ret = 0; + break; + } + } + return ret; +} + +static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen, + int ifindex) +{ + struct in6_addr prefix_buf; + int ret; + + ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", + __FUNCTION__, + NIP6(*prefix), prefixlen, + ifindex); + + ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); + spin_lock(&ip6addrlbl_table.lock); + ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex); + spin_unlock(&ip6addrlbl_table.lock); + return ret; +} + +/* add default label */ +static __init int ip6addrlbl_init(void) +{ + int err = 0; + int i; + + ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__); + + for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { + int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + /* XXX: should we free all rules when we catch an error? */ + if (ret && (!err || err != -ENOMEM)) + err = ret; + } + return err; +} + +int __init ipv6_addr_label_init(void) +{ + spin_lock_init(&ip6addrlbl_table.lock); + + return ip6addrlbl_init(); +} + +static const struct nla_policy ifal_policy[IFAL_MAX+1] = { + [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, + [IFAL_LABEL] = { .len = sizeof(u32), }, +}; + +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = skb->sk->sk_net; + struct ifaddrlblmsg *ifal; + struct nlattr *tb[IFAL_MAX+1]; + struct in6_addr *pfx; + u32 label; + int err = 0; + + if (net != &init_net) + return 0; + + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + if (err < 0) + return err; + + ifal = nlmsg_data(nlh); + + if (ifal->ifal_family != AF_INET6 || + ifal->ifal_prefixlen > 128) + return -EINVAL; + + if (ifal->ifal_index && + !__dev_get_by_index(&init_net, ifal->ifal_index)) + return -EINVAL; + + if (!tb[IFAL_ADDRESS]) + return -EINVAL; + + pfx = nla_data(tb[IFAL_ADDRESS]); + if (!pfx) + return -EINVAL; + + if (!tb[IFAL_LABEL]) + return -EINVAL; + label = nla_get_u32(tb[IFAL_LABEL]); + if (label == IPV6_ADDR_LABEL_DEFAULT) + return -EINVAL; + + switch(nlh->nlmsg_type) { + case RTM_NEWADDRLABEL: + err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen, + ifal->ifal_index, label, + nlh->nlmsg_flags & NLM_F_REPLACE); + break; + case RTM_DELADDRLABEL: + err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen, + ifal->ifal_index); + break; + default: + err = -EOPNOTSUPP; + } + return err; +} + +static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh, + int prefixlen, int ifindex, u32 lseq) +{ + struct ifaddrlblmsg *ifal = nlmsg_data(nlh); + ifal->ifal_family = AF_INET6; + ifal->ifal_prefixlen = prefixlen; + ifal->ifal_flags = 0; + ifal->ifal_index = ifindex; + ifal->ifal_seq = lseq; +}; + +static int ip6addrlbl_fill(struct sk_buff *skb, + struct ip6addrlbl_entry *p, + u32 lseq, + u32 pid, u32 seq, int event, + unsigned int flags) +{ + struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + sizeof(struct ifaddrlblmsg), flags); + if (!nlh) + return -EMSGSIZE; + + ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); + + if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + return nlmsg_end(skb, nlh); +} + +static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = skb->sk->sk_net; + struct ip6addrlbl_entry *p; + struct hlist_node *pos; + int idx = 0, s_idx = cb->args[0]; + int err; + + if (net != &init_net) + return 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) { + if (idx >= s_idx) { + if ((err = ip6addrlbl_fill(skb, p, + ip6addrlbl_table.seq, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDRLABEL, + NLM_F_MULTI)) <= 0) + break; + } + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +static inline int ip6addrlbl_msgsize(void) +{ + return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + + nla_total_size(16) /* IFAL_ADDRESS */ + + nla_total_size(4) /* IFAL_LABEL */ + ); +} + +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) +{ + struct net *net = in_skb->sk->sk_net; + struct ifaddrlblmsg *ifal; + struct nlattr *tb[IFAL_MAX+1]; + struct in6_addr *addr; + u32 lseq; + int err = 0; + struct ip6addrlbl_entry *p; + struct sk_buff *skb; + + if (net != &init_net) + return 0; + + err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy); + if (err < 0) + return err; + + ifal = nlmsg_data(nlh); + + if (ifal->ifal_family != AF_INET6 || + ifal->ifal_prefixlen != 128) + return -EINVAL; + + if (ifal->ifal_index && + !__dev_get_by_index(&init_net, ifal->ifal_index)) + return -EINVAL; + + if (!tb[IFAL_ADDRESS]) + return -EINVAL; + + addr = nla_data(tb[IFAL_ADDRESS]); + if (!addr) + return -EINVAL; + + rcu_read_lock(); + p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index); + if (p && ip6addrlbl_hold(p)) + p = NULL; + lseq = ip6addrlbl_table.seq; + rcu_read_unlock(); + + if (!p) { + err = -ESRCH; + goto out; + } + + if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { + ip6addrlbl_put(p); + return -ENOBUFS; + } + + err = ip6addrlbl_fill(skb, p, lseq, + NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + RTM_NEWADDRLABEL, 0); + + ip6addrlbl_put(p); + + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto out; + } + + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); +out: + return err; +} + +void __init ipv6_addr_label_rtnl_register(void) +{ + __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); + __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); + __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); +} + diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ecbd38894fdd..bddac0e8780f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -66,9 +66,7 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -int sysctl_ipv6_bindv6only __read_mostly; - -/* The inetsw table contains everything that inet_create needs to +/* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ static struct list_head inetsw6[SOCK_MAX]; @@ -193,7 +191,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = sysctl_ipv6_bindv6only; + np->ipv6only = init_net.ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. @@ -280,7 +278,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(v4addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } @@ -314,7 +312,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(&init_net, &addr->sin6_addr, + dev, 0)) { if (dev) dev_put(dev); err = -EADDRNOTAVAIL; @@ -491,6 +490,7 @@ const struct proto_ops inet6_stream_ops = { .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = tcp_sendpage, + .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -528,57 +528,23 @@ static struct net_proto_family inet6_family_ops = { .owner = THIS_MODULE, }; -/* Same as inet6_dgram_ops, sans udp_poll. */ -static const struct proto_ops inet6_sockraw_ops = { - .family = PF_INET6, - .owner = THIS_MODULE, - .release = inet6_release, - .bind = inet6_bind, - .connect = inet_dgram_connect, /* ok */ - .socketpair = sock_no_socketpair, /* a do nothing */ - .accept = sock_no_accept, /* a do nothing */ - .getname = inet6_getname, - .poll = datagram_poll, /* ok */ - .ioctl = inet6_ioctl, /* must change */ - .listen = sock_no_listen, /* ok */ - .shutdown = inet_shutdown, /* ok */ - .setsockopt = sock_common_setsockopt, /* ok */ - .getsockopt = sock_common_getsockopt, /* ok */ - .sendmsg = inet_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static struct inet_protosw rawv6_protosw = { - .type = SOCK_RAW, - .protocol = IPPROTO_IP, /* wild card */ - .prot = &rawv6_prot, - .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, - .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, -}; - -void -inet6_register_protosw(struct inet_protosw *p) +int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; - int protocol = p->protocol; struct list_head *last_perm; + int protocol = p->protocol; + int ret; spin_lock_bh(&inetsw6_lock); + ret = -EINVAL; if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ answer = NULL; + ret = -EPERM; last_perm = &inetsw6[p->type]; list_for_each(lh, &inetsw6[p->type]) { answer = list_entry(lh, struct inet_protosw, list); @@ -602,9 +568,10 @@ inet6_register_protosw(struct inet_protosw *p) * system automatically returns to the old behavior. */ list_add_rcu(&p->list, last_perm); + ret = 0; out: spin_unlock_bh(&inetsw6_lock); - return; + return ret; out_permanent: printk(KERN_ERR "Attempt to override permanent protocol %d.\n", @@ -713,20 +680,19 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted); static int __init init_ipv6_mibs(void) { - if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + if (snmp_mib_init((void **)ipv6_statistics, + sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + if (snmp_mib_init((void **)icmpv6_statistics, + sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void **)icmpv6msg_statistics, - sizeof (struct icmpv6msg_mib), __alignof__(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib), - __alignof__(struct udp_mib)) < 0) + if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; - if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib), - __alignof__(struct udp_mib)) < 0) + if (snmp_mib_init((void **)udplite_stats_in6, + sizeof (struct udp_mib)) < 0) goto err_udplite_mib; return 0; @@ -752,6 +718,32 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)udplite_stats_in6); } +static int inet6_net_init(struct net *net) +{ + net->ipv6.sysctl.bindv6only = 0; + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + net->ipv6.sysctl.icmpv6_time = 1*HZ; + + return 0; +} + +static void inet6_net_exit(struct net *net) +{ + return; +} + +static struct pernet_operations inet6_net_ops = { + .init = inet6_net_init, + .exit = inet6_net_exit, +}; + static int __init inet6_init(void) { struct sk_buff *dummy_skb; @@ -768,7 +760,6 @@ static int __init inet6_init(void) __this_module.can_unload = &ipv6_unload; #endif #endif - err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -793,14 +784,16 @@ static int __init inet6_init(void) /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ - inet6_register_protosw(&rawv6_protosw); + err = rawv6_init(); + if (err) + goto out_unregister_raw_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) */ err = sock_register(&inet6_family_ops); if (err) - goto out_unregister_raw_proto; + goto out_sock_register_fail; /* Initialise ipv6 mibs */ err = init_ipv6_mibs(); @@ -814,8 +807,14 @@ static int __init inet6_init(void) * able to communicate via both network protocols. */ + err = register_pernet_subsys(&inet6_net_ops); + if (err) + goto register_pernet_fail; + #ifdef CONFIG_SYSCTL - ipv6_sysctl_register(); + err = ipv6_sysctl_register(); + if (err) + goto sysctl_fail; #endif err = icmpv6_init(&inet6_family_ops); if (err) @@ -848,31 +847,61 @@ static int __init inet6_init(void) if (if6_proc_init()) goto proc_if6_fail; #endif - ip6_route_init(); - ip6_flowlabel_init(); + err = ip6_route_init(); + if (err) + goto ip6_route_fail; + err = ip6_flowlabel_init(); + if (err) + goto ip6_flowlabel_fail; err = addrconf_init(); if (err) goto addrconf_fail; /* Init v6 extension headers. */ - ipv6_rthdr_init(); - ipv6_frag_init(); - ipv6_nodata_init(); - ipv6_destopt_init(); + err = ipv6_exthdrs_init(); + if (err) + goto ipv6_exthdrs_fail; + + err = ipv6_frag_init(); + if (err) + goto ipv6_frag_fail; /* Init v6 transport protocols. */ - udpv6_init(); - udplitev6_init(); - tcpv6_init(); + err = udpv6_init(); + if (err) + goto udpv6_fail; - ipv6_packet_init(); - err = 0; + err = udplitev6_init(); + if (err) + goto udplitev6_fail; + + err = tcpv6_init(); + if (err) + goto tcpv6_fail; + + err = ipv6_packet_init(); + if (err) + goto ipv6_packet_fail; out: return err; +ipv6_packet_fail: + tcpv6_exit(); +tcpv6_fail: + udplitev6_exit(); +udplitev6_fail: + udpv6_exit(); +udpv6_fail: + ipv6_frag_exit(); +ipv6_frag_fail: + ipv6_exthdrs_exit(); +ipv6_exthdrs_fail: + addrconf_cleanup(); addrconf_fail: ip6_flowlabel_cleanup(); +ip6_flowlabel_fail: ip6_route_cleanup(); +ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: @@ -899,10 +928,16 @@ ndisc_fail: icmp_fail: #ifdef CONFIG_SYSCTL ipv6_sysctl_unregister(); +sysctl_fail: #endif + unregister_pernet_subsys(&inet6_net_ops); +register_pernet_fail: cleanup_ipv6_mibs(); out_unregister_sock: sock_unregister(PF_INET6); + rtnl_unregister_all(PF_INET6); +out_sock_register_fail: + rawv6_exit(); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: @@ -922,9 +957,14 @@ static void __exit inet6_exit(void) /* Disallow any further netlink messages */ rtnl_unregister_all(PF_INET6); + udpv6_exit(); + udplitev6_exit(); + tcpv6_exit(); + /* Cleanup code parts. */ ipv6_packet_cleanup(); - + ipv6_frag_exit(); + ipv6_exthdrs_exit(); addrconf_cleanup(); ip6_flowlabel_cleanup(); ip6_route_cleanup(); @@ -943,9 +983,11 @@ static void __exit inet6_exit(void) igmp6_cleanup(); ndisc_cleanup(); icmpv6_cleanup(); + rawv6_exit(); #ifdef CONFIG_SYSCTL ipv6_sysctl_unregister(); #endif + unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 4eaf55072b1b..fb0d07a15e93 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -370,6 +370,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; + spin_lock(&x->lock); { u8 auth_data[MAX_AH_AUTH_LEN]; @@ -378,14 +379,15 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, hdr_len); err = ah_mac_digest(ahp, skb, ah->auth_data); if (err) - goto free_out; - err = -EINVAL; - if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); - x->stats.integrity_failed++; - goto free_out; - } + goto unlock; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) + err = -EBADMSG; } +unlock: + spin_unlock(&x->lock); + + if (err) + goto free_out; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), tmp_hdr, hdr_len); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f915c4df9820..9c7f83fbc3a1 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -89,7 +89,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(addr, NULL, 0)) + if (ipv6_chk_addr(&init_net, addr, NULL, 0)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -504,6 +504,7 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return ac6_get_idx(seq, *pos); @@ -518,6 +519,7 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ac6_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct ac6_iter_state *state = ac6_seq_private(seq); if (likely(state->idev != NULL)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5d4245ab4183..94fa6ae77cfe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -177,7 +177,7 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -549,7 +549,8 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, return -ENODEV; } } - if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) { + if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + dev, 0)) { if (dev) dev_put(dev); err = -EINVAL; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 444053254676..5bd5292ad9fa 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -165,31 +165,32 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } + if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { + ret = -EINVAL; + goto out; + } + + skb->ip_summed = CHECKSUM_NONE; + + spin_lock(&x->lock); + /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { u8 sum[alen]; ret = esp_mac_digest(esp, skb, 0, skb->len - alen); if (ret) - goto out; + goto unlock; if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { - x->stats.integrity_failed++; - ret = -EINVAL; - goto out; + ret = -EBADMSG; + goto unlock; } } - if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { - ret = -EINVAL; - goto out; - } - - skb->ip_summed = CHECKSUM_NONE; - esph = (struct ip_esp_hdr *)skb->data; iph = ipv6_hdr(skb); @@ -198,15 +199,13 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); { - u8 nexthdr[2]; struct scatterlist *sg = &esp->sgbuf[0]; - u8 padlen; if (unlikely(nfrags > ESP_NUM_FAST_SG)) { sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC); if (!sg) { ret = -ENOMEM; - goto out; + goto unlock; } } sg_init_table(sg, nfrags); @@ -216,8 +215,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); - if (unlikely(ret)) - goto out; + } + +unlock: + spin_unlock(&x->lock); + + if (unlikely(ret)) + goto out; + + { + u8 nexthdr[2]; + u8 padlen; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 1e89efd38a0c..3cd1c993d52b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -32,6 +32,7 @@ #include <linux/in6.h> #include <linux/icmpv6.h> +#include <net/dst.h> #include <net/sock.h> #include <net/snmp.h> @@ -307,38 +308,6 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } -static struct inet6_protocol destopt_protocol = { - .handler = ipv6_destopt_rcv, - .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, -}; - -void __init ipv6_destopt_init(void) -{ - if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0) - printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n"); -} - -/******************************** - NONE header. No data in packet. - ********************************/ - -static int ipv6_nodata_rcv(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - -static struct inet6_protocol nodata_protocol = { - .handler = ipv6_nodata_rcv, - .flags = INET6_PROTO_NOPOLICY, -}; - -void __init ipv6_nodata_init(void) -{ - if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0) - printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n"); -} - /******************************** Routing header. ********************************/ @@ -476,7 +445,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(addr)) { + if (!ipv6_chk_home_addr(&init_net, addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); @@ -536,12 +505,48 @@ static struct inet6_protocol rthdr_protocol = { .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, }; -void __init ipv6_rthdr_init(void) +static struct inet6_protocol destopt_protocol = { + .handler = ipv6_destopt_rcv, + .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_GSO_EXTHDR, +}; + +static struct inet6_protocol nodata_protocol = { + .handler = dst_discard, + .flags = INET6_PROTO_NOPOLICY, +}; + +int __init ipv6_exthdrs_init(void) { - if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0) - printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n"); + int ret; + + ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); + if (ret) + goto out; + + ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + if (ret) + goto out_rthdr; + + ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE); + if (ret) + goto out_destopt; + +out: + return ret; +out_rthdr: + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); +out_destopt: + inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + goto out; }; +void ipv6_exthdrs_exit(void) +{ + inet6_del_protocol(&nodata_protocol, IPPROTO_NONE); + inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); + inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); +} + /********************************** Hop-by-hop options. **********************************/ diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 428c6b0e26d8..695c0ca8a417 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -223,7 +223,7 @@ nla_put_failure: return -ENOBUFS; } -static u32 fib6_rule_default_pref(void) +static u32 fib6_rule_default_pref(struct fib_rules_ops *ops) { return 0x3FFF; } @@ -249,6 +249,7 @@ static struct fib_rules_ops fib6_rules_ops = { .policy = fib6_rule_policy, .rules_list = LIST_HEAD_INIT(fib6_rules_ops.rules_list), .owner = THIS_MODULE, + .fro_net = &init_net, }; static int __init fib6_default_rules_init(void) @@ -265,10 +266,23 @@ static int __init fib6_default_rules_init(void) return 0; } -void __init fib6_rules_init(void) +int __init fib6_rules_init(void) { - BUG_ON(fib6_default_rules_init()); - fib_rules_register(&fib6_rules_ops); + int ret; + + ret = fib6_default_rules_init(); + if (ret) + goto out; + + ret = fib_rules_register(&fib6_rules_ops); + if (ret) + goto out_default_rules_init; +out: + return ret; + +out_default_rules_init: + fib_rules_cleanup_ops(&fib6_rules_ops); + goto out; } void fib6_rules_cleanup(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f1240688dc58..cbb5b9cf84ad 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -63,6 +63,7 @@ #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> +#include <net/xfrm.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -86,7 +87,7 @@ static int icmpv6_rcv(struct sk_buff *skb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, - .flags = INET6_PROTO_FINAL, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; static __inline__ int icmpv6_xmit_lock(void) @@ -153,8 +154,6 @@ static int is_ineligible(struct sk_buff *skb) return 0; } -static int sysctl_icmpv6_time __read_mostly = 1*HZ; - /* * Check the ICMP output rate limit */ @@ -185,7 +184,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, res = 1; } else { struct rt6_info *rt = (struct rt6_info *)dst; - int tmo = sysctl_icmpv6_time; + int tmo = init_net.ipv6.sysctl.icmpv6_time; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) @@ -310,8 +309,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct dst_entry *dst; + struct dst_entry *dst2; struct icmp6hdr tmp_hdr; struct flowi fl; + struct flowi fl2; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -331,7 +332,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(&init_net, &hdr->daddr, skb->dev, 0)) saddr = &hdr->daddr; /* @@ -418,9 +419,42 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, goto out_dst_release; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + /* No need to clone since we're just using its address. */ + dst2 = dst; + + err = xfrm_lookup(&dst, &fl, sk, 0); + switch (err) { + case 0: + if (dst != dst2) + goto route_done; + break; + case -EPERM: + dst = NULL; + break; + default: + goto out; + } + + if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) + goto out; + + if (ip6_dst_lookup(sk, &dst2, &fl)) + goto out; + + err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + if (err == -ENOENT) { + if (!dst) + goto out; + goto route_done; + } + + dst_release(dst); + dst = dst2; + + if (err) goto out; +route_done: if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else @@ -555,9 +589,7 @@ out: static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) { - struct in6_addr *saddr, *daddr; struct inet6_protocol *ipprot; - struct sock *sk; int inner_offset; int hash; u8 nexthdr; @@ -579,9 +611,6 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) if (!pskb_may_pull(skb, inner_offset+8)) return; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed pmtu discovery. @@ -597,15 +626,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); - read_lock(&raw_v6_lock); - if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, - IP6CB(skb)->iif))) { - rawv6_err(sk, skb, NULL, type, code, inner_offset, info); - sk = sk_next(sk); - } - } - read_unlock(&raw_v6_lock); + raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); } /* @@ -621,6 +642,25 @@ static int icmpv6_rcv(struct sk_buff *skb) struct icmp6hdr *hdr; int type; + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + int nh; + + if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + XFRM_STATE_ICMP)) + goto drop_no_count; + + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + goto drop_no_count; + + nh = skb_network_offset(skb); + skb_set_network_header(skb, sizeof(*hdr)); + + if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) + goto drop_no_count; + + skb_set_network_header(skb, nh); + } + ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; @@ -643,8 +683,7 @@ static int icmpv6_rcv(struct sk_buff *skb) } } - if (!pskb_pull(skb, sizeof(struct icmp6hdr))) - goto discard_it; + __skb_pull(skb, sizeof(*hdr)); hdr = icmp6_hdr(skb); @@ -730,6 +769,7 @@ static int icmpv6_rcv(struct sk_buff *skb) discard_it: ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); +drop_no_count: kfree_skb(skb); return 0; } @@ -865,16 +905,26 @@ int icmpv6_err_convert(int type, int code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table[] = { +ctl_table ipv6_icmp_table_template[] = { { .ctl_name = NET_IPV6_ICMP_RATELIMIT, .procname = "ratelimit", - .data = &sysctl_icmpv6_time, + .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, { .ctl_name = 0 }, }; + +struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(ipv6_icmp_table_template, + sizeof(ipv6_icmp_table_template), + GFP_KERNEL); + return table; +} #endif diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 0765d8bd380f..a66a7d8e2811 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -43,7 +43,7 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, } __sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -216,7 +216,7 @@ unique: BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sk->sk_hash = hash; - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); write_unlock(lock); if (twp != NULL) { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 946cf389ab95..f93407cf6515 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -361,6 +361,7 @@ end: static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; @@ -369,6 +370,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; int res = 0; + if (net != &init_net) + return 0; + s_h = cb->args[0]; s_e = cb->args[1]; @@ -677,13 +681,15 @@ static __inline__ void fib6_start_gc(struct rt6_info *rt) { if (ip6_fib_timer.expires == 0 && (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE))) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + init_net.ipv6.sysctl.ip6_rt_gc_interval); } void fib6_force_start_gc(void) { if (ip6_fib_timer.expires == 0) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + init_net.ipv6.sysctl.ip6_rt_gc_interval); } /* @@ -1122,9 +1128,6 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, rt->u.dst.rt6_next = NULL; - if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT) - fn->leaf = &ip6_null_entry; - /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { fn->fn_flags &= ~RTN_RTINFO; @@ -1311,6 +1314,9 @@ static int fib6_walk(struct fib6_walker_t *w) static int fib6_clean_node(struct fib6_walker_t *w) { + struct nl_info info = { + .nl_net = &init_net, + }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = container_of(w, struct fib6_cleaner_t, w); @@ -1319,7 +1325,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); @@ -1445,7 +1451,8 @@ void fib6_run_gc(unsigned long dummy) { if (dummy != ~0UL) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval; + gc_args.timeout = dummy ? (int)dummy : + init_net.ipv6.sysctl.ip6_rt_gc_interval; } else { local_bh_disable(); if (!spin_trylock(&fib6_gc_lock)) { @@ -1453,7 +1460,7 @@ void fib6_run_gc(unsigned long dummy) local_bh_enable(); return; } - gc_args.timeout = ip6_rt_gc_interval; + gc_args.timeout = init_net.ipv6.sysctl.ip6_rt_gc_interval; } gc_args.more = 0; @@ -1461,7 +1468,8 @@ void fib6_run_gc(unsigned long dummy) fib6_clean_all(fib6_age, 0, NULL); if (gc_args.more) - mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); + mod_timer(&ip6_fib_timer, jiffies + + init_net.ipv6.sysctl.ip6_rt_gc_interval); else { del_timer(&ip6_fib_timer); ip6_fib_timer.expires = 0; @@ -1469,16 +1477,27 @@ void fib6_run_gc(unsigned long dummy) spin_unlock_bh(&fib6_gc_lock); } -void __init fib6_init(void) +int __init fib6_init(void) { + int ret; fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!fib6_node_kmem) + return -ENOMEM; fib6_tables_init(); - __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + if (ret) + goto out_kmem_cache_create; +out: + return ret; + +out_kmem_cache_create: + kmem_cache_destroy(fib6_node_kmem); + goto out; } void fib6_gc_cleanup(void) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b12cc22e7745..2b7d9ee98832 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -629,6 +629,7 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) } static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(ip6_fl_lock) { read_lock_bh(&ip6_fl_lock); return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -647,6 +648,7 @@ static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip6fl_seq_stop(struct seq_file *seq, void *v) + __releases(ip6_fl_lock) { read_unlock_bh(&ip6_fl_lock); } @@ -692,20 +694,36 @@ static const struct file_operations ip6fl_seq_fops = { .llseek = seq_lseek, .release = seq_release_private, }; -#endif +static int ip6_flowlabel_proc_init(struct net *net) +{ + if (!proc_net_fops_create(net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops)) + return -ENOMEM; + return 0; +} -void ip6_flowlabel_init(void) +static void ip6_flowlabel_proc_fini(struct net *net) { -#ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_remove(net, "ip6_flowlabel"); +} +#else +static inline int ip6_flowlabel_proc_init(struct net *net) +{ + return 0; +} +static inline void ip6_flowlabel_proc_fini(struct net *net) +{ + return ; +} #endif + +int ip6_flowlabel_init(void) +{ + return ip6_flowlabel_proc_init(&init_net); } void ip6_flowlabel_cleanup(void) { del_timer(&ip6_fl_gc_timer); -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ip6_flowlabel"); -#endif + ip6_flowlabel_proc_fini(&init_net); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fac6f7f9dd73..178aebc0427a 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -134,7 +134,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt rcu_read_unlock(); - return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); + return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, + ip6_rcv_finish); err: IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); drop: @@ -152,9 +153,8 @@ out: static int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; - struct sock *raw_sk; unsigned int nhoff; - int nexthdr; + int nexthdr, raw; u8 hash; struct inet6_dev *idev; @@ -170,9 +170,7 @@ resubmit: nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; - raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); - if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) - raw_sk = NULL; + raw = raw6_local_deliver(skb, nexthdr); hash = nexthdr & (MAX_INET_PROTOS - 1); if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { @@ -205,7 +203,7 @@ resubmit: else if (ret == 0) IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); } else { - if (!raw_sk) { + if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, @@ -229,7 +227,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish); + return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + ip6_input_finish); } int ip6_mc_input(struct sk_buff *skb) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6338a9c1aa14..15c4f6cee3e6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -29,7 +29,7 @@ */ #include <linux/errno.h> -#include <linux/types.h> +#include <linux/kernel.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/net.h> @@ -70,6 +70,31 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } +int __ip6_local_out(struct sk_buff *skb) +{ + int len; + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + ipv6_hdr(skb)->payload_len = htons(len); + + return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev, + dst_output); +} + +int ip6_local_out(struct sk_buff *skb) +{ + int err; + + err = __ip6_local_out(skb); + if (likely(err == 1)) + err = dst_output(skb); + + return err; +} +EXPORT_SYMBOL_GPL(ip6_local_out); + static int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; @@ -120,8 +145,8 @@ static int ip6_output2(struct sk_buff *skb) is not supported in any case. */ if (newskb) - NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL, - newskb->dev, + NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, + NULL, newskb->dev, ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -134,7 +159,8 @@ static int ip6_output2(struct sk_buff *skb) IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); } - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish); + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, + ip6_output_finish); } static inline int ip6_skb_dst_mtu(struct sk_buff *skb) @@ -236,7 +262,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) { IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); } @@ -423,7 +449,7 @@ int ip6_forward(struct sk_buff *skb) /* XXX: idev->cnf.proxy_ndp? */ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); @@ -500,7 +526,8 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); + return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + ip6_forward_finish); error: IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); @@ -909,7 +936,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi fl_gw; int redirect; - ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1); + ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src, + (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); if (ifp) @@ -1098,7 +1126,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0); + exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - + rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; } else { @@ -1113,7 +1142,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { @@ -1316,8 +1346,6 @@ alloc_new_skb: skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; - skb->truesize += PAGE_SIZE; - atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); } else { err = -EMSGSIZE; goto error; @@ -1330,6 +1358,8 @@ alloc_new_skb: frag->size += copy; skb->len += copy; skb->data_len += copy; + skb->truesize += copy; + atomic_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; @@ -1401,10 +1431,6 @@ int ip6_push_pending_frames(struct sock *sk) *(__be32*)hdr = fl->fl6_flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); - if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) - hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); - else - hdr->payload_len = 0; hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); @@ -1421,7 +1447,7 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); } - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); + err = ip6_local_out(skb); if (err) { if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5383b33db8ca..9031e521c1df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -533,7 +533,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(&rt, &fl)) + if (ip_route_output_key(&init_net, &rt, &fl)) goto out; skb2->dev = rt->u.dst.dev; @@ -545,7 +545,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->daddr; fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; - if (ip_route_output_key(&rt, &fl) || + if (ip_route_output_key(&init_net, &rt, &fl) || rt->u.dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; @@ -635,7 +635,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, struct sk_buff *skb) { if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv6_copy_dscp(ipv6h, ipv6_hdr(skb)); + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) IP6_ECN_set_ce(ipv6_hdr(skb)); @@ -653,8 +653,8 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(&p->raddr, NULL, 0))) + likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) && + likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0))) ret = 1; if (ldev) @@ -788,12 +788,12 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) if (p->link) ldev = dev_get_by_index(&init_net, p->link); - if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) printk(KERN_WARNING "%s xmit: Local address not yet configured!\n", p->name); else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(&p->raddr, NULL, 0))) + unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0))) printk(KERN_WARNING "%s xmit: Routing loop! " "Remote address found on this node!\n", @@ -910,15 +910,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); - ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); nf_reset(skb); pkt_len = skb->len; - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, - skb->dst->dev, dst_output); + err = ip6_local_out(skb); if (net_xmit_eval(err) == 0) { stats->tx_bytes += pkt_len; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 0cd4056f9127..b276d04d6db5 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -190,7 +190,6 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (!t) @@ -204,9 +203,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - if (x->props.mode == XFRM_MODE_BEET) - mode = x->props.mode; - t->props.mode = mode; + t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -405,22 +402,22 @@ static int ipcomp6_init_state(struct xfrm_state *x) if (x->encap) goto out; - err = -ENOMEM; - ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); - if (!ipcd) - goto out; - x->props.header_len = 0; switch (x->props.mode) { - case XFRM_MODE_BEET: case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); + break; default: - goto error; + goto out; } + err = -ENOMEM; + ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); + if (!ipcd) + goto out; + mutex_lock(&ipcomp6_resource_mutex); if (!ipcomp6_alloc_scratches()) goto error; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8c5f80fd03ad..bf2a686aa13d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -268,8 +268,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); - sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(&tcp_prot); + sock_prot_inuse_add(sk->sk_prot, -1); + sock_prot_inuse_add(&tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; icsk->icsk_af_ops = &ipv4_specific; @@ -282,8 +282,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); - sock_prot_dec_use(sk->sk_prot); - sock_prot_inc_use(prot); + sock_prot_inuse_add(sk->sk_prot, -1); + sock_prot_inuse_add(prot, 1); local_bh_enable(); sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; @@ -1128,9 +1128,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif -void __init ipv6_packet_init(void) +int __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); + return 0; } void ipv6_packet_cleanup(void) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 331d728c2035..ab228d1ea114 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -903,9 +903,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr) return -ENOMEM; } - init_timer(&mc->mca_timer); - mc->mca_timer.function = igmp6_timer_handler; - mc->mca_timer.data = (unsigned long) mc; + setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); mc->idev = idev; @@ -1450,7 +1448,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb) static inline int mld_dev_queue_xmit(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, mld_dev_queue_xmit2); } @@ -1471,7 +1469,7 @@ static void mld_sendpack(struct sk_buff *skb) pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), mldlen, 0)); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); @@ -1815,7 +1813,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) idev = in6_dev_get(skb->dev); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, mld_dev_queue_xmit); if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); @@ -2259,14 +2257,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); rwlock_init(&idev->mc_lock); idev->mc_gq_running = 0; - init_timer(&idev->mc_gq_timer); - idev->mc_gq_timer.data = (unsigned long) idev; - idev->mc_gq_timer.function = &mld_gq_timer_expire; + setup_timer(&idev->mc_gq_timer, mld_gq_timer_expire, + (unsigned long)idev); idev->mc_tomb = NULL; idev->mc_ifc_count = 0; - init_timer(&idev->mc_ifc_timer); - idev->mc_ifc_timer.data = (unsigned long) idev; - idev->mc_ifc_timer.function = &mld_ifc_timer_expire; + setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; @@ -2377,6 +2373,7 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return igmp6_mc_get_idx(seq, *pos); @@ -2391,6 +2388,7 @@ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); if (likely(state->idev != NULL)) { @@ -2520,6 +2518,7 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { read_lock(&dev_base_lock); return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -2537,6 +2536,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im != NULL)) { diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 7fd841d41019..49d396620eac 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -34,11 +34,6 @@ #include <net/xfrm.h> #include <net/mip6.h> -static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) -{ - return x->coaddr; -} - static inline unsigned int calc_padlen(unsigned int len, unsigned int n) { return (n - len + 16) & 0x7; @@ -133,12 +128,15 @@ static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; + int err = destopt->nexthdr; + spin_lock(&x->lock); if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; + err = -ENOENT; + spin_unlock(&x->lock); - return destopt->nexthdr; + return err; } /* Destination Option Header is inserted. @@ -337,25 +335,27 @@ static struct xfrm_type mip6_destopt_type = .description = "MIP6DESTOPT", .owner = THIS_MODULE, .proto = IPPROTO_DSTOPTS, - .flags = XFRM_TYPE_NON_FRAGMENT, + .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR, .init_state = mip6_destopt_init_state, .destructor = mip6_destopt_destroy, .input = mip6_destopt_input, .output = mip6_destopt_output, .reject = mip6_destopt_reject, .hdr_offset = mip6_destopt_offset, - .local_addr = mip6_xfrm_addr, }; static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; + int err = rt2->rt_hdr.nexthdr; + spin_lock(&x->lock); if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; + err = -ENOENT; + spin_unlock(&x->lock); - return rt2->rt_hdr.nexthdr; + return err; } /* Routing Header type 2 is inserted. @@ -467,13 +467,12 @@ static struct xfrm_type mip6_rthdr_type = .description = "MIP6RT", .owner = THIS_MODULE, .proto = IPPROTO_ROUTING, - .flags = XFRM_TYPE_NON_FRAGMENT, + .flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR, .init_state = mip6_rthdr_init_state, .destructor = mip6_rthdr_destroy, .input = mip6_rthdr_input, .output = mip6_rthdr_output, .hdr_offset = mip6_rthdr_offset, - .remote_addr = mip6_xfrm_addr, }; static int __init mip6_init(void) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 777ed733b2d7..0d33a7d32125 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -337,7 +337,7 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: - ipv6_ib_mc_map(addr, buf); + ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; default: if (dir) { @@ -533,7 +533,8 @@ static void __ndisc_send(struct net_device *dev, idev = in6_dev_get(dst->dev); IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, type); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); @@ -555,7 +556,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, }; /* for anycast or proxy, solicited_addr != src_addr */ - ifp = ipv6_get_ifaddr(solicited_addr, dev, 1); + ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) @@ -615,7 +616,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, * suppress the inclusion of the sllao. */ if (send_sllao) { - struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1); + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr, + dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; @@ -652,7 +654,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { @@ -740,7 +742,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) inc = ipv6_addr_is_multicast(daddr); - if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) { + if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) { if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { @@ -788,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, + (pneigh = pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -898,7 +900,7 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } } - if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) { + if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) { if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); return; @@ -929,7 +931,7 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1048,7 +1050,8 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL, + GFP_ATOMIC); if (err < 0) goto errout; @@ -1058,7 +1061,7 @@ nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: - rtnl_set_sk_err(RTNLGRP_ND_USEROPT, err); + rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err); } static void ndisc_router_discovery(struct sk_buff *skb) @@ -1294,11 +1297,11 @@ skip_defrtr: } if (ndopts.nd_useropts) { - struct nd_opt_hdr *opt; - for (opt = ndopts.nd_useropts; - opt; - opt = ndisc_next_useropt(opt, ndopts.nd_useropts_end)) { - ndisc_ra_useropt(skb, opt); + struct nd_opt_hdr *p; + for (p = ndopts.nd_useropts; + p; + p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) { + ndisc_ra_useropt(skb, p); } } @@ -1538,7 +1541,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b1326c2bf8aa..2e06724dc348 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -8,6 +8,7 @@ #include <net/ip6_route.h> #include <net/xfrm.h> #include <net/ip6_checksum.h> +#include <net/netfilter/nf_queue.h> int ip6_route_me_harder(struct sk_buff *skb) { @@ -56,11 +57,12 @@ struct ip6_rt_info { struct in6_addr saddr; }; -static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) +static void nf_ip6_saveroute(const struct sk_buff *skb, + struct nf_queue_entry *entry) { - struct ip6_rt_info *rt_info = nf_info_reroute(info); + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (info->hook == NF_IP6_LOCAL_OUT) { + if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; @@ -68,11 +70,12 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff *skb, + const struct nf_queue_entry *entry) { - struct ip6_rt_info *rt_info = nf_info_reroute(info); + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (info->hook == NF_IP6_LOCAL_OUT) { + if (entry->hook == NF_INET_LOCAL_OUT) { struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) @@ -81,6 +84,12 @@ static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) return 0; } +static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) +{ + *dst = ip6_route_output(NULL, fl); + return (*dst)->error; +} + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) { @@ -89,7 +98,7 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb->len - dataoff, protocol, @@ -115,9 +124,10 @@ __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, EXPORT_SYMBOL(nf_ip6_checksum); -static struct nf_afinfo nf_ip6_afinfo = { +static const struct nf_afinfo nf_ip6_afinfo = { .family = AF_INET6, .checksum = nf_ip6_checksum, + .route = nf_ip6_route, .saveroute = nf_ip6_saveroute, .reroute = nf_ip6_reroute, .route_key_size = sizeof(struct ip6_rt_info), diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 838b8ddee8c0..6cae5475737e 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -2,12 +2,13 @@ # IP netfilter configuration # -menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" - depends on INET && IPV6 && NETFILTER && EXPERIMENTAL +menu "IPv6: Netfilter Configuration" + depends on INET && IPV6 && NETFILTER config NF_CONNTRACK_IPV6 - tristate "IPv6 connection tracking support (EXPERIMENTAL)" - depends on INET && IPV6 && EXPERIMENTAL && NF_CONNTRACK + tristate "IPv6 connection tracking support" + depends on INET && IPV6 && NF_CONNTRACK + default m if NETFILTER_ADVANCED=n ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -21,7 +22,8 @@ config NF_CONNTRACK_IPV6 config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" - depends on INET && IPV6 && NETFILTER && EXPERIMENTAL + depends on INET && IPV6 && NETFILTER + depends on NETFILTER_ADVANCED ---help--- This option adds a queue handler to the kernel for IPv6 @@ -42,8 +44,9 @@ config IP6_NF_QUEUE config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" - depends on INET && IPV6 && EXPERIMENTAL + depends on INET && IPV6 select NETFILTER_XTABLES + default m if NETFILTER_ADVANCED=n help ip6tables is a general, extensible packet identification framework. Currently only the packet filtering and packet mangling subsystem @@ -54,8 +57,9 @@ config IP6_NF_IPTABLES # The simple matches. config IP6_NF_MATCH_RT - tristate "Routing header match support" + tristate '"rt" Routing header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help rt matching allows you to match packets based on the routing header of the packet. @@ -63,8 +67,9 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_OPTS - tristate "Hop-by-hop and Dst opts header match support" + tristate '"hopbyhop" and "dst" opts header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This allows one to match packets based on the hop-by-hop and destination options headers of a packet. @@ -72,8 +77,9 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_FRAG - tristate "Fragmentation header match support" + tristate '"frag" Fragmentation header match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help frag matching allows you to match packets based on the fragmentation header of the packet. @@ -81,26 +87,19 @@ config IP6_NF_MATCH_FRAG To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_HL - tristate "HL match support" + tristate '"hl" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help HL matching allows you to match packets based on the hop limit of the packet. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_OWNER - tristate "Owner match support" - depends on IP6_NF_IPTABLES - help - Packet owner matching allows you to match locally-generated packets - based on who created them: the user, group, process or session. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_IPV6HEADER - tristate "IPv6 Extension Headers Match" + tristate '"ipv6header" IPv6 Extension Headers Match' depends on IP6_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help This module allows one to match packets based upon the ipv6 extension headers. @@ -108,24 +107,27 @@ config IP6_NF_MATCH_IPV6HEADER To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_AH - tristate "AH match support" + tristate '"ah" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module allows one to match AH packets. To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_MH - tristate "MH match support" + tristate '"mh" match support' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module allows one to match MH packets. To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_EUI64 - tristate "EUI64 address check" + tristate '"eui64" address check' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This module performs checking on the IPv6 source address Compares the last 64 bits with the EUI64 (delivered @@ -137,6 +139,7 @@ config IP6_NF_MATCH_EUI64 config IP6_NF_FILTER tristate "Packet filtering" depends on IP6_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -147,6 +150,7 @@ config IP6_NF_FILTER config IP6_NF_TARGET_LOG tristate "LOG target support" depends on IP6_NF_FILTER + default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in any iptables table which records the packet header to the syslog. @@ -156,6 +160,7 @@ config IP6_NF_TARGET_LOG config IP6_NF_TARGET_REJECT tristate "REJECT target support" depends on IP6_NF_FILTER + default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMPv6 error should be issued in response to an incoming packet, rather @@ -166,6 +171,7 @@ config IP6_NF_TARGET_REJECT config IP6_NF_MANGLE tristate "Packet mangling" depends on IP6_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -176,27 +182,29 @@ config IP6_NF_MANGLE config IP6_NF_TARGET_HL tristate 'HL (hoplimit) target support' depends on IP6_NF_MANGLE + depends on NETFILTER_ADVANCED help This option adds a `HL' target, which enables the user to decrement the hoplimit value of the IPv6 header or set it to a given (lower) value. - + While it is safe to decrement the hoplimit value, this option also enables functionality to increment and set the hoplimit value of the IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since you can easily create immortal packets that loop forever on the - network. + network. To compile it as a module, choose M here. If unsure, say N. config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on IP6_NF_IPTABLES + depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. - + If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index e789ec44d23b..fbf2c14ed887 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o -obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index e273605eef85..56b4ea6d29ed 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -29,6 +29,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/netfilter/nf_queue.h> #include <linux/netfilter_ipv4/ip_queue.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> @@ -38,13 +39,7 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_queue_entry { - struct list_head list; - struct nf_info *info; - struct sk_buff *skb; -}; - -typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); +typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; @@ -58,70 +53,13 @@ static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); -static void -ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) -{ - local_bh_disable(); - nf_reinject(entry->skb, entry->info, verdict); - local_bh_enable(); - kfree(entry); -} - static inline void -__ipq_enqueue_entry(struct ipq_queue_entry *entry) +__ipq_enqueue_entry(struct nf_queue_entry *entry) { - list_add(&entry->list, &queue_list); + list_add_tail(&entry->list, &queue_list); queue_total++; } -/* - * Find and return a queued entry matched by cmpfn, or return the last - * entry if cmpfn is NULL. - */ -static inline struct ipq_queue_entry * -__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct list_head *p; - - list_for_each_prev(p, &queue_list) { - struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p; - - if (!cmpfn || cmpfn(entry, data)) - return entry; - } - return NULL; -} - -static inline void -__ipq_dequeue_entry(struct ipq_queue_entry *entry) -{ - list_del(&entry->list); - queue_total--; -} - -static inline struct ipq_queue_entry * -__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct ipq_queue_entry *entry; - - entry = __ipq_find_entry(cmpfn, data); - if (entry == NULL) - return NULL; - - __ipq_dequeue_entry(entry); - return entry; -} - - -static inline void -__ipq_flush(int verdict) -{ - struct ipq_queue_entry *entry; - - while ((entry = __ipq_find_dequeue_entry(NULL, 0))) - ipq_issue_verdict(entry, verdict); -} - static inline int __ipq_set_mode(unsigned char mode, unsigned int range) { @@ -148,36 +86,64 @@ __ipq_set_mode(unsigned char mode, unsigned int range) return status; } +static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); + static inline void __ipq_reset(void) { peer_pid = 0; net_disable_timestamp(); __ipq_set_mode(IPQ_COPY_NONE, 0); - __ipq_flush(NF_DROP); + __ipq_flush(NULL, 0); } -static struct ipq_queue_entry * -ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) +static struct nf_queue_entry * +ipq_find_dequeue_entry(unsigned long id) { - struct ipq_queue_entry *entry; + struct nf_queue_entry *entry = NULL, *i; write_lock_bh(&queue_lock); - entry = __ipq_find_dequeue_entry(cmpfn, data); + + list_for_each_entry(i, &queue_list, list) { + if ((unsigned long)i == id) { + entry = i; + break; + } + } + + if (entry) { + list_del(&entry->list); + queue_total--; + } + write_unlock_bh(&queue_lock); return entry; } static void -ipq_flush(int verdict) +__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) +{ + struct nf_queue_entry *entry, *next; + + list_for_each_entry_safe(entry, next, &queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue_total--; + nf_reinject(entry, NF_DROP); + } + } +} + +static void +ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { write_lock_bh(&queue_lock); - __ipq_flush(verdict); + __ipq_flush(cmpfn, data); write_unlock_bh(&queue_lock); } static struct sk_buff * -ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) +ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) { sk_buff_data_t old_tail; size_t size = 0; @@ -234,20 +200,20 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->timestamp_sec = tv.tv_sec; pmsg->timestamp_usec = tv.tv_usec; pmsg->mark = entry->skb->mark; - pmsg->hook = entry->info->hook; + pmsg->hook = entry->hook; pmsg->hw_protocol = entry->skb->protocol; - if (entry->info->indev) - strcpy(pmsg->indev_name, entry->info->indev->name); + if (entry->indev) + strcpy(pmsg->indev_name, entry->indev->name); else pmsg->indev_name[0] = '\0'; - if (entry->info->outdev) - strcpy(pmsg->outdev_name, entry->info->outdev->name); + if (entry->outdev) + strcpy(pmsg->outdev_name, entry->outdev->name); else pmsg->outdev_name[0] = '\0'; - if (entry->info->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } @@ -268,28 +234,17 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum, void *data) +ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) { int status = -EINVAL; struct sk_buff *nskb; - struct ipq_queue_entry *entry; if (copy_mode == IPQ_COPY_NONE) return -EAGAIN; - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) { - printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n"); - return -ENOMEM; - } - - entry->info = info; - entry->skb = skb; - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) - goto err_out_free; + return status; write_lock_bh(&queue_lock); @@ -323,14 +278,11 @@ err_out_free_nskb: err_out_unlock: write_unlock_bh(&queue_lock); - -err_out_free: - kfree(entry); return status; } static int -ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) +ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) { int diff; int err; @@ -365,21 +317,15 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return 0; } -static inline int -id_cmp(struct ipq_queue_entry *e, unsigned long id) -{ - return (id == (unsigned long )e); -} - static int ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) { - struct ipq_queue_entry *entry; + struct nf_queue_entry *entry; if (vmsg->value > NF_MAX_VERDICT) return -EINVAL; - entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); + entry = ipq_find_dequeue_entry(vmsg->id); if (entry == NULL) return -ENOENT; else { @@ -389,7 +335,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) if (ipq_mangle_ipv6(vmsg, entry) < 0) verdict = NF_DROP; - ipq_issue_verdict(entry, verdict); + nf_reinject(entry, verdict); return 0; } } @@ -434,26 +380,32 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg, } static int -dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) { - if (entry->info->indev) - if (entry->info->indev->ifindex == ifindex) + if (entry->indev) + if (entry->indev->ifindex == ifindex) return 1; - if (entry->info->outdev) - if (entry->info->outdev->ifindex == ifindex) + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) return 1; - +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif return 0; } static void ipq_dev_drop(int ifindex) { - struct ipq_queue_entry *entry; - - while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL) - ipq_issue_verdict(entry, NF_DROP); + ipq_flush(dev_cmp, ifindex); } #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) @@ -577,26 +529,6 @@ static ctl_table ipq_table[] = { { .ctl_name = 0 } }; -static ctl_table ipq_dir_table[] = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = ipq_table - }, - { .ctl_name = 0 } -}; - -static ctl_table ipq_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipq_dir_table - }, - { .ctl_name = 0 } -}; - static int ip6_queue_show(struct seq_file *m, void *v) { read_lock_bh(&queue_lock); @@ -634,7 +566,7 @@ static const struct file_operations ip6_queue_proc_fops = { .owner = THIS_MODULE, }; -static struct nf_queue_handler nfqh = { +static const struct nf_queue_handler nfqh = { .name = "ip6_queue", .outfn = &ipq_enqueue_packet, }; @@ -662,7 +594,7 @@ static int __init ip6_queue_init(void) } register_netdevice_notifier(&ipq_dev_notifier); - ipq_sysctl_header = register_sysctl_table(ipq_root_table); + ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); status = nf_register_queue_handler(PF_INET6, &nfqh); if (status < 0) { @@ -677,7 +609,7 @@ cleanup_sysctl: proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: - sock_release(ipqnl->sk_socket); + netlink_kernel_release(ipqnl); mutex_lock(&ipqnl_mutex); mutex_unlock(&ipqnl_mutex); @@ -690,13 +622,13 @@ static void __exit ip6_queue_fini(void) { nf_unregister_queue_handlers(&nfqh); synchronize_net(); - ipq_flush(NF_DROP); + ipq_flush(NULL, 0); unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); proc_net_remove(&init_net, IPQ_PROC_FS_NAME); - sock_release(ipqnl->sk_socket); + netlink_kernel_release(ipqnl); mutex_lock(&ipqnl_mutex); mutex_unlock(&ipqnl_mutex); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index acaba1537931..dd7860fea61f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -19,21 +19,21 @@ #include <linux/poison.h> #include <linux/icmpv6.h> #include <net/ipv6.h> +#include <net/compat.h> #include <asm/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> +#include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_log.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -#define IPV6_HDR_LEN (sizeof(struct ipv6hdr)) -#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr)) - /*#define DEBUG_IP_FIREWALL*/ /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */ /*#define DEBUG_IP_FIREWALL_USER*/ @@ -76,12 +76,6 @@ do { \ Hence the start of any table is given by get_table() below. */ -#if 0 -#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0) -#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; }) -#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0) -#endif - /* Check for an extension */ int ip6t_ext_hdr(u8 nexthdr) @@ -96,6 +90,7 @@ ip6t_ext_hdr(u8 nexthdr) } /* Returns whether matches rule or not. */ +/* Performance critical - called for every packet */ static inline bool ip6_packet_match(const struct sk_buff *skb, const char *indev, @@ -108,7 +103,7 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, &ip6info->src), IP6T_INV_SRCIP) @@ -188,7 +183,7 @@ ip6_packet_match(const struct sk_buff *skb, } /* should be ip6 safe */ -static inline bool +static bool ip6_checkentry(const struct ip6t_ip6 *ipv6) { if (ipv6->flags & ~IP6T_F_MASK) { @@ -218,8 +213,9 @@ ip6t_error(struct sk_buff *skb, return NF_DROP; } -static inline -bool do_match(struct ip6t_entry_match *m, +/* Performance critical - called for every packet */ +static inline bool +do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -242,6 +238,7 @@ get_entry(void *base, unsigned int offset) } /* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ static inline int unconditional(const struct ip6t_ip6 *ipv6) { @@ -257,12 +254,12 @@ unconditional(const struct ip6t_ip6 *ipv6) #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* This cries for unification! */ -static const char *hooknames[] = { - [NF_IP6_PRE_ROUTING] = "PREROUTING", - [NF_IP6_LOCAL_IN] = "INPUT", - [NF_IP6_FORWARD] = "FORWARD", - [NF_IP6_LOCAL_OUT] = "OUTPUT", - [NF_IP6_POST_ROUTING] = "POSTROUTING", +static const char *const hooknames[] = { + [NF_INET_PRE_ROUTING] = "PREROUTING", + [NF_INET_LOCAL_IN] = "INPUT", + [NF_INET_FORWARD] = "FORWARD", + [NF_INET_LOCAL_OUT] = "OUTPUT", + [NF_INET_POST_ROUTING] = "POSTROUTING", }; enum nf_ip_trace_comments { @@ -271,7 +268,7 @@ enum nf_ip_trace_comments { NF_IP6_TRACE_COMMENT_POLICY, }; -static const char *comments[] = { +static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_RULE] = "rule", [NF_IP6_TRACE_COMMENT_RETURN] = "return", [NF_IP6_TRACE_COMMENT_POLICY] = "policy", @@ -287,6 +284,7 @@ static struct nf_loginfo trace_loginfo = { }, }; +/* Mildly perf critical (only if packet tracing is on) */ static inline int get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, char *hookname, char **chainname, @@ -378,8 +376,8 @@ ip6t_do_table(struct sk_buff *skb, * match it. */ read_lock_bh(&table->lock); - private = table->private; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -399,9 +397,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) - + IPV6_HDR_LEN, - 1); + ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr), 1); t = ip6t_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -502,11 +499,9 @@ mark_source_chains(struct xt_table_info *newinfo, /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ - for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) { + for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ip6t_entry *e - = (struct ip6t_entry *)(entry0 + pos); - int visited = e->comefrom & (1 << hook); + struct ip6t_entry *e = (struct ip6t_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -517,14 +512,14 @@ mark_source_chains(struct xt_table_info *newinfo, for (;;) { struct ip6t_standard_target *t = (void *)ip6t_get_target(e); + int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) { + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { printk("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } - e->comefrom - |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ if ((e->target_offset == sizeof(struct ip6t_entry) @@ -544,10 +539,10 @@ mark_source_chains(struct xt_table_info *newinfo, /* Return: backtrack through the last big jump. */ do { - e->comefrom ^= (1<<NF_IP6_NUMHOOKS); + e->comefrom ^= (1<<NF_INET_NUMHOOKS); #ifdef DEBUG_IP_FIREWALL_USER if (e->comefrom - & (1 << NF_IP6_NUMHOOKS)) { + & (1 << NF_INET_NUMHOOKS)) { duprintf("Back unset " "on hook %u " "rule %u\n", @@ -604,7 +599,7 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int +static int cleanup_match(struct ip6t_entry_match *m, unsigned int *i) { if (i && (*i)-- == 0) @@ -616,102 +611,135 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 0; } -static inline int -check_match(struct ip6t_entry_match *m, - const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, - unsigned int *i) +static int +check_entry(struct ip6t_entry *e, const char *name) +{ + struct ip6t_entry_target *t; + + if (!ip6_checkentry(&e->ipv6)) { + duprintf("ip_tables: ip check failed %p %s.\n", e, name); + return -EINVAL; + } + + if (e->target_offset + sizeof(struct ip6t_entry_target) > + e->next_offset) + return -EINVAL; + + t = ip6t_get_target(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static int check_match(struct ip6t_entry_match *m, const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, unsigned int *i) +{ + struct xt_match *match; + int ret; + + match = m->u.kernel.match; + ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), + name, hookmask, ipv6->proto, + ipv6->invflags & IP6T_INV_PROTO); + if (!ret && m->u.kernel.match->checkentry + && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, + hookmask)) { + duprintf("ip_tables: check failed for `%s'.\n", + m->u.kernel.match->name); + ret = -EINVAL; + } + if (!ret) + (*i)++; + return ret; +} + +static int +find_check_match(struct ip6t_entry_match *m, + const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, + unsigned int *i) { struct xt_match *match; int ret; match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, - m->u.user.revision), + m->u.user.revision), "ip6t_%s", m->u.user.name); if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); + duprintf("find_check_match: `%s' not found\n", m->u.user.name); return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), - name, hookmask, ipv6->proto, - ipv6->invflags & IP6T_INV_PROTO); + ret = check_match(m, name, ipv6, hookmask, i); if (ret) goto err; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - hookmask)) { - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; - goto err; - } - - (*i)++; return 0; err: module_put(m->u.kernel.match->me); return ret; } -static struct xt_target ip6t_standard_target; - -static inline int -check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +static int check_target(struct ip6t_entry *e, const char *name) { struct ip6t_entry_target *t; struct xt_target *target; int ret; - unsigned int j; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; + t = ip6t_get_target(e); + target = t->u.kernel.target; + ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), + name, e->comefrom, e->ipv6.proto, + e->ipv6.invflags & IP6T_INV_PROTO); + if (!ret && t->u.kernel.target->checkentry + && !t->u.kernel.target->checkentry(name, e, target, t->data, + e->comefrom)) { + duprintf("ip_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + ret = -EINVAL; } + return ret; +} - if (e->target_offset + sizeof(struct ip6t_entry_target) > - e->next_offset) - return -EINVAL; +static int +find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, + unsigned int *i) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + int ret; + unsigned int j; + + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j); + ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6, + e->comefrom, &j); if (ret != 0) goto cleanup_matches; t = ip6t_get_target(e); - ret = -EINVAL; - if (e->target_offset + t->u.target_size > e->next_offset) - goto cleanup_matches; target = try_then_request_module(xt_find_target(AF_INET6, t->u.user.name, t->u.user.revision), "ip6t_%s", t->u.user.name); if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; goto cleanup_matches; } t->u.kernel.target = target; - ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ipv6.proto, - e->ipv6.invflags & IP6T_INV_PROTO); + ret = check_target(e, name); if (ret) goto err; - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto err; - } - (*i)++; return 0; err: @@ -721,7 +749,7 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; } -static inline int +static int check_entry_size_and_hooks(struct ip6t_entry *e, struct xt_table_info *newinfo, unsigned char *base, @@ -746,7 +774,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, } /* Check hooks & underflows */ - for (h = 0; h < NF_IP6_NUMHOOKS; h++) { + for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) @@ -764,7 +792,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return 0; } -static inline int +static int cleanup_entry(struct ip6t_entry *e, unsigned int *i) { struct ip6t_entry_target *t; @@ -800,7 +828,7 @@ translate_table(const char *name, newinfo->number = number; /* Init all hooks to impossible value. */ - for (i = 0; i < NF_IP6_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } @@ -824,7 +852,7 @@ translate_table(const char *name, } /* Check hooks all assigned */ - for (i = 0; i < NF_IP6_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ if (!(valid_hooks & (1 << i))) continue; @@ -846,7 +874,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + find_check_entry, name, size, &i); if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, @@ -860,7 +888,7 @@ translate_table(const char *name, memcpy(newinfo->entries[i], entry0, newinfo->size); } - return 0; + return ret; } /* Gets counters. */ @@ -920,33 +948,49 @@ get_counters(const struct xt_table_info *t, } } -static int -copy_entries_to_user(unsigned int total_size, - struct xt_table *table, - void __user *userptr) +static struct xt_counters *alloc_counters(struct xt_table *table) { - unsigned int off, num, countersize; - struct ip6t_entry *e; + unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - int ret = 0; - void *loc_cpu_entry; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc(countersize); + counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); /* First, sum counters... */ write_lock_bh(&table->lock); get_counters(private, counters); write_unlock_bh(&table->lock); - /* choose the copy that is on ourc node/cpu */ + return counters; +} + +static int +copy_entries_to_user(unsigned int total_size, + struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + struct ip6t_entry *e; + struct xt_counters *counters; + struct xt_table_info *private = table->private; + int ret = 0; + void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; @@ -1001,23 +1045,167 @@ copy_entries_to_user(unsigned int total_size, return ret; } +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(AF_INET6, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(AF_INET6, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static inline int +compat_calc_match(struct ip6t_entry_match *m, int *size) +{ + *size += xt_compat_match_offset(m->u.kernel.match); + return 0; +} + +static int compat_calc_entry(struct ip6t_entry *e, + const struct xt_table_info *info, + void *base, struct xt_table_info *newinfo) +{ + struct ip6t_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + entry_offset = (void *)e - base; + IP6T_MATCH_ITERATE(e, compat_calc_match, &off); + t = ip6t_get_target(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(AF_INET6, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct ip6t_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct ip6t_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + void *loc_cpu_entry; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + return IP6T_ENTRY_ITERATE(loc_cpu_entry, info->size, + compat_calc_entry, info, loc_cpu_entry, + newinfo); +} +#endif + +static int get_info(void __user *user, int *len, int compat) +{ + char name[IP6T_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct ip6t_getinfo)) { + duprintf("length %u != %zu\n", *len, + sizeof(struct ip6t_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(AF_INET6); +#endif + t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + "ip6table_%s", name); + if (t && !IS_ERR(t)) { + struct ip6t_getinfo info; + struct xt_table_info *private = t->private; + +#ifdef CONFIG_COMPAT + if (compat) { + struct xt_table_info tmp; + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(AF_INET6); + private = &tmp; + } +#endif + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(AF_INET6); +#endif + return ret; +} + static int -get_entries(const struct ip6t_get_entries *entries, - struct ip6t_get_entries __user *uptr) +get_entries(struct ip6t_get_entries __user *uptr, int *len) { int ret; + struct ip6t_get_entries get; struct xt_table *t; - t = xt_find_table_lock(AF_INET6, entries->name); + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct ip6t_get_entries) + get.size) { + duprintf("get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(AF_INET6, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); - if (entries->size == private->size) + if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else { duprintf("get_entries: I've got %u not %u!\n", - private->size, entries->size); + private->size, get.size); ret = -EINVAL; } module_put(t->me); @@ -1029,67 +1217,40 @@ get_entries(const struct ip6t_get_entries *entries, } static int -do_replace(void __user *user, unsigned int len) +__do_replace(const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) { int ret; - struct ip6t_replace tmp; struct xt_table *t; - struct xt_table_info *newinfo, *oldinfo; + struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_entry, *loc_cpu_old_entry; + void *loc_cpu_old_entry; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - /* overflow check */ - if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS - - SMP_CACHE_BYTES) - return -ENOMEM; - if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) - return -ENOMEM; - - newinfo = xt_alloc_table_info(tmp.size); - if (!newinfo) - return -ENOMEM; - - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { - ret = -EFAULT; - goto free_newinfo; - } - - counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); + ret = 0; + counters = vmalloc_node(num_counters * sizeof(struct xt_counters), + numa_node_id()); if (!counters) { ret = -ENOMEM; - goto free_newinfo; + goto out; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); - if (ret != 0) - goto free_newinfo_counters; - - duprintf("ip_tables: Translated table\n"); - - t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), - "ip6table_%s", tmp.name); + t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + "ip6table_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } /* You lied! */ - if (tmp.valid_hooks != t->valid_hooks) { + if (valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", - tmp.valid_hooks, t->valid_hooks); + valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } - oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; @@ -1107,10 +1268,11 @@ do_replace(void __user *user, unsigned int len) get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, + NULL); xt_free_table_info(oldinfo); - if (copy_to_user(tmp.counters, counters, - sizeof(struct xt_counters) * tmp.num_counters) != 0) + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) ret = -EFAULT; vfree(counters); xt_table_unlock(t); @@ -1120,9 +1282,54 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); - free_newinfo_counters: vfree(counters); + out: + return ret; +} + +static int +do_replace(void __user *user, unsigned int len) +{ + int ret; + struct ip6t_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(tmp.name, tmp.valid_hooks, + newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, + tmp.hook_entry, tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("ip_tables: Translated table\n"); + + ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1151,31 +1358,59 @@ add_counter_to_entry(struct ip6t_entry *e, } static int -do_add_counters(void __user *user, unsigned int len) +do_add_counters(void __user *user, unsigned int len, int compat) { unsigned int i; - struct xt_counters_info tmp, *paddc; - struct xt_table_info *private; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + char *name; + int size; + void *ptmp; struct xt_table *t; + struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } + + if (copy_from_user(ptmp, user, size) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc(len); + paddc = vmalloc_node(len - size, numa_node_id()); if (!paddc) return -ENOMEM; - if (copy_from_user(paddc, user, len) != 0) { + if (copy_from_user(paddc, user + size, len - size) != 0) { ret = -EFAULT; goto free; } - t = xt_find_table_lock(AF_INET6, tmp.name); + t = xt_find_table_lock(AF_INET6, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1183,18 +1418,18 @@ do_add_counters(void __user *user, unsigned int len) write_lock_bh(&t->lock); private = t->private; - if (private->number != tmp.num_counters) { + if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + loc_cpu_entry = private->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, - paddc->counters, + paddc, &i); unlock_up_free: write_unlock_bh(&t->lock); @@ -1206,8 +1441,433 @@ do_add_counters(void __user *user, unsigned int len) return ret; } +#ifdef CONFIG_COMPAT +struct compat_ip6t_replace { + char name[IP6T_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_INET_NUMHOOKS]; + u32 underflow[NF_INET_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; /* struct ip6t_counters * */ + struct compat_ip6t_entry entries[0]; +}; + static int -do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr, + compat_uint_t *size, struct xt_counters *counters, + unsigned int *i) +{ + struct ip6t_entry_target *t; + struct compat_ip6t_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + int ret; + + ret = -EFAULT; + origsize = *size; + ce = (struct compat_ip6t_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct ip6t_entry))) + goto out; + + if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i]))) + goto out; + + *dstptr += sizeof(struct compat_ip6t_entry); + *size -= sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + + ret = IP6T_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size); + target_offset = e->target_offset - (origsize - *size); + if (ret) + goto out; + t = ip6t_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + goto out; + ret = -EFAULT; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset)) + goto out; + if (put_user(next_offset, &ce->next_offset)) + goto out; + + (*i)++; + return 0; +out: + return ret; +} + +static int +compat_find_calc_match(struct ip6t_entry_match *m, + const char *name, + const struct ip6t_ip6 *ipv6, + unsigned int hookmask, + int *size, int *i) +{ + struct xt_match *match; + + match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, + m->u.user.revision), + "ip6t_%s", m->u.user.name); + if (IS_ERR(match) || !match) { + duprintf("compat_check_calc_match: `%s' not found\n", + m->u.user.name); + return match ? PTR_ERR(match) : -ENOENT; + } + m->u.kernel.match = match; + *size += xt_compat_match_offset(match); + + (*i)++; + return 0; +} + +static int +compat_release_match(struct ip6t_entry_match *m, unsigned int *i) +{ + if (i && (*i)-- == 0) + return 1; + + module_put(m->u.kernel.match->me); + return 0; +} + +static int +compat_release_entry(struct compat_ip6t_entry *e, unsigned int *i) +{ + struct ip6t_entry_target *t; + + if (i && (*i)-- == 0) + return 1; + + /* Cleanup all matches */ + COMPAT_IP6T_MATCH_ITERATE(e, compat_release_match, NULL); + t = compat_ip6t_get_target(e); + module_put(t->u.kernel.target->me); + return 0; +} + +static int +check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + unsigned char *base, + unsigned char *limit, + unsigned int *hook_entries, + unsigned int *underflows, + unsigned int *i, + const char *name) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + int ret, off, h, j; + + duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 + || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_ip6t_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct ip6t_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + entry_offset = (void *)e - (void *)base; + j = 0; + ret = COMPAT_IP6T_MATCH_ITERATE(e, compat_find_calc_match, name, + &e->ipv6, e->comefrom, &off, &j); + if (ret != 0) + goto release_matches; + + t = compat_ip6t_get_target(e); + target = try_then_request_module(xt_find_target(AF_INET6, + t->u.user.name, + t->u.user.revision), + "ip6t_%s", t->u.user.name); + if (IS_ERR(target) || !target) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = target ? PTR_ERR(target) : -ENOENT; + goto release_matches; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(AF_INET6, entry_offset, off); + if (ret) + goto out; + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + + (*i)++; + return 0; + +out: + module_put(t->u.kernel.target->me); +release_matches: + IP6T_MATCH_ITERATE(e, compat_release_match, &j); + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct ip6t_entry_target *t; + struct xt_target *target; + struct ip6t_entry *de; + unsigned int origsize; + int ret, h; + + ret = 0; + origsize = *size; + de = (struct ip6t_entry *)*dstptr; + memcpy(de, e, sizeof(struct ip6t_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct ip6t_entry); + *size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry); + + ret = COMPAT_IP6T_MATCH_ITERATE(e, xt_compat_match_from_user, + dstptr, size); + if (ret) + return ret; + de->target_offset = e->target_offset - (origsize - *size); + t = compat_ip6t_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static int compat_check_entry(struct ip6t_entry *e, const char *name, + unsigned int *i) +{ + int j, ret; + + j = 0; + ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, + e->comefrom, &j); + if (ret) + goto cleanup_matches; + + ret = check_target(e, name); + if (ret) + goto cleanup_matches; + + (*i)++; + return 0; + + cleanup_matches: + IP6T_MATCH_ITERATE(e, cleanup_match, &j); + return ret; +} + +static int +translate_compat_table(const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(AF_INET6); + /* Walk through entries, checking offsets. */ + ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, + check_compat_entry_size_and_hooks, + info, &size, entry0, + entry0 + total_size, + hook_entries, underflows, &j, name); + if (ret != 0) + goto out_unlock; + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + ret = COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, + compat_copy_entry_from_user, + &pos, &size, name, newinfo, entry1); + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, + name, &i); + if (ret) { + j -= i; + COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, + compat_release_entry, &j); + IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + COMPAT_IP6T_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET6); + xt_compat_unlock(AF_INET6); + goto out; +} + +static int +compat_do_replace(void __user *user, unsigned int len) +{ + int ret; + struct compat_ip6t_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int +compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) { int ret; @@ -1216,11 +1876,11 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) switch (cmd) { case IP6T_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = compat_do_replace(user, len); break; case IP6T_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len); + ret = do_add_counters(user, len, 1); break; default: @@ -1231,75 +1891,155 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) return ret; } +struct compat_ip6t_get_entries { + char name[IP6T_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_ip6t_entry entrytable[0]; +}; + static int -do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + void *loc_cpu_entry; + unsigned int i = 0; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + ret = IP6T_ENTRY_ITERATE(loc_cpu_entry, total_size, + compat_copy_entry_to_user, + &pos, &size, counters, &i); + + vfree(counters); + return ret; +} + +static int +compat_get_entries(struct compat_ip6t_get_entries __user *uptr, int *len) { int ret; + struct compat_ip6t_get_entries get; + struct xt_table *t; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } - switch (cmd) { - case IP6T_SO_GET_INFO: { - char name[IP6T_TABLE_MAXNAMELEN]; - struct xt_table *t; + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; - if (*len != sizeof(struct ip6t_getinfo)) { - duprintf("length %u != %u\n", *len, - sizeof(struct ip6t_getinfo)); + if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + xt_compat_lock(AF_INET6); + t = xt_find_table_lock(AF_INET6, get.name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + struct xt_table_info info; + duprintf("t->private->number = %u\n", private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + private->size, get.size); ret = -EINVAL; - break; } + xt_compat_flush_offsets(AF_INET6); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; - if (copy_from_user(name, user, sizeof(name)) != 0) { - ret = -EFAULT; - break; - } - name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), - "ip6table_%s", name); - if (t && !IS_ERR(t)) { - struct ip6t_getinfo info; - struct xt_table_info *private = t->private; - - info.valid_hooks = t->valid_hooks; - memcpy(info.hook_entry, private->hook_entry, - sizeof(info.hook_entry)); - memcpy(info.underflow, private->underflow, - sizeof(info.underflow)); - info.num_entries = private->number; - info.size = private->size; - memcpy(info.name, name, sizeof(info.name)); - - if (copy_to_user(user, &info, *len) != 0) - ret = -EFAULT; - else - ret = 0; - xt_table_unlock(t); - module_put(t->me); - } else - ret = t ? PTR_ERR(t) : -ENOENT; + xt_compat_unlock(AF_INET6); + return ret; +} + +static int do_ip6t_get_ctl(struct sock *, int, void __user *, int *); + +static int +compat_do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IP6T_SO_GET_INFO: + ret = get_info(user, len, 1); + break; + case IP6T_SO_GET_ENTRIES: + ret = compat_get_entries(user, len); + break; + default: + ret = do_ip6t_get_ctl(sk, cmd, user, len); } - break; + return ret; +} +#endif - case IP6T_SO_GET_ENTRIES: { - struct ip6t_get_entries get; +static int +do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +{ + int ret; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %u\n", *len, sizeof(get)); - ret = -EINVAL; - } else if (copy_from_user(&get, user, sizeof(get)) != 0) { - ret = -EFAULT; - } else if (*len != sizeof(struct ip6t_get_entries) + get.size) { - duprintf("get_entries: %u != %u\n", *len, - sizeof(struct ip6t_get_entries) + get.size); - ret = -EINVAL; - } else - ret = get_entries(&get, user); + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IP6T_SO_SET_REPLACE: + ret = do_replace(user, len); break; + + case IP6T_SO_SET_ADD_COUNTERS: + ret = do_add_counters(user, len, 0); + break; + + default: + duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; } + return ret; +} + +static int +do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IP6T_SO_GET_INFO: + ret = get_info(user, len, 0); + break; + + case IP6T_SO_GET_ENTRIES: + ret = get_entries(user, len); + break; + case IP6T_SO_GET_REVISION_MATCH: case IP6T_SO_GET_REVISION_TARGET: { struct ip6t_get_revision rev; @@ -1334,12 +2074,11 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ip6t_register_table(struct xt_table *table, - const struct ip6t_replace *repl) +int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl) { int ret; struct xt_table_info *newinfo; - static struct xt_table_info bootstrap + struct xt_table_info bootstrap = { 0, 0, 0, { 0 }, { 0 }, { } }; void *loc_cpu_entry; @@ -1347,7 +2086,7 @@ int ip6t_register_table(struct xt_table *table, if (!newinfo) return -ENOMEM; - /* choose the copy on our node/cpu */ + /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); @@ -1403,17 +2142,18 @@ icmp6_match(const struct sk_buff *skb, unsigned int protoff, bool *hotdrop) { - struct icmp6hdr _icmp, *ic; + struct icmp6hdr _icmph, *ic; const struct ip6t_icmp *icmpinfo = matchinfo; /* Must not be a fragment. */ if (offset) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp); + ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ + * can't. Hence, no choice but to drop. + */ duprintf("Dropping evil ICMP tinygram.\n"); *hotdrop = true; return false; @@ -1445,6 +2185,11 @@ static struct xt_target ip6t_standard_target __read_mostly = { .name = IP6T_STANDARD_TARGET, .targetsize = sizeof(int), .family = AF_INET6, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif }; static struct xt_target ip6t_error_target __read_mostly = { @@ -1459,15 +2204,21 @@ static struct nf_sockopt_ops ip6t_sockopts = { .set_optmin = IP6T_BASE_CTL, .set_optmax = IP6T_SO_SET_MAX+1, .set = do_ip6t_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ip6t_set_ctl, +#endif .get_optmin = IP6T_BASE_CTL, .get_optmax = IP6T_SO_GET_MAX+1, .get = do_ip6t_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ip6t_get_ctl, +#endif .owner = THIS_MODULE, }; static struct xt_match icmp6_matchstruct __read_mostly = { .name = "icmp6", - .match = &icmp6_match, + .match = icmp6_match, .matchsize = sizeof(struct ip6t_icmp), .checkentry = icmp6_checkentry, .proto = IPPROTO_ICMPV6, @@ -1516,6 +2267,7 @@ err1: static void __exit ip6_tables_fini(void) { nf_unregister_sockopt(&ip6t_sockopts); + xt_unregister_match(&icmp6_matchstruct); xt_unregister_target(&ip6t_error_target); xt_unregister_target(&ip6t_standard_target); diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index 9afc836fd454..d5f8fd5f29d3 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -15,15 +15,13 @@ #include <linux/netfilter_ipv6/ip6t_HL.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +hl_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; @@ -58,11 +56,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb, return XT_CONTINUE; } -static bool ip6t_hl_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +hl_tg6_check(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { const struct ip6t_HL_info *info = targinfo; @@ -79,25 +76,25 @@ static bool ip6t_hl_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_HL __read_mostly = { +static struct xt_target hl_tg6_reg __read_mostly = { .name = "HL", .family = AF_INET6, - .target = ip6t_hl_target, + .target = hl_tg6, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", - .checkentry = ip6t_hl_checkentry, + .checkentry = hl_tg6_check, .me = THIS_MODULE }; -static int __init ip6t_hl_init(void) +static int __init hl_tg6_init(void) { - return xt_register_target(&ip6t_HL); + return xt_register_target(&hl_tg6_reg); } -static void __exit ip6t_hl_fini(void) +static void __exit hl_tg6_exit(void) { - xt_unregister_target(&ip6t_HL); + xt_unregister_target(&hl_tg6_reg); } -module_init(ip6t_hl_init); -module_exit(ip6t_hl_fini); +module_init(hl_tg6_init); +module_exit(hl_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 7a48c342df46..86a613810b69 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -23,9 +23,10 @@ #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> +#include <net/netfilter/nf_log.h> MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); -MODULE_DESCRIPTION("IP6 tables LOG target module"); +MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); MODULE_LICENSE("GPL"); struct in_device; @@ -362,7 +363,9 @@ static void dump_packet(const struct nf_loginfo *info, if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) - printk("UID=%u ", skb->sk->sk_socket->file->f_uid); + printk("UID=%u GID=%u", + skb->sk->sk_socket->file->f_uid, + skb->sk->sk_socket->file->f_gid); read_unlock_bh(&skb->sk->sk_callback_lock); } } @@ -431,12 +434,9 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +log_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -450,11 +450,10 @@ ip6t_log_target(struct sk_buff *skb, } -static bool ip6t_log_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +log_tg6_check(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; @@ -470,37 +469,37 @@ static bool ip6t_log_checkentry(const char *tablename, return true; } -static struct xt_target ip6t_log_reg __read_mostly = { +static struct xt_target log_tg6_reg __read_mostly = { .name = "LOG", .family = AF_INET6, - .target = ip6t_log_target, + .target = log_tg6, .targetsize = sizeof(struct ip6t_log_info), - .checkentry = ip6t_log_checkentry, + .checkentry = log_tg6_check, .me = THIS_MODULE, }; -static struct nf_logger ip6t_logger = { +static const struct nf_logger ip6t_logger = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, .me = THIS_MODULE, }; -static int __init ip6t_log_init(void) +static int __init log_tg6_init(void) { int ret; - ret = xt_register_target(&ip6t_log_reg); + ret = xt_register_target(&log_tg6_reg); if (ret < 0) return ret; nf_log_register(PF_INET6, &ip6t_logger); return 0; } -static void __exit ip6t_log_fini(void) +static void __exit log_tg6_exit(void) { nf_log_unregister(&ip6t_logger); - xt_unregister_target(&ip6t_log_reg); + xt_unregister_target(&log_tg6_reg); } -module_init(ip6t_log_init); -module_exit(ip6t_log_fini); +module_init(log_tg6_init); +module_exit(log_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 1a7d2917545d..b23baa635fe0 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -31,7 +31,7 @@ #include <linux/netfilter_ipv6/ip6t_REJECT.h> MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); -MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ @@ -121,7 +121,6 @@ static void send_reset(struct sk_buff *oldskb) ip6h->version = 6; ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); ip6h->nexthdr = IPPROTO_TCP; - ip6h->payload_len = htons(sizeof(struct tcphdr)); ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); @@ -159,25 +158,22 @@ static void send_reset(struct sk_buff *oldskb) nf_ct_attach(nskb, oldskb); - NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, - dst_output); + ip6_local_out(nskb); } static inline void send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { - if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +reject_tg6(struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const struct xt_target *target, const void *targinfo) { const struct ip6t_reject_info *reject = targinfo; @@ -216,11 +212,10 @@ static unsigned int reject6_target(struct sk_buff *skb, return NF_DROP; } -static bool check(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool +reject_tg6_check(const char *tablename, const void *entry, + const struct xt_target *target, void *targinfo, + unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; const struct ip6t_entry *e = entry; @@ -239,27 +234,27 @@ static bool check(const char *tablename, return true; } -static struct xt_target ip6t_reject_reg __read_mostly = { +static struct xt_target reject_tg6_reg __read_mostly = { .name = "REJECT", .family = AF_INET6, - .target = reject6_target, + .target = reject_tg6, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", - .hooks = (1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | - (1 << NF_IP6_LOCAL_OUT), - .checkentry = check, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT), + .checkentry = reject_tg6_check, .me = THIS_MODULE }; -static int __init ip6t_reject_init(void) +static int __init reject_tg6_init(void) { - return xt_register_target(&ip6t_reject_reg); + return xt_register_target(&reject_tg6_reg); } -static void __exit ip6t_reject_fini(void) +static void __exit reject_tg6_exit(void) { - xt_unregister_target(&ip6t_reject_reg); + xt_unregister_target(&reject_tg6_reg); } -module_init(ip6t_reject_init); -module_exit(ip6t_reject_fini); +module_init(reject_tg6_init); +module_exit(reject_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 2a25fe25e0e0..429629fd63b6 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -20,7 +20,7 @@ #include <linux/netfilter_ipv6/ip6t_ah.h> MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 AH match"); +MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ @@ -37,14 +37,9 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +ah_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; @@ -100,11 +95,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +ah_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; @@ -115,24 +108,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match ah_match __read_mostly = { +static struct xt_match ah_mt6_reg __read_mostly = { .name = "ah", .family = AF_INET6, - .match = match, + .match = ah_mt6, .matchsize = sizeof(struct ip6t_ah), - .checkentry = checkentry, + .checkentry = ah_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_ah_init(void) +static int __init ah_mt6_init(void) { - return xt_register_match(&ah_match); + return xt_register_match(&ah_mt6_reg); } -static void __exit ip6t_ah_fini(void) +static void __exit ah_mt6_exit(void) { - xt_unregister_match(&ah_match); + xt_unregister_match(&ah_mt6_reg); } -module_init(ip6t_ah_init); -module_exit(ip6t_ah_fini); +module_init(ah_mt6_init); +module_exit(ah_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 41df9a578c7a..8f331f12b2ec 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -15,19 +15,15 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -MODULE_DESCRIPTION("IPv6 EUI64 address checking match"); +MODULE_DESCRIPTION("Xtables: IPv6 EUI64 address match"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +eui64_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { unsigned char eui64[8]; int i = 0; @@ -62,25 +58,25 @@ match(const struct sk_buff *skb, return false; } -static struct xt_match eui64_match __read_mostly = { +static struct xt_match eui64_mt6_reg __read_mostly = { .name = "eui64", .family = AF_INET6, - .match = match, + .match = eui64_mt6, .matchsize = sizeof(int), - .hooks = (1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_IN) | - (1 << NF_IP6_FORWARD), + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), .me = THIS_MODULE, }; -static int __init ip6t_eui64_init(void) +static int __init eui64_mt6_init(void) { - return xt_register_match(&eui64_match); + return xt_register_match(&eui64_mt6_reg); } -static void __exit ip6t_eui64_fini(void) +static void __exit eui64_mt6_exit(void) { - xt_unregister_match(&eui64_match); + xt_unregister_match(&eui64_mt6_reg); } -module_init(ip6t_eui64_init); -module_exit(ip6t_eui64_fini); +module_init(eui64_mt6_init); +module_exit(eui64_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 968aeba02073..e2bbc63dba5b 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -19,7 +19,7 @@ #include <linux/netfilter_ipv6/ip6t_frag.h> MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 FRAG match"); +MODULE_DESCRIPTION("Xtables: IPv6 fragment match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the id is matched by the range, 0 otherwise */ @@ -35,14 +35,10 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +frag_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { struct frag_hdr _frag; const struct frag_hdr *fh; @@ -116,11 +112,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +frag_mt6_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; @@ -131,24 +125,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match frag_match __read_mostly = { +static struct xt_match frag_mt6_reg __read_mostly = { .name = "frag", .family = AF_INET6, - .match = match, + .match = frag_mt6, .matchsize = sizeof(struct ip6t_frag), - .checkentry = checkentry, + .checkentry = frag_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_frag_init(void) +static int __init frag_mt6_init(void) { - return xt_register_match(&frag_match); + return xt_register_match(&frag_mt6_reg); } -static void __exit ip6t_frag_fini(void) +static void __exit frag_mt6_exit(void) { - xt_unregister_match(&frag_match); + xt_unregister_match(&frag_mt6_reg); } -module_init(ip6t_frag_init); -module_exit(ip6t_frag_fini); +module_init(frag_mt6_init); +module_exit(frag_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index e6ca6018b1ea..62e39ace0588 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -21,7 +21,7 @@ #include <linux/netfilter_ipv6/ip6t_opts.h> MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 opts match"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop-By-Hop and Destination Header match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); MODULE_ALIAS("ip6t_dst"); @@ -42,14 +42,10 @@ MODULE_ALIAS("ip6t_dst"); */ static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +hbh_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; @@ -171,11 +167,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +hbh_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; @@ -186,36 +180,36 @@ checkentry(const char *tablename, return true; } -static struct xt_match opts_match[] __read_mostly = { +static struct xt_match hbh_mt6_reg[] __read_mostly = { { .name = "hbh", .family = AF_INET6, - .match = match, + .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, + .checkentry = hbh_mt6_check, .me = THIS_MODULE, .data = NEXTHDR_HOP, }, { .name = "dst", .family = AF_INET6, - .match = match, + .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, + .checkentry = hbh_mt6_check, .me = THIS_MODULE, .data = NEXTHDR_DEST, }, }; -static int __init ip6t_hbh_init(void) +static int __init hbh_mt6_init(void) { - return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); + return xt_register_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } -static void __exit ip6t_hbh_fini(void) +static void __exit hbh_mt6_exit(void) { - xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); + xt_unregister_matches(hbh_mt6_reg, ARRAY_SIZE(hbh_mt6_reg)); } -module_init(ip6t_hbh_init); -module_exit(ip6t_hbh_fini); +module_init(hbh_mt6_init); +module_exit(hbh_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index ca29ec00dc18..345671673845 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -16,13 +16,13 @@ #include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("IP tables Hop Limit matching module"); +MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); MODULE_LICENSE("GPL"); -static bool match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool +hl_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { const struct ip6t_hl_info *info = matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -49,23 +49,23 @@ static bool match(const struct sk_buff *skb, return false; } -static struct xt_match hl_match __read_mostly = { +static struct xt_match hl_mt6_reg __read_mostly = { .name = "hl", .family = AF_INET6, - .match = match, + .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, }; -static int __init ip6t_hl_init(void) +static int __init hl_mt6_init(void) { - return xt_register_match(&hl_match); + return xt_register_match(&hl_mt6_reg); } -static void __exit ip6t_hl_fini(void) +static void __exit hl_mt6_exit(void) { - xt_unregister_match(&hl_match); + xt_unregister_match(&hl_mt6_reg); } -module_init(ip6t_hl_init); -module_exit(ip6t_hl_fini); +module_init(hl_mt6_init); +module_exit(hl_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 2c65c2f9a4ab..3a940171f829 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -23,18 +23,14 @@ #include <linux/netfilter_ipv6/ip6t_ipv6header.h> MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 headers match"); +MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -ipv6header_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, + bool *hotdrop) { const struct ip6t_ipv6header_info *info = matchinfo; unsigned int temp; @@ -125,11 +121,9 @@ ipv6header_match(const struct sk_buff *skb, } static bool -ipv6header_checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +ipv6header_mt6_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; @@ -141,25 +135,25 @@ ipv6header_checkentry(const char *tablename, return true; } -static struct xt_match ip6t_ipv6header_match __read_mostly = { +static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", .family = AF_INET6, - .match = &ipv6header_match, + .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), - .checkentry = &ipv6header_checkentry, + .checkentry = ipv6header_mt6_check, .destroy = NULL, .me = THIS_MODULE, }; -static int __init ipv6header_init(void) +static int __init ipv6header_mt6_init(void) { - return xt_register_match(&ip6t_ipv6header_match); + return xt_register_match(&ipv6header_mt6_reg); } -static void __exit ipv6header_exit(void) +static void __exit ipv6header_mt6_exit(void) { - xt_unregister_match(&ip6t_ipv6header_match); + xt_unregister_match(&ipv6header_mt6_reg); } -module_init(ipv6header_init); -module_exit(ipv6header_exit); +module_init(ipv6header_mt6_init); +module_exit(ipv6header_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index 0fa714092dc9..e06678d07ec8 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -21,7 +21,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6t_mh.h> -MODULE_DESCRIPTION("ip6t_tables match for MH"); +MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); MODULE_LICENSE("GPL"); #ifdef DEBUG_IP_FIREWALL_USER @@ -38,14 +38,9 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +mh_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct ip6_mh _mh; const struct ip6_mh *mh; @@ -77,11 +72,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -mh_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +mh_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_mh *mhinfo = matchinfo; @@ -89,25 +82,25 @@ mh_checkentry(const char *tablename, return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); } -static struct xt_match mh_match __read_mostly = { +static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", .family = AF_INET6, - .checkentry = mh_checkentry, - .match = match, + .checkentry = mh_mt6_check, + .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), .proto = IPPROTO_MH, .me = THIS_MODULE, }; -static int __init ip6t_mh_init(void) +static int __init mh_mt6_init(void) { - return xt_register_match(&mh_match); + return xt_register_match(&mh_mt6_reg); } -static void __exit ip6t_mh_fini(void) +static void __exit mh_mt6_exit(void) { - xt_unregister_match(&mh_match); + xt_unregister_match(&mh_mt6_reg); } -module_init(ip6t_mh_init); -module_exit(ip6t_mh_fini); +module_init(mh_mt6_init); +module_exit(mh_mt6_exit); diff --git a/net/ipv6/netfilter/ip6t_owner.c b/net/ipv6/netfilter/ip6t_owner.c deleted file mode 100644 index 6036613aef36..000000000000 --- a/net/ipv6/netfilter/ip6t_owner.c +++ /dev/null @@ -1,92 +0,0 @@ -/* Kernel module to match various things tied to sockets associated with - locally generated outgoing packets. */ - -/* (C) 2000-2001 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/file.h> -#include <linux/rcupdate.h> -#include <net/sock.h> - -#include <linux/netfilter_ipv6/ip6t_owner.h> -#include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter/x_tables.h> - -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("IP6 tables owner matching module"); -MODULE_LICENSE("GPL"); - - -static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) -{ - const struct ip6t_owner_info *info = matchinfo; - - if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return false; - - if (info->match & IP6T_OWNER_UID) - if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ - !!(info->invert & IP6T_OWNER_UID)) - return false; - - if (info->match & IP6T_OWNER_GID) - if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ - !!(info->invert & IP6T_OWNER_GID)) - return false; - - return true; -} - -static bool -checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) -{ - const struct ip6t_owner_info *info = matchinfo; - - if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { - printk("ipt_owner: pid and sid matching " - "not supported anymore\n"); - return false; - } - return true; -} - -static struct xt_match owner_match __read_mostly = { - .name = "owner", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct ip6t_owner_info), - .hooks = (1 << NF_IP6_LOCAL_OUT) | (1 << NF_IP6_POST_ROUTING), - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ip6t_owner_init(void) -{ - return xt_register_match(&owner_match); -} - -static void __exit ip6t_owner_fini(void) -{ - xt_unregister_match(&owner_match); -} - -module_init(ip6t_owner_init); -module_exit(ip6t_owner_fini); diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 357cea703bd9..12a9efe9886e 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -21,7 +21,7 @@ #include <linux/netfilter_ipv6/ip6t_rt.h> MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 RT match"); +MODULE_DESCRIPTION("Xtables: IPv6 Routing Header match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the id is matched by the range, 0 otherwise */ @@ -37,14 +37,9 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +rt_mt6(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct xt_match *match, + const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; @@ -195,11 +190,9 @@ match(const struct sk_buff *skb, /* Called when user tries to insert an entry of this type. */ static bool -checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +rt_mt6_check(const char *tablename, const void *entry, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; @@ -218,24 +211,24 @@ checkentry(const char *tablename, return true; } -static struct xt_match rt_match __read_mostly = { +static struct xt_match rt_mt6_reg __read_mostly = { .name = "rt", .family = AF_INET6, - .match = match, + .match = rt_mt6, .matchsize = sizeof(struct ip6t_rt), - .checkentry = checkentry, + .checkentry = rt_mt6_check, .me = THIS_MODULE, }; -static int __init ip6t_rt_init(void) +static int __init rt_mt6_init(void) { - return xt_register_match(&rt_match); + return xt_register_match(&rt_mt6_reg); } -static void __exit ip6t_rt_fini(void) +static void __exit rt_mt6_exit(void) { - xt_unregister_match(&rt_match); + xt_unregister_match(&rt_mt6_reg); } -module_init(ip6t_rt_init); -module_exit(ip6t_rt_fini); +module_init(rt_mt6_init); +module_exit(rt_mt6_exit); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 1d26b202bf30..87d38d08aad0 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -17,7 +17,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables filter table"); -#define FILTER_VALID_HOOKS ((1 << NF_IP6_LOCAL_IN) | (1 << NF_IP6_FORWARD) | (1 << NF_IP6_LOCAL_OUT)) +#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT)) static struct { @@ -31,14 +33,14 @@ static struct .num_entries = 4, .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 }, .underflow = { - [NF_IP6_LOCAL_IN] = 0, - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard), - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 }, }, .entries = { @@ -88,26 +90,26 @@ ip6t_local_out_hook(unsigned int hook, return ip6t_do_table(skb, hook, in, out, &packet_filter); } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, + .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_FILTER, }, { .hook = ip6t_local_out_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FILTER, }, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a0b6381f1e8c..d6082600bc5d 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -15,11 +15,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables mangle table"); -#define MANGLE_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | \ - (1 << NF_IP6_LOCAL_IN) | \ - (1 << NF_IP6_FORWARD) | \ - (1 << NF_IP6_LOCAL_OUT) | \ - (1 << NF_IP6_POST_ROUTING)) +#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ + (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_POST_ROUTING)) static struct { @@ -33,18 +33,18 @@ static struct .num_entries = 6, .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, }, .underflow = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_IP6_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_IP6_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), + [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, + [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, }, }, .entries = { @@ -120,40 +120,40 @@ ip6t_local_hook(unsigned int hook, return ret; } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_local_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, + .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_local_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, + .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 8f7109f991e6..eccbaaa104af 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -6,7 +6,7 @@ #include <linux/module.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) +#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) static struct { @@ -20,12 +20,12 @@ static struct .num_entries = 3, .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), .hook_entry = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) }, .underflow = { - [NF_IP6_PRE_ROUTING] = 0, - [NF_IP6_LOCAL_OUT] = sizeof(struct ip6t_standard) + [NF_INET_PRE_ROUTING] = 0, + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) }, }, .entries = { @@ -54,18 +54,18 @@ ip6t_hook(unsigned int hook, return ip6t_do_table(skb, hook, in, out, &packet_raw); } -static struct nf_hook_ops ip6t_ops[] = { +static struct nf_hook_ops ip6t_ops[] __read_mostly = { { .hook = ip6t_hook, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { .hook = ip6t_hook, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index ad74bab05047..2d7b0246475d 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -60,12 +60,6 @@ static int ipv6_print_tuple(struct seq_file *s, NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); } -static int ipv6_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) -{ - return 0; -} - /* * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c * @@ -258,80 +252,51 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, return ipv6_conntrack_in(hooknum, skb, in, out, okfn); } -static struct nf_hook_ops ipv6_conntrack_ops[] = { +static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_defrag, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, + .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_defrag, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, + .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, + .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_IN, + .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, }, }; -#ifdef CONFIG_SYSCTL -static ctl_table nf_ct_ipv6_sysctl_table[] = { - { - .procname = "nf_conntrack_frag6_timeout", - .data = &nf_frags_ctl.timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, - .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_frags_ctl.low_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, - .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_frags_ctl.high_thresh, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { .ctl_name = 0 } -}; -#endif - #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> @@ -376,7 +341,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, - .print_conntrack = ipv6_print_conntrack, .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = ipv6_tuple_to_nlattr, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index fd9123f3dc04..da924c6b5f06 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -24,6 +24,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> +#include <net/netfilter/nf_log.h> static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; @@ -74,13 +75,6 @@ static int icmpv6_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } -/* Print out the private part of the conntrack. */ -static int icmpv6_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) -{ - return 0; -} - /* Returns verdict for packet, or -1 for invalid. */ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, @@ -192,7 +186,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_IP6_PRE_ROUTING && + if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); @@ -213,12 +207,9 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, static int icmpv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NLA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t), - &t->src.u.icmp.id); - NLA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t), - &t->dst.u.icmp.type); - NLA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t), - &t->dst.u.icmp.code); + NLA_PUT_BE16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id); + NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type); + NLA_PUT_U8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code); return 0; @@ -240,12 +231,9 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], || !tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; - tuple->dst.u.icmp.type = - *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_TYPE]); - tuple->dst.u.icmp.code = - *(u_int8_t *)nla_data(tb[CTA_PROTO_ICMPV6_CODE]); - tuple->src.u.icmp.id = - *(__be16 *)nla_data(tb[CTA_PROTO_ICMPV6_ID]); + tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); + tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); + tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); if (tuple->dst.u.icmp.type < 128 || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) @@ -280,7 +268,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, .print_tuple = icmpv6_print_tuple, - .print_conntrack = icmpv6_print_conntrack, .packet = icmpv6_packet, .new = icmpv6_new, .error = icmpv6_error, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index e170c67c47a5..022da6ce4c0f 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -70,14 +70,37 @@ struct nf_ct_frag6_queue __u16 nhoffset; }; -struct inet_frags_ctl nf_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, - .secret_interval = 10 * 60 * HZ, -}; - static struct inet_frags nf_frags; +static struct netns_frags nf_init_frags; + +#ifdef CONFIG_SYSCTL +struct ctl_table nf_ct_ipv6_sysctl_table[] = { + { + .procname = "nf_conntrack_frag6_timeout", + .data = &nf_init_frags.timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, + .procname = "nf_conntrack_frag6_low_thresh", + .data = &nf_init_frags.low_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, + .procname = "nf_conntrack_frag6_high_thresh", + .data = &nf_init_frags.high_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; +#endif static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) @@ -125,7 +148,7 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_frags.mem); + atomic_sub(skb->truesize, &nf_init_frags.mem); nf_skb_free(skb); kfree_skb(skb); } @@ -147,7 +170,7 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) static void nf_ct_frag6_evictor(void) { - inet_frag_evictor(&nf_frags); + inet_frag_evictor(&nf_init_frags, &nf_frags); } static void nf_ct_frag6_expire(unsigned long data) @@ -183,7 +206,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&nf_frags, &arg, hash); + q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); if (q == NULL) goto oom; @@ -352,7 +375,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, skb->dev = NULL; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &nf_frags.mem); + atomic_add(skb->truesize, &nf_init_frags.mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -362,7 +385,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->q.last_in |= FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); + list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); write_unlock(&nf_frags.lock); return 0; @@ -429,7 +452,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_frags.mem); + atomic_add(clone->truesize, &nf_init_frags.mem); } /* We have to remove fragment header from datagram and to relocate @@ -443,7 +466,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_frags.mem); + atomic_sub(head->truesize, &nf_init_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -453,7 +476,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_frags.mem); + atomic_sub(fp->truesize, &nf_init_frags.mem); } head->next = NULL; @@ -603,7 +626,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) + if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -674,7 +697,6 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_frags.ctl = &nf_frags_ctl; nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -682,6 +704,11 @@ int nf_ct_frag6_init(void) nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; + nf_frags.secret_interval = 10 * 60 * HZ; + nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; + nf_init_frags.high_thresh = 256 * 1024; + nf_init_frags.low_thresh = 192 * 1024; + inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); return 0; @@ -691,6 +718,6 @@ void nf_ct_frag6_cleanup(void) { inet_frags_fini(&nf_frags); - nf_frags_ctl.low_thresh = 0; + nf_init_frags.low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 44937616057e..35e502a72495 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -27,6 +27,7 @@ #include <net/ip.h> #include <net/sock.h> #include <net/tcp.h> +#include <net/udp.h> #include <net/transp_v6.h> #include <net/ipv6.h> @@ -35,15 +36,15 @@ static struct proc_dir_entry *proc_net_devsnmp6; static int sockstat6_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "TCP6: inuse %d\n", - sock_prot_inuse(&tcpv6_prot)); + sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - sock_prot_inuse(&udpv6_prot)); + sock_prot_inuse_get(&udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - sock_prot_inuse(&udplitev6_prot)); + sock_prot_inuse_get(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - sock_prot_inuse(&rawv6_prot)); + sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(), ip6_frag_mem()); + ip6_frag_nqueues(&init_net), ip6_frag_mem(&init_net)); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 807260d03586..4d880551fe6a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -54,39 +54,31 @@ #include <net/mip6.h> #endif +#include <net/raw.h> #include <net/rawv6.h> #include <net/xfrm.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; -DEFINE_RWLOCK(raw_v6_lock); +static struct raw_hashinfo raw_v6_hashinfo = { + .lock = __RW_LOCK_UNLOCKED(), +}; static void raw_v6_hash(struct sock *sk) { - struct hlist_head *list = &raw_v6_htable[inet_sk(sk)->num & - (RAWV6_HTABLE_SIZE - 1)]; - - write_lock_bh(&raw_v6_lock); - sk_add_node(sk, list); - sock_prot_inc_use(sk->sk_prot); - write_unlock_bh(&raw_v6_lock); + raw_hash_sk(sk, &raw_v6_hashinfo); } static void raw_v6_unhash(struct sock *sk) { - write_lock_bh(&raw_v6_lock); - if (sk_del_node_init(sk)) - sock_prot_dec_use(sk->sk_prot); - write_unlock_bh(&raw_v6_lock); + raw_unhash_sk(sk, &raw_v6_hashinfo); } -/* Grumble... icmp and ip_input want to get at this... */ -struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr, - int dif) +static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, + unsigned short num, struct in6_addr *loc_addr, + struct in6_addr *rmt_addr, int dif) { struct hlist_node *node; int is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -95,6 +87,9 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, if (inet_sk(sk)->num == num) { struct ipv6_pinfo *np = inet6_sk(sk); + if (sk->sk_net != net) + continue; + if (!ipv6_addr_any(&np->daddr) && !ipv6_addr_equal(&np->daddr, rmt_addr)) continue; @@ -167,21 +162,22 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); * * Caller owns SKB so we must make clones. */ -int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) +static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct in6_addr *saddr; struct in6_addr *daddr; struct sock *sk; int delivered = 0; __u8 hash; + struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; hash = nexthdr & (MAX_INET_PROTOS - 1); - read_lock(&raw_v6_lock); - sk = sk_head(&raw_v6_htable[hash]); + read_lock(&raw_v6_hashinfo.lock); + sk = sk_head(&raw_v6_hashinfo.ht[hash]); /* * The first socket found will be delivered after @@ -191,7 +187,8 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) if (sk == NULL) goto out; - sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); + net = skb->dev->nd_net; + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { int filtered; @@ -234,14 +231,25 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) rawv6_rcv(sk, clone); } } - sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr, + sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, IP6CB(skb)->iif); } out: - read_unlock(&raw_v6_lock); + read_unlock(&raw_v6_hashinfo.lock); return delivered; } +int raw6_local_deliver(struct sk_buff *skb, int nexthdr) +{ + struct sock *raw_sk; + + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); + if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) + raw_sk = NULL; + + return raw_sk != NULL; +} + /* This cleans up af_inet6 a bit. -DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -283,7 +291,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + dev = dev_get_by_index(sk->sk_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -296,7 +304,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { err = -EADDRNOTAVAIL; - if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) { + if (!ipv6_chk_addr(sk->sk_net, &addr->sin6_addr, + dev, 0)) { if (dev) dev_put(dev); goto out; @@ -316,7 +325,7 @@ out: return err; } -void rawv6_err(struct sock *sk, struct sk_buff *skb, +static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { @@ -350,18 +359,45 @@ void rawv6_err(struct sock *sk, struct sk_buff *skb, } } +void raw6_icmp_error(struct sk_buff *skb, int nexthdr, + int type, int code, int inner_offset, __be32 info) +{ + struct sock *sk; + int hash; + struct in6_addr *saddr, *daddr; + struct net *net; + + hash = nexthdr & (RAW_HTABLE_SIZE - 1); + + read_lock(&raw_v6_hashinfo.lock); + sk = sk_head(&raw_v6_hashinfo.ht[hash]); + if (sk != NULL) { + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + net = skb->dev->nd_net; + + while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, + IP6CB(skb)->iif))) { + rawv6_err(sk, skb, NULL, type, code, + inner_offset, info); + sk = sk_next(sk); + } + } + read_unlock(&raw_v6_hashinfo.lock); +} + static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) { if ((raw6_sk(sk)->checksum || sk->sk_filter) && skb_checksum_complete(skb)) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk,skb)<0) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } @@ -382,6 +418,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } @@ -405,7 +442,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (inet->hdrincl) { if (skb_checksum_complete(skb)) { - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); kfree_skb(skb); return 0; } @@ -496,7 +533,7 @@ csum_copy_err: as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; - /* FIXME: increment a raw6 drops counter here */ + atomic_inc(&sk->sk_drops); goto out; } @@ -618,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, goto error_fault; IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); - err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, + err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; @@ -843,7 +880,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -1172,76 +1209,6 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -struct raw6_iter_state { - int bucket; -}; - -#define raw6_seq_private(seq) ((struct raw6_iter_state *)(seq)->private) - -static struct sock *raw6_get_first(struct seq_file *seq) -{ - struct sock *sk; - struct hlist_node *node; - struct raw6_iter_state* state = raw6_seq_private(seq); - - for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket) - sk_for_each(sk, node, &raw_v6_htable[state->bucket]) - if (sk->sk_family == PF_INET6) - goto out; - sk = NULL; -out: - return sk; -} - -static struct sock *raw6_get_next(struct seq_file *seq, struct sock *sk) -{ - struct raw6_iter_state* state = raw6_seq_private(seq); - - do { - sk = sk_next(sk); -try_again: - ; - } while (sk && sk->sk_family != PF_INET6); - - if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) { - sk = sk_head(&raw_v6_htable[state->bucket]); - goto try_again; - } - return sk; -} - -static struct sock *raw6_get_idx(struct seq_file *seq, loff_t pos) -{ - struct sock *sk = raw6_get_first(seq); - if (sk) - while (pos && (sk = raw6_get_next(seq, sk)) != NULL) - --pos; - return pos ? NULL : sk; -} - -static void *raw6_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock(&raw_v6_lock); - return *pos ? raw6_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *raw6_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct sock *sk; - - if (v == SEQ_START_TOKEN) - sk = raw6_get_first(seq); - else - sk = raw6_get_next(seq, v); - ++*pos; - return sk; -} - -static void raw6_seq_stop(struct seq_file *seq, void *v) -{ - read_unlock(&raw_v6_lock); -} - static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct ipv6_pinfo *np = inet6_sk(sp); @@ -1254,7 +1221,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet_sk(sp)->num; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -1266,7 +1233,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); + atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int raw6_seq_show(struct seq_file *seq, void *v) @@ -1277,23 +1244,22 @@ static int raw6_seq_show(struct seq_file *seq, void *v) "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode\n"); + " uid timeout inode drops\n"); else - raw6_sock_seq_show(seq, v, raw6_seq_private(seq)->bucket); + raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); return 0; } static const struct seq_operations raw6_seq_ops = { - .start = raw6_seq_start, - .next = raw6_seq_next, - .stop = raw6_seq_stop, + .start = raw_seq_start, + .next = raw_seq_next, + .stop = raw_seq_stop, .show = raw6_seq_show, }; static int raw6_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &raw6_seq_ops, - sizeof(struct raw6_iter_state)); + return raw_seq_open(inode, file, &raw_v6_hashinfo, PF_INET6); } static const struct file_operations raw6_seq_fops = { @@ -1301,18 +1267,86 @@ static const struct file_operations raw6_seq_fops = { .open = raw6_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; -int __init raw6_proc_init(void) +static int raw6_init_net(struct net *net) { - if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; + return 0; } +static void raw6_exit_net(struct net *net) +{ + proc_net_remove(net, "raw6"); +} + +static struct pernet_operations raw6_net_ops = { + .init = raw6_init_net, + .exit = raw6_exit_net, +}; + +int __init raw6_proc_init(void) +{ + return register_pernet_subsys(&raw6_net_ops); +} + void raw6_proc_exit(void) { - proc_net_remove(&init_net, "raw6"); + unregister_pernet_subsys(&raw6_net_ops); } #endif /* CONFIG_PROC_FS */ + +/* Same as inet6_dgram_ops, sans udp_poll. */ +static const struct proto_ops inet6_sockraw_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = inet6_release, + .bind = inet6_bind, + .connect = inet_dgram_connect, /* ok */ + .socketpair = sock_no_socketpair, /* a do nothing */ + .accept = sock_no_accept, /* a do nothing */ + .getname = inet6_getname, + .poll = datagram_poll, /* ok */ + .ioctl = inet6_ioctl, /* must change */ + .listen = sock_no_listen, /* ok */ + .shutdown = inet_shutdown, /* ok */ + .setsockopt = sock_common_setsockopt, /* ok */ + .getsockopt = sock_common_getsockopt, /* ok */ + .sendmsg = inet_sendmsg, /* ok */ + .recvmsg = sock_common_recvmsg, /* ok */ + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static struct inet_protosw rawv6_protosw = { + .type = SOCK_RAW, + .protocol = IPPROTO_IP, /* wild card */ + .prot = &rawv6_prot, + .ops = &inet6_sockraw_ops, + .capability = CAP_NET_RAW, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + +int __init rawv6_init(void) +{ + int ret; + + ret = inet6_register_protosw(&rawv6_protosw); + if (ret) + goto out; +out: + return ret; +} + +void rawv6_exit(void) +{ + inet6_unregister_protosw(&rawv6_protosw); +} diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 76c88a93b9b5..f936d045a39d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -82,23 +82,16 @@ struct frag_queue __u16 nhoffset; }; -struct inet_frags_ctl ip6_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, - .secret_interval = 10 * 60 * HZ, -}; - static struct inet_frags ip6_frags; -int ip6_frag_nqueues(void) +int ip6_frag_nqueues(struct net *net) { - return ip6_frags.nqueues; + return net->ipv6.frags.nqueues; } -int ip6_frag_mem(void) +int ip6_frag_mem(struct net *net) { - return atomic_read(&ip6_frags.mem); + return atomic_read(&net->ipv6.frags.mem); } static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, @@ -156,11 +149,12 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, int *work) +static inline void frag_kfree_skb(struct netns_frags *nf, + struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frags.mem); + atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -190,11 +184,11 @@ static __inline__ void fq_kill(struct frag_queue *fq) inet_frag_kill(&fq->q, &ip6_frags); } -static void ip6_evictor(struct inet6_dev *idev) +static void ip6_evictor(struct net *net, struct inet6_dev *idev) { int evicted; - evicted = inet_frag_evictor(&ip6_frags); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); if (evicted) IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); } @@ -241,7 +235,7 @@ out: } static __inline__ struct frag_queue * -fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, +fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, struct inet6_dev *idev) { struct inet_frag_queue *q; @@ -253,7 +247,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; hash = ip6qhashfn(id, src, dst); - q = inet_frag_find(&ip6_frags, &arg, hash); + q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) goto oom; @@ -396,7 +390,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(fq->q.net, free_it, NULL); } } @@ -416,7 +410,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &ip6_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +424,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return ip6_frag_reasm(fq, prev, dev); write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&ip6_frags.lock); return -1; @@ -510,7 +504,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -525,7 +519,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -535,7 +529,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frags.mem); + atomic_sub(fp->truesize, &fq->q.net->mem); } head->next = NULL; @@ -575,6 +569,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net; IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); @@ -605,10 +600,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) - ip6_evictor(ip6_dst_idev(skb->dst)); + net = skb->dev->nd_net; + if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) + ip6_evictor(net, ip6_dst_idev(skb->dst)); - if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, + if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { int ret; @@ -632,12 +628,127 @@ static struct inet6_protocol frag_protocol = .flags = INET6_PROTO_NOPOLICY, }; -void __init ipv6_frag_init(void) +#ifdef CONFIG_SYSCTL +static struct ctl_table ip6_frags_ctl_table[] = { + { + .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, + .procname = "ip6frag_high_thresh", + .data = &init_net.ipv6.frags.high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, + .procname = "ip6frag_low_thresh", + .data = &init_net.ipv6.frags.low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV6_IP6FRAG_TIME, + .procname = "ip6frag_time", + .data = &init_net.ipv6.frags.timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { + .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, + .procname = "ip6frag_secret_interval", + .data = &ip6_frags.secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { } +}; + +static int ip6_frags_sysctl_register(struct net *net) { - if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) - printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = ip6_frags_ctl_table; + if (net != &init_net) { + table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + table[3].mode &= ~0222; + } + + hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (net != &init_net) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void ip6_frags_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); + kfree(table); +} +#else +static inline int ip6_frags_sysctl_register(struct net *net) +{ + return 0; +} + +static inline void ip6_frags_sysctl_unregister(struct net *net) +{ +} +#endif + +static int ipv6_frags_init_net(struct net *net) +{ + net->ipv6.frags.high_thresh = 256 * 1024; + net->ipv6.frags.low_thresh = 192 * 1024; + net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; + + inet_frags_init_net(&net->ipv6.frags); + + return ip6_frags_sysctl_register(net); +} + +static void ipv6_frags_exit_net(struct net *net) +{ + ip6_frags_sysctl_unregister(net); + inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); +} + +static struct pernet_operations ip6_frags_ops = { + .init = ipv6_frags_init_net, + .exit = ipv6_frags_exit_net, +}; + +int __init ipv6_frag_init(void) +{ + int ret; + + ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + if (ret) + goto out; + + register_pernet_subsys(&ip6_frags_ops); - ip6_frags.ctl = &ip6_frags_ctl; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; @@ -645,5 +756,15 @@ void __init ipv6_frag_init(void) ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; + ip6_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip6_frags); +out: + return ret; +} + +void ipv6_frag_exit(void) +{ + inet_frags_fini(&ip6_frags); + unregister_pernet_subsys(&ip6_frags_ops); + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 20083e0d3995..4004c5f0b8d7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -73,21 +73,13 @@ #define CLONE_OFFLINK_ROUTE 0 -static int ip6_rt_max_size = 4096; -static int ip6_rt_gc_min_interval = HZ / 2; -static int ip6_rt_gc_timeout = 60*HZ; -int ip6_rt_gc_interval = 30*HZ; -static int ip6_rt_gc_elasticity = 9; -static int ip6_rt_mtu_expires = 10*60*HZ; -static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; - static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); -static int ip6_dst_gc(void); +static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); @@ -113,6 +105,7 @@ static struct dst_ops ip6_dst_ops = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, + .local_out = ip6_local_out, .entry_size = sizeof(struct rt6_info), }; @@ -152,7 +145,6 @@ struct rt6_info ip6_null_entry = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static int ip6_pkt_blk_hole(struct sk_buff *skb); struct rt6_info ip6_prohibit_entry = { .u = { @@ -181,8 +173,8 @@ struct rt6_info ip6_blk_hole_entry = { .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_blk_hole, - .output = ip6_pkt_blk_hole, + .input = dst_discard, + .output = dst_discard, .ops = &ip6_dst_ops, .path = (struct dst_entry*)&ip6_blk_hole_entry, } @@ -216,9 +208,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct net_device *loopback_dev = + dev->nd_net->loopback_dev; - if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); + if (dev != loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -606,7 +601,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL); + struct nl_info info = { + .nl_net = &init_net, + }; + return __ip6_ins_rt(rt, &info); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -782,12 +780,6 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) EXPORT_SYMBOL(ip6_route_output); -static int ip6_blackhole_output(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) { struct rt6_info *ort = (struct rt6_info *) *dstp; @@ -800,8 +792,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl atomic_set(&new->__refcnt, 1); new->__use = 1; - new->input = ip6_blackhole_output; - new->output = ip6_blackhole_output; + new->input = dst_discard; + new->output = dst_discard; memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); new->dev = ort->u.dst.dev; @@ -896,8 +888,8 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) { mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); - if (mtu < ip6_rt_min_advmss) - mtu = ip6_rt_min_advmss; + if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) + mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and @@ -991,25 +983,25 @@ int ndisc_dst_gc(int *more) return freed; } -static int ip6_dst_gc(void) +static int ip6_dst_gc(struct dst_ops *ops) { static unsigned expire = 30*HZ; static unsigned long last_gc; unsigned long now = jiffies; - if (time_after(last_gc + ip6_rt_gc_min_interval, now) && - atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) + if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) && + atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size) goto out; expire++; fib6_run_gc(expire); last_gc = now; if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) - expire = ip6_rt_gc_timeout>>1; + expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1; out: - expire -= expire>>ip6_rt_gc_elasticity; - return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); + expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity; + return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size); } /* Clean host part of a prefix. Not necessary in radix tree, @@ -1269,7 +1261,10 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL); + struct nl_info info = { + .nl_net = &init_net, + }; + return __ip6_del_rt(rt, &info); } static int ip6_route_del(struct fib6_config *cfg) @@ -1514,7 +1509,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, rt->u.dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1540,7 +1535,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); + dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); @@ -1665,6 +1660,8 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return rt; } +EXPORT_SYMBOL(rt6_get_dflt_router); + struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) @@ -1766,8 +1763,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static inline int ip6_pkt_drop(struct sk_buff *skb, int code, - int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; switch (ipstats_mib_noroutes) { @@ -1811,12 +1807,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -static int ip6_pkt_blk_hole(struct sk_buff *skb) -{ - kfree_skb(skb); - return 0; -} - #endif /* @@ -2015,9 +2005,13 @@ errout: static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2027,9 +2021,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2164,6 +2162,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2171,6 +2170,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct flowi fl; int err, iif = 0; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; @@ -2230,7 +2232,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2238,32 +2240,29 @@ errout: void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; - int err = -ENOBUFS; - - if (info) { - pid = info->pid; - nlh = info->nlh; - if (nlh) - seq = nlh->nlmsg_seq; - } + u32 seq; + int err; + + err = -ENOBUFS; + seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; - err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); + err = rt6_fill_node(skb, rt, NULL, NULL, 0, + event, info->pid, seq, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, &init_net, info->pid, + RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); } /* @@ -2353,28 +2352,61 @@ static const struct file_operations rt6_stats_seq_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int ipv6_route_proc_init(struct net *net) +{ + int ret = -ENOMEM; + if (!proc_net_fops_create(net, "ipv6_route", + 0, &ipv6_route_proc_fops)) + goto out; + + if (!proc_net_fops_create(net, "rt6_stats", + S_IRUGO, &rt6_stats_seq_fops)) + goto out_ipv6_route; + + ret = 0; +out: + return ret; +out_ipv6_route: + proc_net_remove(net, "ipv6_route"); + goto out; +} + +static void ipv6_route_proc_fini(struct net *net) +{ + proc_net_remove(net, "ipv6_route"); + proc_net_remove(net, "rt6_stats"); +} +#else +static inline int ipv6_route_proc_init(struct net *net) +{ + return 0; +} +static inline void ipv6_route_proc_fini(struct net *net) +{ + return ; +} #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSCTL -static int flush_delay; - static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { + int delay = init_net.ipv6.sysctl.flush_delay; if (write) { proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay); return 0; } else return -EINVAL; } -ctl_table ipv6_route_table[] = { +ctl_table ipv6_route_table_template[] = { { .procname = "flush", - .data = &flush_delay, + .data = &init_net.ipv6.sysctl.flush_delay, .maxlen = sizeof(int), .mode = 0200, .proc_handler = &ipv6_sysctl_rtcache_flush @@ -2390,7 +2422,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", - .data = &ip6_rt_max_size, + .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -2398,7 +2430,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2407,7 +2439,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", - .data = &ip6_rt_gc_timeout, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2416,7 +2448,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", - .data = &ip6_rt_gc_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2425,7 +2457,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", - .data = &ip6_rt_gc_elasticity, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2434,7 +2466,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", - .data = &ip6_rt_mtu_expires, + .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2443,7 +2475,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", - .data = &ip6_rt_min_advmss, + .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -2452,7 +2484,7 @@ ctl_table ipv6_route_table[] = { { .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", - .data = &ip6_rt_gc_min_interval, + .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_ms_jiffies, @@ -2461,42 +2493,74 @@ ctl_table ipv6_route_table[] = { { .ctl_name = 0 } }; +struct ctl_table *ipv6_route_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(ipv6_route_table_template, + sizeof(ipv6_route_table_template), + GFP_KERNEL); + return table; +} #endif -void __init ip6_route_init(void) +int __init ip6_route_init(void) { + int ret; + ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + SLAB_HWCACHE_ALIGN, NULL); + if (!ip6_dst_ops.kmem_cachep) + return -ENOMEM; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; - fib6_init(); - proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); - proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#ifdef CONFIG_XFRM - xfrm6_init(); -#endif -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - fib6_rules_init(); -#endif + ret = fib6_init(); + if (ret) + goto out_kmem_cache; - __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); + ret = ipv6_route_proc_init(&init_net); + if (ret) + goto out_fib6_init; + + ret = xfrm6_init(); + if (ret) + goto out_proc_init; + + ret = fib6_rules_init(); + if (ret) + goto xfrm6_init; + + ret = -ENOBUFS; + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) + goto fib6_rules_init; + + ret = 0; +out: + return ret; + +fib6_rules_init: + fib6_rules_cleanup(); +xfrm6_init: + xfrm6_fini(); +out_proc_init: + ipv6_route_proc_fini(&init_net); +out_fib6_init: + rt6_ifdown(NULL); + fib6_gc_cleanup(); +out_kmem_cache: + kmem_cache_destroy(ip6_dst_ops.kmem_cachep); + goto out; } void ip6_route_cleanup(void) { -#ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_cleanup(); -#endif -#ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "ipv6_route"); - proc_net_remove(&init_net, "rt6_stats"); -#endif -#ifdef CONFIG_XFRM + ipv6_route_proc_fini(&init_net); xfrm6_fini(); -#endif rt6_ifdown(NULL); fib6_gc_cleanup(); kmem_cache_destroy(ip6_dst_ops.kmem_cachep); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 71433d29d884..e77239d02bf5 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -16,6 +16,7 @@ * Changes: * Roger Venning <r.venning@telstra.com>: 6to4 support * Nate Thompson <nate@thebog.net>: 6to4 support + * Fred L. Templin <fltemplin@acm.org>: isatap support */ #include <linux/module.h> @@ -182,6 +183,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int dev->init = ipip6_tunnel_init; nt->parms = *parms; + if (parms->i_flags & SIT_ISATAP) + dev->priv_flags |= IFF_ISATAP; + if (register_netdevice(dev) < 0) { free_netdev(dev); goto failed; @@ -364,6 +368,48 @@ static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) IP6_ECN_set_ce(ipv6_hdr(skb)); } +/* ISATAP (RFC4214) - check source address */ +static int +isatap_srcok(struct sk_buff *skb, struct iphdr *iph, struct net_device *dev) +{ + struct neighbour *neigh; + struct dst_entry *dst; + struct rt6_info *rt; + struct flowi fl; + struct in6_addr *addr6; + struct in6_addr rtr; + struct ipv6hdr *iph6; + int ok = 0; + + /* from onlink default router */ + ipv6_addr_set(&rtr, htonl(0xFE800000), 0, 0, 0); + ipv6_isatap_eui64(rtr.s6_addr + 8, iph->saddr); + if ((rt = rt6_get_dflt_router(&rtr, dev))) { + dst_release(&rt->u.dst); + return 1; + } + + iph6 = ipv6_hdr(skb); + memset(&fl, 0, sizeof(fl)); + fl.proto = iph6->nexthdr; + ipv6_addr_copy(&fl.fl6_dst, &iph6->saddr); + fl.oif = dev->ifindex; + security_skb_classify_flow(skb, &fl); + + dst = ip6_route_output(NULL, &fl); + if (!dst->error && (dst->dev == dev) && (neigh = dst->neighbour)) { + + addr6 = (struct in6_addr*)&neigh->primary_key; + + /* from correct previous hop */ + if (ipv6_addr_is_isatap(addr6) && + (addr6->s6_addr32[3] == iph->saddr)) + ok = 1; + } + dst_release(dst); + return ok; +} + static int ipip6_rcv(struct sk_buff *skb) { struct iphdr *iph; @@ -382,6 +428,14 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; + + if ((tunnel->dev->priv_flags & IFF_ISATAP) && + !isatap_srcok(skb, iph, tunnel->dev)) { + tunnel->stat.rx_errors++; + read_unlock(&ipip6_lock); + kfree_skb(skb); + return 0; + } tunnel->stat.rx_packets++; tunnel->stat.rx_bytes += skb->len; skb->dev = tunnel->dev; @@ -444,6 +498,29 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; + /* ISATAP (RFC4214) - must come before 6to4 */ + if (dev->priv_flags & IFF_ISATAP) { + struct neighbour *neigh = NULL; + + if (skb->dst) + neigh = skb->dst->neighbour; + + if (neigh == NULL) { + if (net_ratelimit()) + printk(KERN_DEBUG "sit: nexthop == NULL\n"); + goto tx_error; + } + + addr6 = (struct in6_addr*)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if ((addr_type & IPV6_ADDR_UNICAST) && + ipv6_addr_is_isatap(addr6)) + dst = addr6->s6_addr32[3]; + else + goto tx_error; + } + if (!dst) dst = try_6to4(&iph6->daddr); @@ -480,7 +557,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; - if (ip_route_output_key(&rt, &fl)) { + if (ip_route_output_key(&init_net, &rt, &fl)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } @@ -592,6 +669,42 @@ tx_error: return 0; } +static void ipip6_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel; + struct iphdr *iph; + + tunnel = netdev_priv(dev); + iph = &tunnel->parms.iph; + + if (iph->daddr) { + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, + .tos = RT_TOS(iph->tos) } }, + .oif = tunnel->parms.link, + .proto = IPPROTO_IPV6 }; + struct rtable *rt; + if (!ip_route_output_key(&init_net, &rt, &fl)) { + tdev = rt->u.dst.dev; + ip_rt_put(rt); + } + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); + dev->mtu = tdev->mtu - sizeof(struct iphdr); + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + dev->iflink = tunnel->parms.link; +} + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -663,6 +776,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) if (cmd == SIOCCHGTUNNEL) { t->parms.iph.ttl = p.iph.ttl; t->parms.iph.tos = p.iph.tos; + if (t->parms.link != p.link) { + t->parms.link = p.link; + ipip6_tunnel_bind_dev(dev); + netdev_state_change(dev); + } } if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) err = -EFAULT; @@ -731,12 +849,9 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { - struct net_device *tdev = NULL; struct ip_tunnel *tunnel; - struct iphdr *iph; tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; tunnel->dev = dev; strcpy(tunnel->parms.name, dev->name); @@ -744,31 +859,7 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - struct rtable *rt; - if (!ip_route_output_key(&rt, &fl)) { - tdev = rt->u.dst.dev; - ip_rt_put(rt); - } - dev->flags |= IFF_POINTOPOINT; - } - - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(&init_net, tunnel->parms.link); - - if (tdev) { - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; - } - dev->iflink = tunnel->parms.link; + ipip6_tunnel_bind_dev(dev); return 0; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 68bb2548e469..408691b777c2 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,66 +14,30 @@ #include <net/addrconf.h> #include <net/inet_frag.h> -#ifdef CONFIG_SYSCTL - -static ctl_table ipv6_table[] = { +static ctl_table ipv6_table_template[] = { { .ctl_name = NET_IPV6_ROUTE, .procname = "route", .maxlen = 0, .mode = 0555, - .child = ipv6_route_table + .child = ipv6_route_table_template }, { .ctl_name = NET_IPV6_ICMP, .procname = "icmp", .maxlen = 0, .mode = 0555, - .child = ipv6_icmp_table + .child = ipv6_icmp_table_template }, { .ctl_name = NET_IPV6_BINDV6ONLY, .procname = "bindv6only", - .data = &sysctl_ipv6_bindv6only, + .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, - .procname = "ip6frag_high_thresh", - .data = &ip6_frags_ctl.high_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, - .procname = "ip6frag_low_thresh", - .data = &ip6_frags_ctl.low_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV6_IP6FRAG_TIME, - .procname = "ip6frag_time", - .data = &ip6_frags_ctl.timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, - .procname = "ip6frag_secret_interval", - .data = &ip6_frags_ctl.secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, @@ -84,39 +48,106 @@ static ctl_table ipv6_table[] = { { .ctl_name = 0 } }; -static struct ctl_table_header *ipv6_sysctl_header; - -static ctl_table ipv6_net_table[] = { - { - .ctl_name = NET_IPV6, - .procname = "ipv6", - .mode = 0555, - .child = ipv6_table - }, - { .ctl_name = 0 } -}; - -static ctl_table ipv6_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipv6_net_table - }, - { .ctl_name = 0 } +struct ctl_path net_ipv6_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { }, }; +EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); -void ipv6_sysctl_register(void) +static int ipv6_sysctl_net_init(struct net *net) { - ipv6_sysctl_header = register_sysctl_table(ipv6_root_table); + struct ctl_table *ipv6_table; + struct ctl_table *ipv6_route_table; + struct ctl_table *ipv6_icmp_table; + int err; + + err = -ENOMEM; + ipv6_table = kmemdup(ipv6_table_template, sizeof(ipv6_table_template), + GFP_KERNEL); + if (!ipv6_table) + goto out; + + ipv6_route_table = ipv6_route_sysctl_init(net); + if (!ipv6_route_table) + goto out_ipv6_table; + + ipv6_icmp_table = ipv6_icmp_sysctl_init(net); + if (!ipv6_icmp_table) + goto out_ipv6_route_table; + + ipv6_route_table[0].data = &net->ipv6.sysctl.flush_delay; + /* ipv6_route_table[1].data will be handled when we have + routes per namespace */ + ipv6_route_table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; + ipv6_route_table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; + ipv6_route_table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; + ipv6_route_table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; + ipv6_route_table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; + ipv6_route_table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; + ipv6_route_table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; + ipv6_table[0].child = ipv6_route_table; + + ipv6_icmp_table[0].data = &net->ipv6.sysctl.icmpv6_time; + ipv6_table[1].child = ipv6_icmp_table; + + ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; + + /* We don't want this value to be per namespace, it should be global + to all namespaces, so make it read-only when we are not in the + init network namespace */ + if (net != &init_net) + ipv6_table[3].mode = 0444; + + net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, + ipv6_table); + if (!net->ipv6.sysctl.table) + return -ENOMEM; + + if (!net->ipv6.sysctl.table) + goto out_ipv6_icmp_table; + + err = 0; +out: + return err; + +out_ipv6_icmp_table: + kfree(ipv6_icmp_table); +out_ipv6_route_table: + kfree(ipv6_route_table); +out_ipv6_table: + kfree(ipv6_table); + goto out; } -void ipv6_sysctl_unregister(void) +static void ipv6_sysctl_net_exit(struct net *net) { - unregister_sysctl_table(ipv6_sysctl_header); -} + struct ctl_table *ipv6_table; + struct ctl_table *ipv6_route_table; + struct ctl_table *ipv6_icmp_table; -#endif /* CONFIG_SYSCTL */ + ipv6_table = net->ipv6.sysctl.table->ctl_table_arg; + ipv6_route_table = ipv6_table[0].child; + ipv6_icmp_table = ipv6_table[1].child; + unregister_net_sysctl_table(net->ipv6.sysctl.table); + kfree(ipv6_table); + kfree(ipv6_route_table); + kfree(ipv6_icmp_table); +} + +static struct pernet_operations ipv6_sysctl_net_ops = { + .init = ipv6_sysctl_net_init, + .exit = ipv6_sysctl_net_exit, +}; +int ipv6_sysctl_register(void) +{ + return register_pernet_subsys(&ipv6_sysctl_net_ops); +} + +void ipv6_sysctl_unregister(void) +{ + unregister_pernet_subsys(&ipv6_sysctl_net_ops); +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93980c3b83e6..00c08399837d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,7 +265,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -733,7 +733,7 @@ static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct in6_addr *saddr, struct in6_addr *daddr, struct tcphdr *th, int protocol, - int tcplen) + unsigned int tcplen) { struct scatterlist sg[4]; __u16 data_len; @@ -818,7 +818,7 @@ static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, struct dst_entry *dst, struct request_sock *req, struct tcphdr *th, int protocol, - int tcplen) + unsigned int tcplen) { struct in6_addr *saddr, *daddr; @@ -985,7 +985,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - int tot_len = sizeof(*th); + unsigned int tot_len = sizeof(*th); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif @@ -1085,7 +1085,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; - int tot_len = sizeof(struct tcphdr); + unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; @@ -2166,14 +2166,36 @@ static struct inet_protosw tcpv6_protosw = { INET_PROTOSW_ICSK, }; -void __init tcpv6_init(void) +int __init tcpv6_init(void) { + int ret; + + ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + if (ret) + goto out; + /* register inet6 protocol */ - if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0) - printk(KERN_ERR "tcpv6_init: Could not register protocol\n"); - inet6_register_protosw(&tcpv6_protosw); + ret = inet6_register_protosw(&tcpv6_protosw); + if (ret) + goto out_tcpv6_protocol; + + ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, + SOCK_RAW, IPPROTO_TCP); + if (ret) + goto out_tcpv6_protosw; +out: + return ret; - if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW, - IPPROTO_TCP) < 0) - panic("Failed to create the TCPv6 control socket.\n"); +out_tcpv6_protocol: + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); +out_tcpv6_protosw: + inet6_unregister_protosw(&tcpv6_protosw); + goto out; +} + +void tcpv6_exit(void) +{ + sock_release(tcp6_socket); + inet6_unregister_protosw(&tcpv6_protosw); + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ee1cc3f8599f..bd4b9df8f614 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -34,6 +34,7 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/init.h> +#include <linux/module.h> #include <linux/skbuff.h> #include <asm/uaccess.h> @@ -50,8 +51,6 @@ #include <linux/seq_file.h> #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; - static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); @@ -121,6 +120,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; + int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -131,7 +131,8 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, return ipv6_recv_error(sk, msg, len); try_again: - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); if (!skb) goto out; @@ -164,6 +165,9 @@ try_again: if (err) goto out_free; + if (!peeked) + UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ @@ -200,13 +204,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - skb_kill_datagram(sk, skb, flags); + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; @@ -251,13 +259,14 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - return __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; + int is_udplite = IS_UDPLITE(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; @@ -265,7 +274,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" @@ -289,13 +298,13 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } - UDP6_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + return 0; drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -361,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) - udpv6_queue_rcv_skb(sk2, buff); + if (buff) { + bh_lock_sock_nested(sk2); + if (!sock_owned_by_user(sk2)) + udpv6_queue_rcv_skb(sk2, buff); + else + sk_add_backlog(sk2, buff); + bh_unlock_sock(sk2); + } } - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); out: read_unlock(&udp_hash_lock); return 0; @@ -477,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* deliver */ - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); return 0; @@ -523,6 +548,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct flowi *fl = &inet->cork.fl; int err = 0; + int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ @@ -538,7 +564,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) uh->len = htons(up->len); uh->check = 0; - if (up->pcflag) + if (is_udplite) csum = udplite_csum_outgoing(sk, skb); else csum = udp_csum_outgoing(sk, skb); @@ -554,7 +580,7 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -578,7 +604,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; - int is_udplite = up->pcflag; + int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); /* destination address check */ @@ -748,7 +774,7 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, 1)) < 0) { + if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -988,6 +1014,10 @@ struct proto udpv6_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, @@ -1007,9 +1037,27 @@ static struct inet_protosw udpv6_protosw = { }; -void __init udpv6_init(void) +int __init udpv6_init(void) +{ + int ret; + + ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP); + if (ret) + goto out; + + ret = inet6_register_protosw(&udpv6_protosw); + if (ret) + goto out_udpv6_protocol; +out: + return ret; + +out_udpv6_protocol: + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); + goto out; +} + +void udpv6_exit(void) { - if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0) - printk(KERN_ERR "udpv6_init: Could not register protocol\n"); - inet6_register_protosw(&udpv6_protosw); + inet6_unregister_protosw(&udpv6_protosw); + inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 2d3fda601232..21be3a83e7bc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -5,6 +5,7 @@ #include <net/protocol.h> #include <net/addrconf.h> #include <net/inet_common.h> +#include <net/transp_v6.h> extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5a0379f71415..87d4202522ee 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -26,7 +26,7 @@ static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - return __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); } static struct inet6_protocol udplitev6_protocol = { @@ -77,12 +77,29 @@ static struct inet_protosw udplite6_protosw = { .flags = INET_PROTOSW_PERMANENT, }; -void __init udplitev6_init(void) +int __init udplitev6_init(void) { - if (inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE) < 0) - printk(KERN_ERR "%s: Could not register.\n", __FUNCTION__); + int ret; - inet6_register_protosw(&udplite6_protosw); + ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + if (ret) + goto out; + + ret = inet6_register_protosw(&udplite6_protosw); + if (ret) + goto out_udplitev6_protocol; +out: + return ret; + +out_udplitev6_protocol: + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); + goto out; +} + +void udplitev6_exit(void) +{ + inet6_unregister_protosw(&udplite6_protosw); + inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); } #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 515783707e86..a4714d76ae6b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,120 +16,37 @@ #include <net/ipv6.h> #include <net/xfrm.h> -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) { - int err; - __be32 seq; - struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; - struct xfrm_state *x; - int xfrm_nr = 0; - int decaps = 0; - unsigned int nhoff; - - nhoff = IP6CB(skb)->nhoff; - - seq = 0; - if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) - goto drop; - - do { - struct ipv6hdr *iph = ipv6_hdr(skb); - - if (xfrm_nr == XFRM_MAX_DEPTH) - goto drop; - - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, - nexthdr, AF_INET6); - if (x == NULL) - goto drop; - spin_lock(&x->lock); - if (unlikely(x->km.state != XFRM_STATE_VALID)) - goto drop_unlock; - - if (x->props.replay_window && xfrm_replay_check(x, seq)) - goto drop_unlock; - - if (xfrm_state_check_expire(x)) - goto drop_unlock; - - nexthdr = x->type->input(x, skb); - if (nexthdr <= 0) - goto drop_unlock; - - skb_network_header(skb)[nhoff] = nexthdr; - - if (x->props.replay_window) - xfrm_replay_advance(x, seq); - - x->curlft.bytes += skb->len; - x->curlft.packets++; - - spin_unlock(&x->lock); - - xfrm_vec[xfrm_nr++] = x; - - if (x->outer_mode->input(x, skb)) - goto drop; - - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { - decaps = 1; - break; - } - - if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0) - goto drop; - } while (!err); + return xfrm6_extract_header(skb); +} - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - sp = secpath_dup(skb->sp); - if (!sp) - goto drop; - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; - } +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + return xfrm_input(skb, nexthdr, spi, 0); +} +EXPORT_SYMBOL(xfrm6_rcv_spi); - if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH) - goto drop; +int xfrm6_transport_finish(struct sk_buff *skb, int async) +{ + skb_network_header(skb)[IP6CB(skb)->nhoff] = + XFRM_MODE_SKB_CB(skb)->protocol; - memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec, - xfrm_nr * sizeof(xfrm_vec[0])); - skb->sp->len += xfrm_nr; - - nf_reset(skb); - - if (decaps) { - dst_release(skb->dst); - skb->dst = NULL; - netif_rx(skb); - return -1; - } else { -#ifdef CONFIG_NETFILTER - ipv6_hdr(skb)->payload_len = htons(skb->len); - __skb_push(skb, skb->data - skb_network_header(skb)); - - NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, - ip6_rcv_finish); - return -1; -#else +#ifndef CONFIG_NETFILTER + if (!async) return 1; #endif - } -drop_unlock: - spin_unlock(&x->lock); - xfrm_state_put(x); -drop: - while (--xfrm_nr >= 0) - xfrm_state_put(xfrm_vec[xfrm_nr]); - kfree_skb(skb); + ipv6_hdr(skb)->payload_len = htons(skb->len); + __skb_push(skb, skb->data - skb_network_header(skb)); + + NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + ip6_rcv_finish); return -1; } -EXPORT_SYMBOL(xfrm6_rcv_spi); - int xfrm6_rcv(struct sk_buff *skb) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], @@ -144,10 +61,28 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int wildcard = 0; xfrm_address_t *xany; - struct xfrm_state *xfrm_vec_one = NULL; int nh = 0; int i = 0; + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + goto drop; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len == XFRM_MAX_DEPTH) { + XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + goto drop; + } + xany = (xfrm_address_t *)&in6addr_any; for (i = 0; i < 3; i++) { @@ -200,47 +135,37 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, continue; } + spin_unlock(&x->lock); + nh = x->type->input(x, skb); if (nh <= 0) { - spin_unlock(&x->lock); xfrm_state_put(x); x = NULL; continue; } - x->curlft.bytes += skb->len; - x->curlft.packets++; - - spin_unlock(&x->lock); - - xfrm_vec_one = x; + /* Found a state */ break; } - if (!xfrm_vec_one) + if (!x) { + XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; - - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - sp = secpath_dup(skb->sp); - if (!sp) - goto drop; - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; } - if (1 + skb->sp->len > XFRM_MAX_DEPTH) - goto drop; + skb->sp->xvec[skb->sp->len++] = x; + + spin_lock(&x->lock); - skb->sp->xvec[skb->sp->len] = xfrm_vec_one; - skb->sp->len ++; + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); return 1; + drop: - if (xfrm_vec_one) - xfrm_state_put(xfrm_vec_one); return -1; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 2bfb4f05c14c..0527d11c1ae3 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -19,31 +19,39 @@ #include <net/ipv6.h> #include <net/xfrm.h> +static void xfrm6_beet_make_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + iph->version = 6; + + memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(iph->flow_lbl)); + iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; + + ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); + iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; +} + /* Add encapsulation header. * * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. */ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) { - struct ipv6hdr *iph, *top_iph; - u8 *prevhdr; - int hdr_len; + struct ipv6hdr *top_iph; - iph = ipv6_hdr(skb); - - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); - - skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); skb_set_network_header(skb, -x->props.header_len); - skb->transport_header = skb->network_header + hdr_len; - __skb_pull(skb, hdr_len); + skb->mac_header = skb->network_header + + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + xfrm6_beet_make_header(skb); top_iph = ipv6_hdr(skb); - memmove(top_iph, iph, hdr_len); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); - return 0; } @@ -52,19 +60,21 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *ip6h; const unsigned char *old_mac; int size = sizeof(struct ipv6hdr); - int err = -EINVAL; + int err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + err = skb_cow_head(skb, size + skb->mac_len); + if (err) goto out; - skb_push(skb, size); - memmove(skb->data, skb_network_header(skb), size); + __skb_push(skb, size); skb_reset_network_header(skb); old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); + xfrm6_beet_make_header(skb); + ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); @@ -75,8 +85,10 @@ out: } static struct xfrm_mode xfrm6_beet_mode = { - .input = xfrm6_beet_input, - .output = xfrm6_beet_output, + .input2 = xfrm6_beet_input, + .input = xfrm_prepare_input, + .output2 = xfrm6_beet_output, + .output = xfrm6_prepare_output, .owner = THIS_MODULE, .encap = XFRM_MODE_BEET, .flags = XFRM_MODE_FLAG_TUNNEL, diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index a7bc8c62317a..63d5d493098a 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -28,6 +28,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/spinlock.h> #include <linux/stringify.h> #include <linux/time.h> #include <net/ipv6.h> diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index fd84e2217274..0c742faaa30b 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -25,46 +25,29 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) IP6_ECN_set_ce(inner_iph); } -static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) -{ - if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb)))) - IP_ECN_set_ce(ipip_hdr(skb)); -} - /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. */ -static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_dst *xdst = (struct xfrm_dst*)dst; - struct ipv6hdr *iph, *top_iph; + struct ipv6hdr *top_iph; int dsfield; - iph = ipv6_hdr(skb); - skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); top_iph = ipv6_hdr(skb); top_iph->version = 6; - if (xdst->route->ops->family == AF_INET6) { - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; - top_iph->nexthdr = IPPROTO_IPV6; - } else { - top_iph->priority = 0; - top_iph->flow_lbl[0] = 0; - top_iph->flow_lbl[1] = 0; - top_iph->flow_lbl[2] = 0; - top_iph->nexthdr = IPPROTO_IPIP; - } - dsfield = ipv6_get_dsfield(top_iph); + + memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, + sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = x->inner_mode->afinfo->proto; + + dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; @@ -72,18 +55,15 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); - skb->protocol = htons(ETH_P_IPV6); return 0; } -static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { int err = -EINVAL; const unsigned char *old_mac; - const unsigned char *nh = skb_network_header(skb); - if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 && - nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP) + if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -92,17 +72,12 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - nh = skb_network_header(skb); - if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb)); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); - } else { - if (!(x->props.flags & XFRM_STATE_NOECN)) - ip6ip_ecn_decapsulate(skb); - skb->protocol = htons(ETH_P_IP); - } + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)), + ipipv6_hdr(skb)); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); @@ -114,19 +89,21 @@ out: } static struct xfrm_mode xfrm6_tunnel_mode = { - .input = xfrm6_tunnel_input, - .output = xfrm6_tunnel_output, + .input2 = xfrm6_mode_tunnel_input, + .input = xfrm_prepare_input, + .output2 = xfrm6_mode_tunnel_output, + .output = xfrm6_prepare_output, .owner = THIS_MODULE, .encap = XFRM_MODE_TUNNEL, .flags = XFRM_MODE_FLAG_TUNNEL, }; -static int __init xfrm6_tunnel_init(void) +static int __init xfrm6_mode_tunnel_init(void) { return xfrm_register_mode(&xfrm6_tunnel_mode, AF_INET6); } -static void __exit xfrm6_tunnel_exit(void) +static void __exit xfrm6_mode_tunnel_exit(void) { int err; @@ -134,7 +111,7 @@ static void __exit xfrm6_tunnel_exit(void) BUG_ON(err); } -module_init(xfrm6_tunnel_init); -module_exit(xfrm6_tunnel_exit); +module_init(xfrm6_mode_tunnel_init); +module_exit(xfrm6_mode_tunnel_exit); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_TUNNEL); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 656976760ad4..b34c58c65656 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -10,10 +10,12 @@ */ #include <linux/if_ether.h> -#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> +#include <net/dst.h> #include <net/ipv6.h> #include <net/xfrm.h> @@ -43,97 +45,50 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) return ret; } -static inline int xfrm6_output_one(struct sk_buff *skb) +int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; - struct ipv6hdr *iph; int err; - if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) { - err = xfrm6_tunnel_check_size(skb); - if (err) - goto error_nolock; - } - - err = xfrm_output(skb); + err = xfrm6_tunnel_check_size(skb); if (err) - goto error_nolock; + return err; - iph = ipv6_hdr(skb); - iph->payload_len = htons(skb->len - sizeof(*iph)); + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; - err = 0; - -out_exit: - return err; -error_nolock: - kfree_skb(skb); - goto out_exit; + return xfrm6_extract_header(skb); } -static int xfrm6_output_finish2(struct sk_buff *skb) +int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - while (likely((err = xfrm6_output_one(skb)) == 0)) { - nf_reset(skb); - - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, - skb->dst->dev, dst_output); - if (unlikely(err != 1)) - break; + err = x->inner_mode->afinfo->extract_output(x, skb); + if (err) + return err; - if (!skb->dst->xfrm) - return dst_output(skb); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, - skb->dst->dev, xfrm6_output_finish2); - if (unlikely(err != 1)) - break; - } + skb->protocol = htons(ETH_P_IPV6); - return err; + return x->outer_mode->output2(x, skb); } +EXPORT_SYMBOL(xfrm6_prepare_output); static int xfrm6_output_finish(struct sk_buff *skb) { - struct sk_buff *segs; - - if (!skb_is_gso(skb)) - return xfrm6_output_finish2(skb); +#ifdef CONFIG_NETFILTER + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; +#endif skb->protocol = htons(ETH_P_IPV6); - segs = skb_gso_segment(skb, 0); - kfree_skb(skb); - if (unlikely(IS_ERR(segs))) - return PTR_ERR(segs); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = xfrm6_output_finish2(segs); - - if (unlikely(err)) { - while ((segs = nskb)) { - nskb = segs->next; - segs->next = NULL; - kfree_skb(segs); - } - return err; - } - - segs = nskb; - } while (segs); - - return 0; + return xfrm_output(skb); } int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, + return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm6_output_finish); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index b8e9eb445d74..c25a6b527fc4 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -11,9 +11,11 @@ * */ -#include <linux/compiler.h> +#include <linux/err.h> +#include <linux/kernel.h> #include <linux/netdevice.h> #include <net/addrconf.h> +#include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/ipv6.h> @@ -25,35 +27,40 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static int xfrm6_dst_lookup(struct xfrm_dst **xdst, struct flowi *fl) +static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, + xfrm_address_t *daddr) { - struct dst_entry *dst = ip6_route_output(NULL, fl); - int err = dst->error; - if (!err) - *xdst = (struct xfrm_dst *) dst; - else + struct flowi fl = {}; + struct dst_entry *dst; + int err; + + memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); + if (saddr) + memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); + + dst = ip6_route_output(NULL, &fl); + + err = dst->error; + if (dst->error) { dst_release(dst); - return err; + dst = ERR_PTR(err); + } + + return dst; } static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)&daddr->a6, - }, - }, - }; - - if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, - (struct in6_addr *)&saddr->a6); - dst_release(&rt->u.dst); - return 0; - } - return -EHOSTUNREACH; + struct dst_entry *dst; + + dst = xfrm6_dst_lookup(0, NULL, daddr); + if (IS_ERR(dst)) + return -EHOSTUNREACH; + + ipv6_get_saddr(dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(dst); + return 0; } static struct dst_entry * @@ -86,177 +93,53 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } -static inline struct in6_addr* -__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) +static int xfrm6_get_tos(struct flowi *fl) { - return (x->type->remote_addr) ? - (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->id.daddr; + return 0; } -static inline struct in6_addr* -__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) +static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) { - return (x->type->local_addr) ? - (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->props.saddr; -} + if (dst->ops->family == AF_INET6) { + struct rt6_info *rt = (struct rt6_info*)dst; + if (rt->rt6i_node) + path->path_cookie = rt->rt6i_node->fn_sernum; + } -static inline void -__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen += x->props.header_len; - else - *len += x->props.header_len; -} + path->u.rt6.rt6i_nfheader_len = nfheader_len; -static inline void -__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen -= x->props.header_len; - else - *len -= x->props.header_len; + return 0; } -/* Allocate chain of dst_entry's, attach known xfrm's, calculate - * all the metrics... Shortly, bundle a bundle. - */ - -static int -__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, struct dst_entry **dst_p) +static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev) { - struct dst_entry *dst, *dst_prev; - struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); - struct rt6_info *rt = rt0; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .saddr = fl->fl6_src, - .daddr = fl->fl6_dst, - } - } - }; - int i; - int err = 0; - int header_len = 0; - int nfheader_len = 0; - int trailer_len = 0; - - dst = dst_prev = NULL; - dst_hold(&rt->u.dst); - - for (i = 0; i < nx; i++) { - struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops); - struct xfrm_dst *xdst; - - if (unlikely(dst1 == NULL)) { - err = -ENOBUFS; - dst_release(&rt->u.dst); - goto error; - } + struct rt6_info *rt = (struct rt6_info*)xdst->route; - if (!dst) - dst = dst1; - else { - dst_prev->child = dst1; - dst1->flags |= DST_NOHASH; - dst_clone(dst1); - } - - xdst = (struct xfrm_dst *)dst1; - xdst->route = &rt->u.dst; - xdst->genid = xfrm[i]->genid; - if (rt->rt6i_node) - xdst->route_cookie = rt->rt6i_node->fn_sernum; - - dst1->next = dst_prev; - dst_prev = dst1; - - __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); - trailer_len += xfrm[i]->props.trailer_len; - - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - unsigned short encap_family = xfrm[i]->props.family; - switch(encap_family) { - case AF_INET: - fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4; - fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4; - break; - case AF_INET6: - ipv6_addr_copy(&fl_tunnel.fl6_dst, __xfrm6_bundle_addr_remote(xfrm[i], &fl->fl6_dst)); - - ipv6_addr_copy(&fl_tunnel.fl6_src, __xfrm6_bundle_addr_local(xfrm[i], &fl->fl6_src)); - break; - default: - BUG_ON(1); - } + xdst->u.dst.dev = dev; + dev_hold(dev); - err = xfrm_dst_lookup((struct xfrm_dst **) &rt, - &fl_tunnel, encap_family); - if (err) - goto error; - } else - dst_hold(&rt->u.dst); - } + xdst->u.rt6.rt6i_idev = in6_dev_get(rt->u.dst.dev); + if (!xdst->u.rt6.rt6i_idev) + return -ENODEV; - dst_prev->child = &rt->u.dst; - dst->path = &rt->u.dst; + /* Sheit... I remember I did this right. Apparently, + * it was magically lost, so this code needs audit */ + xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | + RTF_LOCAL); + xdst->u.rt6.rt6i_metric = rt->rt6i_metric; + xdst->u.rt6.rt6i_node = rt->rt6i_node; if (rt->rt6i_node) - ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; - - *dst_p = dst; - dst = dst_prev; - - dst_prev = *dst_p; - i = 0; - for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) { - struct xfrm_dst *x = (struct xfrm_dst*)dst_prev; - - dst_prev->xfrm = xfrm[i++]; - dst_prev->dev = rt->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); - dst_prev->obsolete = -1; - dst_prev->flags |= DST_HOST; - dst_prev->lastuse = jiffies; - dst_prev->header_len = header_len; - dst_prev->nfheader_len = nfheader_len; - dst_prev->trailer_len = trailer_len; - memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); - - /* Copy neighbour for reachability confirmation */ - dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); - dst_prev->input = rt->u.dst.input; - dst_prev->output = dst_prev->xfrm->outer_mode->afinfo->output; - /* Sheit... I remember I did this right. Apparently, - * it was magically lost, so this code needs audit */ - x->u.rt6.rt6i_flags = rt0->rt6i_flags&(RTF_ANYCAST|RTF_LOCAL); - x->u.rt6.rt6i_metric = rt0->rt6i_metric; - x->u.rt6.rt6i_node = rt0->rt6i_node; - x->u.rt6.rt6i_gateway = rt0->rt6i_gateway; - memcpy(&x->u.rt6.rt6i_gateway, &rt0->rt6i_gateway, sizeof(x->u.rt6.rt6i_gateway)); - x->u.rt6.rt6i_dst = rt0->rt6i_dst; - x->u.rt6.rt6i_src = rt0->rt6i_src; - x->u.rt6.rt6i_idev = rt0->rt6i_idev; - in6_dev_hold(rt0->rt6i_idev); - __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); - trailer_len -= x->u.dst.xfrm->props.trailer_len; - } + xdst->route_cookie = rt->rt6i_node->fn_sernum; + xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; + xdst->u.rt6.rt6i_dst = rt->rt6i_dst; + xdst->u.rt6.rt6i_src = rt->rt6i_src; - xfrm_init_pmtu(dst); return 0; - -error: - if (dst) - dst_free(dst); - return err; } static inline void -_decode_session6(struct sk_buff *skb, struct flowi *fl) +_decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) { u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -265,8 +148,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); - ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); - ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); + ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); + ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) { nh = skb_network_header(skb); @@ -289,8 +172,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) { __be16 *ports = (__be16 *)exthdr; - fl->fl_ip_sport = ports[0]; - fl->fl_ip_dport = ports[1]; + fl->fl_ip_sport = ports[!!reverse]; + fl->fl_ip_dport = ports[!reverse]; } fl->proto = nexthdr; return; @@ -329,7 +212,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) } } -static inline int xfrm6_garbage_collect(void) +static inline int xfrm6_garbage_collect(struct dst_ops *ops) { xfrm6_policy_afinfo.garbage_collect(); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); @@ -362,7 +245,8 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev); + struct inet6_dev *loopback_idev = + in6_dev_get(dev->nd_net->loopback_dev); BUG_ON(!loopback_idev); do { @@ -385,6 +269,7 @@ static struct dst_ops xfrm6_dst_ops = { .update_pmtu = xfrm6_update_pmtu, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, + .local_out = __ip6_local_out, .gc_thresh = 1024, .entry_size = sizeof(struct xfrm_dst), }; @@ -395,13 +280,15 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, - .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, + .get_tos = xfrm6_get_tos, + .init_path = xfrm6_init_path, + .fill_dst = xfrm6_fill_dst, }; -static void __init xfrm6_policy_init(void) +static int __init xfrm6_policy_init(void) { - xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); } static void xfrm6_policy_fini(void) @@ -409,10 +296,22 @@ static void xfrm6_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo); } -void __init xfrm6_init(void) +int __init xfrm6_init(void) { - xfrm6_policy_init(); - xfrm6_state_init(); + int ret; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; +out: + return ret; +out_policy: + xfrm6_policy_fini(); + goto out; } void xfrm6_fini(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b392bee396f1..dc817e035e23 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -14,6 +14,8 @@ #include <net/xfrm.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> +#include <linux/netfilter_ipv6.h> +#include <net/dsfield.h> #include <net/ipv6.h> #include <net/addrconf.h> @@ -168,18 +170,37 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) return 0; } +int xfrm6_extract_header(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + XFRM_MODE_SKB_CB(skb)->id = 0; + XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); + XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); + XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, + sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); + + return 0; +} + static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, + .proto = IPPROTO_IPV6, + .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .extract_input = xfrm6_extract_input, + .extract_output = xfrm6_extract_output, + .transport_finish = xfrm6_transport_finish, }; -void __init xfrm6_state_init(void) +int __init xfrm6_state_init(void) { - xfrm_state_register_afinfo(&xfrm6_state_afinfo); + return xfrm_state_register_afinfo(&xfrm6_state_afinfo); } void xfrm6_state_fini(void) |