diff options
Diffstat (limited to 'net')
57 files changed, 1530 insertions, 1031 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c0c7bb8e9f07..bd93c45778d4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -117,8 +117,7 @@ static void __exit vlan_cleanup_devices(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { - nxt = dev->next; + for_each_netdev_safe(dev, nxt) { if (dev->priv_flags & IFF_802_1Q_VLAN) { unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, VLAN_DEV_INFO(dev)->vlan_id); diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 5e24f72602a1..d216a64421cd 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -237,13 +237,9 @@ int vlan_proc_rem_dev(struct net_device *vlandev) * The following few functions build the content of /proc/net/vlan/config */ -/* starting at dev, find a VLAN device */ -static struct net_device *vlan_skip(struct net_device *dev) +static inline int is_vlan_dev(struct net_device *dev) { - while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN)) - dev = dev->next; - - return dev; + return dev->priv_flags & IFF_802_1Q_VLAN; } /* start read of /proc/net/vlan/config */ @@ -257,19 +253,35 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (dev = vlan_skip(dev_base); dev && i < *pos; - dev = vlan_skip(dev->next), ++i); + for_each_netdev(dev) { + if (!is_vlan_dev(dev)) + continue; + + if (i++ == *pos) + return dev; + } - return (i == *pos) ? dev : NULL; + return NULL; } static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; + ++*pos; - return vlan_skip((v == SEQ_START_TOKEN) - ? 
dev_base - : ((struct net_device *)v)->next); + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) + dev = net_device_entry(&dev_base_head); + + for_each_netdev_continue(dev) { + if (!is_vlan_dev(dev)) + continue; + + return dev; + } + + return NULL; } static void vlan_seq_stop(struct seq_file *seq, void *v) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 690573bbf012..849deaf14108 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -475,11 +475,9 @@ void __exit br_cleanup_bridges(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { - nxt = dev->next; + for_each_netdev_safe(dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); - } rtnl_unlock(); } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index eda0fbfc923a..bb15e9e259b1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -27,7 +27,9 @@ static int get_bridge_ifindices(int *indices, int num) struct net_device *dev; int i = 0; - for (dev = dev_base; dev && i < num; dev = dev->next) { + for_each_netdev(dev) { + if (i >= num) + break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9b2986b182ba..fa779874b9dd 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -142,14 +142,33 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) return skb->nf_bridge; } -static inline void nf_bridge_save_header(struct sk_buff *skb) +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { - int header_size = ETH_HLEN; + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull(skb, len); + skb->network_header += len; +} - if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; - else if 
(skb->protocol == htons(ETH_P_PPP_SES)) - header_size += PPPOE_SES_HLEN; +static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull_rcsum(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_save_header(struct sk_buff *skb) +{ + int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); skb_copy_from_linear_data_offset(skb, -header_size, skb->nf_bridge->data, header_size); @@ -162,12 +181,7 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) int nf_bridge_copy_header(struct sk_buff *skb) { int err; - int header_size = ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; - else if (skb->protocol == htons(ETH_P_PPP_SES)) - header_size += PPPOE_SES_HLEN; + int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); err = skb_cow(skb, header_size); if (err) @@ -175,11 +189,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) skb_copy_to_linear_data_offset(skb, -header_size, skb->nf_bridge->data, header_size); - - if (skb->protocol == htons(ETH_P_8021Q)) - __skb_push(skb, VLAN_HLEN); - else if (skb->protocol == htons(ETH_P_PPP_SES)) - __skb_push(skb, PPPOE_SES_HLEN); + __skb_push(skb, nf_bridge_encap_header_len(skb)); return 0; } @@ -200,13 +210,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) dst_hold(skb->dst); skb->dev = nf_bridge->physindev; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -284,13 +288,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (!skb->dev) kfree_skb(skb); else { - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, 
VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header(skb); skb->dst->output(skb); } return 0; @@ -356,15 +354,7 @@ bridged_dnat: * bridged frame */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; - if (skb->protocol == - htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if(skb->protocol == - htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, @@ -380,13 +370,7 @@ bridged_dnat: } skb->dev = nf_bridge->physindev; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -536,14 +520,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, #endif if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) goto out; - - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull_rcsum(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull_rcsum(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } #ifdef CONFIG_SYSCTL @@ -557,14 +534,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) goto out; - - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull_rcsum(skb, VLAN_HLEN); - 
skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull_rcsum(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header_rcsum(skb); if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; @@ -642,13 +612,7 @@ static int br_nf_forward_finish(struct sk_buff *skb) } else { in = *((struct net_device **)(skb->cb)); } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, skb->dev, br_forward_finish, 1); return 0; @@ -682,13 +646,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(*pskb, PPPOE_SES_HLEN); - (*pskb)->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header(*pskb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -722,15 +680,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->network_header += VLAN_HLEN; + nf_bridge_pull_encap_header(*pskb); } if (arp_hdr(skb)->ar_pln != 4) { - if (IS_VLAN_ARP(skb)) { - skb_push(*pskb, VLAN_HLEN); - (*pskb)->network_header -= VLAN_HLEN; - } + if (IS_VLAN_ARP(skb)) + nf_bridge_push_encap_header(*pskb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -777,13 +732,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } - if (skb->protocol == 
htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, br_forward_finish); @@ -848,14 +797,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } - + nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 35facc0c11c2..0fcf6f073064 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -109,7 +109,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - for (dev = dev_base, idx = 0; dev; dev = dev->next) { + idx = 0; + for_each_netdev(dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; diff --git a/net/core/dev.c b/net/core/dev.c index eb999003bbb7..f27d4ab181e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -156,13 +156,13 @@ static spinlock_t net_dma_event_lock; #endif /* - * The @dev_base list is protected by @dev_base_lock and the rtnl + * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * * Pure readers hold dev_base_lock for reading. * * Writers must hold the rtnl semaphore while they loop through the - * dev_base list, and hold dev_base_lock for writing when they do the + * dev_base_head list, and hold dev_base_lock for writing when they do the * actual updates. 
This allows pure readers to access the list even * while a writer is preparing to update it. * @@ -174,11 +174,10 @@ static spinlock_t net_dma_event_lock; * unregister_netdevice(), which must be called with the rtnl * semaphore held. */ -struct net_device *dev_base; -static struct net_device **dev_tail = &dev_base; +LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base); +EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 @@ -567,26 +566,38 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) ASSERT_RTNL(); - for (dev = dev_base; dev; dev = dev->next) + for_each_netdev(dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) - break; - return dev; + return dev; + + return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr); +struct net_device *__dev_getfirstbyhwtype(unsigned short type) +{ + struct net_device *dev; + + ASSERT_RTNL(); + for_each_netdev(dev) + if (dev->type == type) + return dev; + + return NULL; +} + +EXPORT_SYMBOL(__dev_getfirstbyhwtype); + struct net_device *dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; rtnl_lock(); - for (dev = dev_base; dev; dev = dev->next) { - if (dev->type == type) { - dev_hold(dev); - break; - } - } + dev = __dev_getfirstbyhwtype(type); + if (dev) + dev_hold(dev); rtnl_unlock(); return dev; } @@ -606,17 +617,19 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) { - struct net_device *dev; + struct net_device *dev, *ret; + ret = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); + ret = dev; break; } } read_unlock(&dev_base_lock); - return dev; + return ret; } /** @@ -682,7 +695,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) if (!inuse) return -ENOMEM; - for (d = dev_base; d; d = d->next) { + for_each_netdev(d) { if 
(!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) @@ -964,7 +977,7 @@ int register_netdevice_notifier(struct notifier_block *nb) rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (!err) { - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { nb->notifier_call(nb, NETDEV_REGISTER, dev); if (dev->flags & IFF_UP) @@ -2038,7 +2051,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2070,26 +2083,28 @@ static int dev_ifconf(char __user *arg) * This is invoked by the /proc filesystem handler to display a device * in detail. */ -static struct net_device *dev_get_idx(loff_t pos) +void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + loff_t off; struct net_device *dev; - loff_t i; - for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); + read_lock(&dev_base_lock); + if (!*pos) + return SEQ_START_TOKEN; - return i == pos ? dev : NULL; -} + off = 1; + for_each_netdev(dev) + if (off++ == *pos) + return dev; -void *dev_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock(&dev_base_lock); - return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; + return NULL; } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; + return v == SEQ_START_TOKEN ? 
+ first_net_device() : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -3071,11 +3086,9 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); - dev->next = NULL; dev_init_scheduler(dev); write_lock_bh(&dev_base_lock); - *dev_tail = dev; - dev_tail = &dev->next; + list_add_tail(&dev->dev_list, &dev_base_head); hlist_add_head(&dev->name_hlist, head); hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); @@ -3349,8 +3362,6 @@ void synchronize_net(void) void unregister_netdevice(struct net_device *dev) { - struct net_device *d, **dp; - BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3370,19 +3381,11 @@ void unregister_netdevice(struct net_device *dev) dev_close(dev); /* And unlink it from device chain. */ - for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { - if (d == dev) { - write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - if (dev_tail == &dev->next) - dev_tail = dp; - *dp = d->next; - write_unlock_bh(&dev_base_lock); - break; - } - } - BUG_ON(!d); + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); dev->reg_state = NETREG_UNREGISTERING; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 7d57bf77f3a3..5a54053386c8 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -223,7 +223,7 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) loff_t off = 0; read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (off++ == *pos) return dev; } @@ -232,9 +232,8 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; ++*pos; - return dev->next; + return next_net_device((struct net_device *)v); } static void 
dev_mc_seq_stop(struct seq_file *seq, void *v) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cec111109155..8c971a2efe2a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -539,13 +539,16 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int s_idx = cb->args[0]; struct net_device *dev; - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; +cont: + idx++; } cb->args[0] = idx; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a205eaa87f52..9fbe87c93802 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -721,7 +721,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr; - struct net_device *dev; + struct net_device *dev, *ldev; int rv; if (addr_len != sizeof(struct sockaddr_dn)) @@ -746,14 +746,17 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + ldev = NULL; + for_each_netdev(dev) { if (!dev->dn_ptr) continue; - if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) + if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { + ldev = dev; break; + } } read_unlock(&dev_base_lock); - if (dev == NULL) + if (ldev == NULL) return -EADDRNOTAVAIL; } } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 5c2a9951b638..764a56a13e38 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -799,9 +799,10 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; - for (dev = dev_base, idx = 0; 
dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < skip_ndevs) - continue; + goto cont; else if (idx > skip_ndevs) { /* Only skip over addresses for first dev dumped * in this iteration (idx == skip_ndevs) */ @@ -809,18 +810,20 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) } if ((dn_db = dev->dn_ptr) == NULL) - continue; + goto cont; for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { if (dn_idx < skip_naddr) - continue; + goto cont; if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; } +cont: + idx++; } done: cb->args[0] = idx; @@ -1296,7 +1299,7 @@ void dn_dev_devices_off(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) + for_each_netdev(dev) dn_dev_down(dev); rtnl_unlock(); @@ -1307,7 +1310,7 @@ void dn_dev_devices_on(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1325,62 +1328,56 @@ int unregister_dnaddr_notifier(struct notifier_block *nb) } #ifdef CONFIG_PROC_FS -static inline struct net_device *dn_dev_get_next(struct seq_file *seq, struct net_device *dev) +static inline int is_dn_dev(struct net_device *dev) { - do { - dev = dev->next; - } while(dev && !dev->dn_ptr); - - return dev; + return dev->dn_ptr != NULL; } -static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos) +static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) { + int i; struct net_device *dev; - dev = dev_base; - if (dev && !dev->dn_ptr) - dev = dn_dev_get_next(seq, dev); - if (pos) { - while(dev && (dev = dn_dev_get_next(seq, dev))) - --pos; - } - return dev; -} + read_lock(&dev_base_lock); -static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos) { - struct net_device *dev; - read_lock(&dev_base_lock); - dev = dn_dev_get_idx(seq, *pos - 1); 
- if (dev == NULL) - read_unlock(&dev_base_lock); - return dev; + if (*pos == 0) + return SEQ_START_TOKEN; + + i = 1; + for_each_netdev(dev) { + if (!is_dn_dev(dev)) + continue; + + if (i++ == *pos) + return dev; } - return SEQ_START_TOKEN; + + return NULL; } static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; - loff_t one = 1; + struct net_device *dev; - if (v == SEQ_START_TOKEN) { - dev = dn_dev_seq_start(seq, &one); - } else { - dev = dn_dev_get_next(seq, dev); - if (dev == NULL) - read_unlock(&dev_base_lock); - } ++*pos; - return dev; + + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) + dev = net_device_entry(&dev_base_head); + + for_each_netdev_continue(dev) { + if (!is_dn_dev(dev)) + continue; + + return dev; + } + + return NULL; } static void dn_dev_seq_stop(struct seq_file *seq, void *v) { - if (v && v != SEQ_START_TOKEN) - read_unlock(&dev_base_lock); + read_unlock(&dev_base_lock); } static char *dn_type2asc(char type) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 310a86268d2b..d2bc19d47950 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -602,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5d7337bcf0fe..a8bf106b7a61 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -886,7 +886,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old .iif = loopback_dev.ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; - struct net_device *dev_out = NULL; + struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; unsigned hash; unsigned flags = 0; @@ -925,15 +925,17 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old goto out; } 
read_lock(&dev_base_lock); - for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { - if (!dev_out->dn_ptr) + for_each_netdev(dev) { + if (!dev->dn_ptr) continue; - if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + if (!dn_dev_islocal(dev, oldflp->fld_src)) continue; - if ((dev_out->flags & IFF_LOOPBACK) && + if ((dev->flags & IFF_LOOPBACK) && oldflp->fld_dst && - !dn_dev_islocal(dev_out, oldflp->fld_dst)) + !dn_dev_islocal(dev, oldflp->fld_dst)) continue; + + dev_out = dev; break; } read_unlock(&dev_base_lock); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 088888db8b3d..7f95e6e9beeb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -910,7 +910,7 @@ no_in_dev: */ read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -989,7 +989,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1182,23 +1182,26 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_ip_idx = 0; if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) - continue; + goto cont; for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) - continue; + goto cont; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) goto done; } +cont: + idx++; } done: @@ -1243,7 +1246,7 @@ void inet_forward_change(void) ipv4_devconf_dflt.forwarding = on; read_lock(&dev_base_lock); - for 
(dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2506021c2935..f4dd47453108 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2288,9 +2288,8 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - for (state->dev = dev_base, state->in_dev = NULL; - state->dev; - state->dev = state->dev->next) { + state->in_dev = NULL; + for_each_netdev(state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2316,7 +2315,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li read_unlock(&state->in_dev->mc_list_lock); in_dev_put(state->in_dev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->in_dev = NULL; break; @@ -2450,9 +2449,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + state->im = NULL; + for_each_netdev(state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2488,7 +2487,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l read_unlock(&state->idev->mc_list_lock); in_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; goto out; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 597c800b2fdc..342ca8d89458 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -192,7 +192,7 @@ static int __init ic_open_devs(void) if (dev_change_flags(&loopback_dev, 
loopback_dev.flags | IFF_UP) < 0) printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (dev == &loopback_dev) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index e5a34c17d927..c3908bc5a709 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -72,6 +72,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, __be16 *keyptr; unsigned int min, i, range_size; + /* If there is no master conntrack we are not PPTP, + do not change tuples */ + if (!conntrack->master) + return 0; + if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; else @@ -122,18 +127,9 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, if (maniptype != IP_NAT_MANIP_DST) return 1; switch (greh->version) { - case 0: - if (!greh->key) { - DEBUGP("can't nat GRE w/o key\n"); - break; - } - if (greh->csum) { - /* FIXME: Never tested this code... */ - nf_proto_csum_replace4(gre_csum(greh), *pskb, - *(gre_key(greh)), - tuple->dst.u.gre.key, 0); - } - *(gre_key(greh)) = tuple->dst.u.gre.key; + case GRE_VERSION_1701: + /* We do not currently NAT any GREv0 packets. 
+ * Try to behave like "nf_nat_proto_unknown" */ break; case GRE_VERSION_PPTP: DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 2a283397a8b6..2534f718ab92 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -226,10 +226,6 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } - if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { - printk("DNAT: port randomization not supported\n"); - return 0; - } return 1; } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index bfd88e4e0685..fac97cf51ae5 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -222,6 +222,29 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, return mangle_content_len(pskb, ctinfo, ct, dptr); } +static void ip_nat_sdp_expect(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip = exp->saved_ip; + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); +} + /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. 
*/ static unsigned int ip_nat_sdp(struct sk_buff **pskb, @@ -239,13 +262,14 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, /* Connection will come from reply */ newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_ip = exp->tuple.dst.u3.ip; exp->tuple.dst.u3.ip = newip; exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; exp->dir = !dir; /* When you see the packet, we need to NAT it the same as the this one. */ - exp->expectfn = nf_nat_follow_master; + exp->expectfn = ip_nat_sdp_expect; /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d6e488668171..8b124eafbb90 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1760,8 +1760,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_retrans(tp); inet_csk_delack_init(sk); tcp_init_send_head(sk); - tp->rx_opt.saw_tstamp = 0; - tcp_sack_reset(&tp->rx_opt); + memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); BUG_TRAP(!inet->num || icsk->icsk_bind_hash); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index a291097fcc0a..43d624e5043c 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -97,10 +97,6 @@ struct hstcp { u32 ai; }; -static int max_ssthresh = 100; -module_param(max_ssthresh, int, 0644); -MODULE_PARM_DESC(max_ssthresh, "limited slow start threshold (RFC3742)"); - static void hstcp_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -122,23 +118,9 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* RFC3742: limited slow start - * the window is increased by 1/K MSS for each arriving ACK, - * for K = int(cwnd/(0.5 max_ssthresh)) - */ - if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh) { - u32 k = max(tp->snd_cwnd / (max_ssthresh >> 1), 1U); - if (++tp->snd_cwnd_cnt >= k) { - if (tp->snd_cwnd < 
tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - } else { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { /* Update AIMD parameters. * * We want to guarantee that: diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h deleted file mode 100644 index ed3b7198f23c..000000000000 --- a/net/ipv4/tcp_yeah.h +++ /dev/null @@ -1,7 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/inet_diag.h> -#include <asm/div64.h> - -#include <net/tcp.h> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3452433cbc96..d02685c6bc69 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -449,7 +449,7 @@ static void addrconf_forward_change(void) struct inet6_dev *idev; read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -911,7 +911,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -2064,7 +2064,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2225,7 +2225,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) return; } /* then try to inherit it from any device */ - for (link_dev = dev_base; link_dev; link_dev = link_dev->next) { + for_each_netdev(link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -3257,14 +3257,15 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - for (dev = dev_base, idx = 0; dev; dev = 
dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_ip_idx = 0; ip_idx = 0; if ((idev = in6_dev_get(dev)) == NULL) - continue; + goto cont; read_lock_bh(&idev->lock); switch (type) { case UNICAST_ADDR: @@ -3311,6 +3312,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } read_unlock_bh(&idev->lock); in6_dev_put(idev); +cont: + idx++; } done: if (err <= 0) { @@ -3575,16 +3578,19 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct inet6_dev *idev; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if ((idev = in6_dev_get(dev)) == NULL) - continue; + goto cont; err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); in6_dev_put(idev); if (err <= 0) break; +cont: + idx++; } read_unlock(&dev_base_lock); cb->args[0] = idx; @@ -4247,7 +4253,7 @@ void __exit addrconf_cleanup(void) * clean dev list. 
*/ - for (dev=dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { if ((idev = __in6_dev_get(dev)) == NULL) continue; addrconf_ifdown(dev, 1); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 09117d63256f..9b81264eb78f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -423,14 +423,18 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) */ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) { + int found = 0; + if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) - if (ipv6_chk_acast_dev(dev, addr)) + for_each_netdev(dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; break; + } read_unlock(&dev_base_lock); - return dev != 0; + return found; } @@ -447,9 +451,8 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -476,7 +479,7 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6c2758951d60..3e308fb41b49 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2331,9 +2331,8 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if 
(!idev) @@ -2360,7 +2359,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; break; @@ -2475,9 +2474,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + state->im = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2513,7 +2512,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; goto out; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index e84c924a81ee..2f1373855a8b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -45,7 +45,8 @@ static struct proto iucv_proto = { static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); -static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); +static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], + u8 ipuser[16]); static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); static struct iucv_sock_list iucv_sk_list = { @@ -147,11 +148,12 @@ static void iucv_sock_close(struct sock *sk) unsigned char user_data[16]; struct iucv_sock *iucv = iucv_sk(sk); int err; + unsigned long timeo; iucv_sock_clear_timer(sk); lock_sock(sk); - switch(sk->sk_state) { + 
switch (sk->sk_state) { case IUCV_LISTEN: iucv_sock_cleanup_listen(sk); break; @@ -159,6 +161,21 @@ static void iucv_sock_close(struct sock *sk) case IUCV_CONNECTED: case IUCV_DISCONN: err = 0; + + sk->sk_state = IUCV_CLOSING; + sk->sk_state_change(sk); + + if (!skb_queue_empty(&iucv->send_skb_q)) { + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + timeo = sk->sk_lingertime; + else + timeo = IUCV_DISCONN_TIMEOUT; + err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + } + + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + if (iucv->path) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); @@ -168,12 +185,11 @@ static void iucv_sock_close(struct sock *sk) iucv->path = NULL; } - sk->sk_state = IUCV_CLOSED; - sk->sk_state_change(sk); sk->sk_err = ECONNRESET; sk->sk_state_change(sk); skb_queue_purge(&iucv->send_skb_q); + skb_queue_purge(&iucv->backlog_skb_q); sock_set_flag(sk, SOCK_ZAPPED); break; @@ -204,6 +220,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) sock_init_data(sock, sk); INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); + skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; sk->sk_destruct = iucv_sock_destruct; @@ -276,7 +293,7 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) struct iucv_sock *isk, *n; struct sock *sk; - list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) { sk = (struct sock *) isk; lock_sock(sk); @@ -510,7 +527,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, long timeo; int err = 0; - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_state != IUCV_LISTEN) { err = -EBADFD; @@ -521,7 +538,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection */ 
add_wait_queue_exclusive(sk->sk_sleep, &wait); - while (!(nsk = iucv_accept_dequeue(sk, newsock))){ + while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { err = -EAGAIN; @@ -530,7 +547,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, release_sock(sk); timeo = schedule_timeout(timeo); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_state != IUCV_LISTEN) { err = -EBADFD; @@ -602,13 +619,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED){ - if(!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) - return err; + if (sk->sk_state == IUCV_CONNECTED) { + if (!(skb = sock_alloc_send_skb(sk, len, + msg->msg_flags & MSG_DONTWAIT, + &err))) + goto out; - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){ + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; goto fail; } @@ -647,10 +664,16 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); int target, copied = 0; - struct sk_buff *skb; + struct sk_buff *skb, *rskb, *cskb; int err = 0; + if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && + skb_queue_empty(&iucv->backlog_skb_q) && + skb_queue_empty(&sk->sk_receive_queue)) + return 0; + if (flags & (MSG_OOB)) return -EOPNOTSUPP; @@ -665,10 +688,12 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, skb->len, len); - if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) { + cskb = skb; + if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { skb_queue_head(&sk->sk_receive_queue, skb); if (copied == 0) return -EFAULT; + goto done; } len -= copied; @@ -683,6 +708,18 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } 
kfree_skb(skb); + + /* Queue backlog skbs */ + rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + while (rskb) { + if (sock_queue_rcv_skb(sk, rskb)) { + skb_queue_head(&iucv_sk(sk)->backlog_skb_q, + rskb); + break; + } else { + rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + } + } } else skb_queue_head(&sk->sk_receive_queue, skb); @@ -695,7 +732,7 @@ static inline unsigned int iucv_accept_poll(struct sock *parent) struct iucv_sock *isk, *n; struct sock *sk; - list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) { sk = (struct sock *) isk; if (sk->sk_state == IUCV_CONNECTED) @@ -726,12 +763,15 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, mask |= POLLHUP; if (!skb_queue_empty(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) + (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == IUCV_CLOSED) mask |= POLLHUP; + if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) + mask |= POLLIN; + if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else @@ -754,7 +794,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) return -EINVAL; lock_sock(sk); - switch(sk->sk_state) { + switch (sk->sk_state) { case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -770,7 +810,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, (void *) prmmsg, 8); if (err) { - switch(err) { + switch (err) { case 1: err = -ENOTCONN; break; @@ -817,13 +857,6 @@ static int iucv_sock_release(struct socket *sock) iucv_sk(sk)->path = NULL; } - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){ - lock_sock(sk); - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, - sk->sk_lingertime); - release_sock(sk); - } - sock_orphan(sk); iucv_sock_kill(sk); return err; @@ -880,7 +913,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Create the 
new socket */ nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); - if (!nsk){ + if (!nsk) { err = iucv_path_sever(path, user_data); goto fail; } @@ -903,7 +936,7 @@ static int iucv_callback_connreq(struct iucv_path *path, path->msglim = IUCV_QUEUELEN_DEFAULT; err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); - if (err){ + if (err) { err = iucv_path_sever(path, user_data); goto fail; } @@ -927,18 +960,53 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } +static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, + struct sk_buff_head fragmented_skb_q) +{ + int dataleft, size, copied = 0; + struct sk_buff *nskb; + + dataleft = len; + while (dataleft) { + if (dataleft >= sk->sk_rcvbuf / 4) + size = sk->sk_rcvbuf / 4; + else + size = dataleft; + + nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); + if (!nskb) + return -ENOMEM; + + memcpy(nskb->data, skb->data + copied, size); + copied += size; + dataleft -= size; + + nskb->h.raw = nskb->data; + nskb->nh.raw = nskb->data; + nskb->len = size; + + skb_queue_tail(fragmented_skb_q, nskb); + } + + return 0; +} + static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; - struct sk_buff *skb; + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb, *fskb; + struct sk_buff_head fragmented_skb_q; int rc; + skb_queue_head_init(&fragmented_skb_q); + if (sk->sk_shutdown & RCV_SHUTDOWN) return; skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); if (!skb) { - iucv_message_reject(path, msg); + iucv_path_sever(path, NULL); return; } @@ -952,14 +1020,39 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) kfree_skb(skb); return; } + if (skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, msg->length, + &fragmented_skb_q); + kfree_skb(skb); + skb = NULL; + if (rc) { + iucv_path_sever(path, NULL); + return; + } + } else { + 
skb_reset_transport_header(skb); + skb_reset_network_header(skb); + skb->len = msg->length; + } + } + /* Queue the fragmented skb */ + fskb = skb_dequeue(&fragmented_skb_q); + while (fskb) { + if (!skb_queue_empty(&iucv->backlog_skb_q)) + skb_queue_tail(&iucv->backlog_skb_q, fskb); + else if (sock_queue_rcv_skb(sk, fskb)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb); + fskb = skb_dequeue(&fragmented_skb_q); + } - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->len = msg->length; + /* Queue the original skb if it exists (was not fragmented) */ + if (skb) { + if (!skb_queue_empty(&iucv->backlog_skb_q)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); + else if (sock_queue_rcv_skb(sk, skb)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); } - if (sock_queue_rcv_skb(sk, skb)) - kfree_skb(skb); } static void iucv_callback_txdone(struct iucv_path *path, @@ -971,17 +1064,27 @@ static void iucv_callback_txdone(struct iucv_path *path, struct sk_buff *list_skb = list->next; unsigned long flags; - spin_lock_irqsave(&list->lock, flags); + if (list_skb) { + spin_lock_irqsave(&list->lock, flags); + + do { + this = list_skb; + list_skb = list_skb->next; + } while (memcmp(&msg->tag, this->cb, 4) && list_skb); + + spin_unlock_irqrestore(&list->lock, flags); - do { - this = list_skb; - list_skb = list_skb->next; - } while (memcmp(&msg->tag, this->cb, 4)); + skb_unlink(this, &iucv_sk(sk)->send_skb_q); + kfree_skb(this); + } - spin_unlock_irqrestore(&list->lock, flags); + if (sk->sk_state == IUCV_CLOSING) { + if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + } + } - skb_unlink(this, &iucv_sk(sk)->send_skb_q); - kfree_skb(this); } static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) @@ -1022,7 +1125,7 @@ static struct net_proto_family iucv_sock_family_ops = { .create = iucv_sock_create, }; -static int afiucv_init(void) +static int __init afiucv_init(void) { int err; diff 
--git a/net/iucv/iucv.c b/net/iucv/iucv.c index 903bdb6eaaa1..fb3faf72e850 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -32,7 +32,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> - #include <linux/spinlock.h> #include <linux/kernel.h> #include <linux/slab.h> @@ -69,7 +68,7 @@ #define IUCV_IPNORPY 0x10 #define IUCV_IPALL 0x80 -static int iucv_bus_match (struct device *dev, struct device_driver *drv) +static int iucv_bus_match(struct device *dev, struct device_driver *drv) { return 0; } @@ -78,8 +77,11 @@ struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, }; +EXPORT_SYMBOL(iucv_bus); struct device *iucv_root; +EXPORT_SYMBOL(iucv_root); + static int iucv_available; /* General IUCV interrupt structure */ @@ -405,7 +407,7 @@ static void iucv_declare_cpu(void *data) rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); if (rc) { char *err = "Unknown"; - switch(rc) { + switch (rc) { case 0x03: err = "Directory error"; break; @@ -588,7 +590,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block iucv_cpu_notifier = { +static struct notifier_block __cpuinitdata iucv_cpu_notifier = { .notifier_call = iucv_cpu_notify, }; @@ -691,6 +693,7 @@ out_mutex: mutex_unlock(&iucv_register_mutex); return rc; } +EXPORT_SYMBOL(iucv_register); /** * iucv_unregister @@ -723,6 +726,7 @@ void iucv_unregister(struct iucv_handler *handler, int smp) iucv_setmask_mp(); mutex_unlock(&iucv_register_mutex); } +EXPORT_SYMBOL(iucv_unregister); /** * iucv_path_accept @@ -761,6 +765,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_accept); /** * iucv_path_connect @@ -824,6 +829,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, spin_unlock_bh(&iucv_table_lock); return rc; } +EXPORT_SYMBOL(iucv_path_connect); /** * iucv_path_quiesce: @@ -850,6 +856,7 @@ int iucv_path_quiesce(struct 
iucv_path *path, u8 userdata[16]) local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_quiesce); /** * iucv_path_resume: @@ -890,7 +897,6 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) { int rc; - preempt_disable(); if (iucv_active_cpu != smp_processor_id()) spin_lock_bh(&iucv_table_lock); @@ -904,6 +910,7 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) preempt_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_sever); /** * iucv_message_purge @@ -936,6 +943,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_purge); /** * iucv_message_receive @@ -1006,6 +1014,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_receive); /** * iucv_message_reject @@ -1034,6 +1043,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg) local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_reject); /** * iucv_message_reply @@ -1077,6 +1087,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_reply); /** * iucv_message_send @@ -1125,6 +1136,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_send); /** * iucv_message_send2way @@ -1181,6 +1193,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_send2way); /** * iucv_path_pending @@ -1572,7 +1585,7 @@ static void iucv_external_interrupt(u16 code) * * Allocates and initializes various data structures. 
*/ -static int iucv_init(void) +static int __init iucv_init(void) { int rc; @@ -1583,7 +1596,7 @@ static int iucv_init(void) rc = iucv_query_maxconn(); if (rc) goto out; - rc = register_external_interrupt (0x4000, iucv_external_interrupt); + rc = register_external_interrupt(0x4000, iucv_external_interrupt); if (rc) goto out; rc = bus_register(&iucv_bus); @@ -1594,7 +1607,7 @@ static int iucv_init(void) rc = PTR_ERR(iucv_root); goto out_bus; } - /* Note: GFP_DMA used used to get memory below 2G */ + /* Note: GFP_DMA used to get memory below 2G */ iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA); if (!iucv_irq_data) { @@ -1632,7 +1645,7 @@ out: * * Frees everything allocated from iucv_init. */ -static void iucv_exit(void) +static void __exit iucv_exit(void) { struct iucv_irq_list *p, *n; @@ -1653,24 +1666,6 @@ static void iucv_exit(void) subsys_initcall(iucv_init); module_exit(iucv_exit); -/** - * Export all public stuff - */ -EXPORT_SYMBOL (iucv_bus); -EXPORT_SYMBOL (iucv_root); -EXPORT_SYMBOL (iucv_register); -EXPORT_SYMBOL (iucv_unregister); -EXPORT_SYMBOL (iucv_path_accept); -EXPORT_SYMBOL (iucv_path_connect); -EXPORT_SYMBOL (iucv_path_quiesce); -EXPORT_SYMBOL (iucv_path_sever); -EXPORT_SYMBOL (iucv_message_purge); -EXPORT_SYMBOL (iucv_message_receive); -EXPORT_SYMBOL (iucv_message_reject); -EXPORT_SYMBOL (iucv_message_reply); -EXPORT_SYMBOL (iucv_message_send); -EXPORT_SYMBOL (iucv_message_send2way); - MODULE_AUTHOR("(C) 2001 IBM Corp. 
by Fritz Elfert (felfert@millenux.com)"); MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver"); MODULE_LICENSE("GPL"); diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index d12413cff5bd..d4b13a031fd5 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -160,8 +160,14 @@ static struct packet_type llc_tr_packet_type = { static int __init llc_init(void) { - if (dev_base->next) - memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN); + struct net_device *dev; + + dev = first_net_device(); + if (dev != NULL) + dev = next_net_device(dev); + + if (dev != NULL) + memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); else memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 42d2fb94eff1..507828d7d4ae 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -140,6 +140,14 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) static void netlink_sock_destruct(struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->cb) { + if (nlk->cb->done) + nlk->cb->done(nlk->cb); + netlink_destroy_callback(nlk->cb); + } + skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -148,7 +156,6 @@ static void netlink_sock_destruct(struct sock *sk) } BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)->cb); BUG_TRAP(!nlk_sk(sk)->groups); } @@ -456,17 +463,10 @@ static int netlink_release(struct socket *sock) sock_orphan(sk); nlk = nlk_sk(sk); - mutex_lock(nlk->cb_mutex); - if (nlk->cb) { - if (nlk->cb->done) - nlk->cb->done(nlk->cb); - netlink_destroy_callback(nlk->cb); - nlk->cb = NULL; - } - mutex_unlock(nlk->cb_mutex); - - /* OK. Socket is unlinked, and, therefore, - no new packets will arrive */ + /* + * OK. Socket is unlinked, any packets that arrive now + * will be purged. 
+ */ sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1245,16 +1245,14 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, siocb->scm = &scm; } siocb->scm->creds = *NETLINK_CREDS(skb); + if (flags & MSG_TRUNC) + copied = skb->len; skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) netlink_dump(sk); scm_recv(sock, msg, siocb->scm, flags); - - if (flags & MSG_TRUNC) - copied = skb->len; - out: netlink_rcv_wake(sk); return err ? : copied; @@ -1426,9 +1424,9 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump or destruction is in progress... */ + /* A dump is in progress... */ mutex_lock(nlk->cb_mutex); - if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { + if (nlk->cb) { mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); sock_put(sk); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 8e6bd4e9d82c..2f76e062609d 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,12 +618,13 @@ struct net_device *nr_dev_get(ax25_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } + dev = NULL; out: read_unlock(&dev_base_lock); return dev; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 1f9aefd95a99..929a784a86d7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -596,7 +596,7 @@ struct net_device *rose_dev_first(void) 
struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -614,12 +614,13 @@ struct net_device *rose_dev_get(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } + dev = NULL; out: read_unlock(&dev_base_lock); return dev; @@ -630,10 +631,11 @@ static int rose_dev_exists(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } + dev = NULL; out: read_unlock(&dev_base_lock); return dev != NULL; diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 8750f6da6bc7..91b3d52f6f1a 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -5,6 +5,7 @@ config AF_RXRPC tristate "RxRPC session sockets" depends on EXPERIMENTAL + select KEYS help Say Y or M here to include support for RxRPC session sockets (just the transport part, not the presentation part: (un)marshalling is @@ -29,7 +30,7 @@ config AF_RXRPC_DEBUG config RXKAD tristate "RxRPC Kerberos security" - depends on AF_RXRPC && KEYS + depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER select CRYPTO_BLKCIPHER diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index fc07a926df56..657ee69f2133 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -543,6 +543,38 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call) } /* + * process the extra information that may be appended to an ACK packet + */ +static void rxrpc_extract_ackinfo(struct 
rxrpc_call *call, struct sk_buff *skb, + unsigned latest, int nAcks) +{ + struct rxrpc_ackinfo ackinfo; + struct rxrpc_peer *peer; + unsigned mtu; + + if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { + _leave(" [no ackinfo]"); + return; + } + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + latest, + ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), + ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); + + mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); + + peer = call->conn->trans->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} + +/* * process packets in the reception queue */ static int rxrpc_process_rx_queue(struct rxrpc_call *call, @@ -606,6 +638,8 @@ process_further: rxrpc_acks[ack.reason], ack.nAcks); + rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); + if (ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", latest); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -801,9 +835,9 @@ void rxrpc_process_call(struct work_struct *work) struct msghdr msg; struct kvec iov[5]; unsigned long bits; - __be32 data; + __be32 data, pad; size_t len; - int genbit, loop, nbit, ioc, ret; + int genbit, loop, nbit, ioc, ret, mtu; u32 abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; @@ -899,9 +933,30 @@ void rxrpc_process_call(struct work_struct *work) } if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { - hdr.type = RXRPC_PACKET_TYPE_ACKALL; genbit = RXRPC_CALL_ACK_FINAL; - goto send_message; + + ack.bufferSpace = htons(8); + ack.maxSkew = 0; + ack.serial = 0; + ack.reason = RXRPC_ACK_IDLE; + ack.nAcks = 0; + call->ackr_reason = 0; + + spin_lock_bh(&call->lock); + ack.serial = call->ackr_serial; + ack.previousPacket = call->ackr_prev_seq; + ack.firstPacket = htonl(call->rx_data_eaten + 1); + spin_unlock_bh(&call->lock); + + pad = 0; + + iov[1].iov_base 
= &ack; + iov[1].iov_len = sizeof(ack); + iov[2].iov_base = &pad; + iov[2].iov_len = 3; + iov[3].iov_base = &ackinfo; + iov[3].iov_len = sizeof(ackinfo); + goto send_ACK; } if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | @@ -971,8 +1026,6 @@ void rxrpc_process_call(struct work_struct *work) /* consider sending an ordinary ACK */ if (test_bit(RXRPC_CALL_ACK, &call->events)) { - __be32 pad; - _debug("send ACK: window: %d - %d { %lx }", call->rx_data_eaten, call->ackr_win_top, call->ackr_window[0]); @@ -997,12 +1050,6 @@ void rxrpc_process_call(struct work_struct *work) ack.serial = 0; ack.reason = 0; - ackinfo.rxMTU = htonl(5692); -// ackinfo.rxMTU = htonl(call->conn->trans->peer->maxdata); - ackinfo.maxMTU = htonl(call->conn->trans->peer->maxdata); - ackinfo.rwind = htonl(32); - ackinfo.jumbo_max = htonl(4); - spin_lock_bh(&call->lock); ack.reason = call->ackr_reason; ack.serial = call->ackr_serial; @@ -1116,6 +1163,15 @@ send_ACK_with_skew: ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - ntohl(ack.serial)); send_ACK: + mtu = call->conn->trans->peer->if_mtu; + mtu -= call->conn->trans->peer->hdrsize; + ackinfo.maxMTU = htonl(mtu); + ackinfo.rwind = htonl(32); + + /* permit the peer to send us jumbo packets if it wants to */ + ackinfo.rxMTU = htonl(5692); + ackinfo.jumbo_max = htonl(4); + hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", ntohl(hdr.serial), diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 2c27df1ffa17..6cb3e8890e7e 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -100,8 +100,10 @@ void rxrpc_UDP_error_report(struct sock *sk) } if (mtu < peer->mtu) { + spin_lock_bh(&peer->lock); peer->mtu = mtu; peer->maxdata = peer->mtu - peer->hdrsize; + spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 5cdde4a48ed1..591c4422205e 100644 --- 
a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -582,7 +582,7 @@ static int rxrpc_send_data(struct kiocb *iocb, max &= ~(call->conn->size_align - 1UL); chunk = max; - if (chunk > len) + if (chunk > len && !more) chunk = len; space = chunk + call->conn->size_align; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index d399de4a7fe2..ce08b78647ce 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -19,6 +19,7 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> +#include <net/route.h> #include "ar-internal.h" static LIST_HEAD(rxrpc_peers); @@ -28,6 +29,47 @@ static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq); static void rxrpc_destroy_peer(struct work_struct *work); /* + * assess the MTU size for the network interface through which this peer is + * reached + */ +static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) +{ + struct rtable *rt; + struct flowi fl; + int ret; + + peer->if_mtu = 1500; + + memset(&fl, 0, sizeof(fl)); + + switch (peer->srx.transport.family) { + case AF_INET: + fl.oif = 0; + fl.proto = IPPROTO_UDP, + fl.nl_u.ip4_u.saddr = 0; + fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; + fl.nl_u.ip4_u.tos = 0; + /* assume AFS.CM talking to AFS.FS */ + fl.uli_u.ports.sport = htons(7001); + fl.uli_u.ports.dport = htons(7000); + break; + default: + BUG(); + } + + ret = ip_route_output_key(&rt, &fl); + if (ret < 0) { + kleave(" [route err %d]", ret); + return; + } + + peer->if_mtu = dst_mtu(&rt->u.dst); + dst_release(&rt->u.dst); + + kleave(" [if_mtu %u]", peer->if_mtu); +} + +/* * allocate a new peer */ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx, @@ -47,7 +89,8 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx, peer->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&peer->srx, srx, sizeof(*srx)); - peer->mtu = peer->if_mtu = 65535; + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; if (srx->transport.family == AF_INET) { peer->hdrsize = 
sizeof(struct iphdr); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8699e7006d80..bec600af03ca 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -894,9 +894,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; @@ -910,6 +911,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) goto done; q_idx++; } +cont: + idx++; } done: diff --git a/net/sctp/associola.c b/net/sctp/associola.c index db73ef97485a..df94e3cdfba3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1103,6 +1103,13 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ssnmap = new->ssnmap; new->ssnmap = NULL; } + + if (!asoc->assoc_id) { + /* get a new association id since we don't have one + * yet. + */ + sctp_assoc_set_id(asoc, GFP_ATOMIC); + } } } @@ -1375,3 +1382,25 @@ out: sctp_read_unlock(&asoc->base.addr_lock); return found; } + +/* Set an association id for a given association */ +int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) +{ + int assoc_id; + int error = 0; +retry: + if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) + return -ENOMEM; + + spin_lock_bh(&sctp_assocs_id_lock); + error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, + 1, &assoc_id); + spin_unlock_bh(&sctp_assocs_id_lock); + if (error == -EAGAIN) + goto retry; + else if (error) + return error; + + asoc->assoc_id = (sctp_assoc_t) assoc_id; + return error; +} diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ca527a27dd05..84cd53635fe8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -992,45 +992,52 @@ static struct sctp_pf sctp_pf_inet6_specific = { .af = &sctp_ipv6_specific, }; -/* Initialize IPv6 support and register with inet6 stack. 
*/ +/* Initialize IPv6 support and register with socket layer. */ int sctp_v6_init(void) { - int rc = proto_register(&sctpv6_prot, 1); + int rc; + /* Register the SCTP specific PF_INET6 functions. */ + sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6); + + /* Register the SCTP specific AF_INET6 functions. */ + sctp_register_af(&sctp_ipv6_specific); + + rc = proto_register(&sctpv6_prot, 1); if (rc) - goto out; - /* Register inet6 protocol. */ - rc = -EAGAIN; - if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) - goto out_unregister_sctp_proto; + return rc; /* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */ inet6_register_protosw(&sctpv6_seqpacket_protosw); inet6_register_protosw(&sctpv6_stream_protosw); - /* Register the SCTP specific PF_INET6 functions. */ - sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6); - - /* Register the SCTP specific AF_INET6 functions. */ - sctp_register_af(&sctp_ipv6_specific); + return 0; +} +/* Register with inet6 layer. */ +int sctp_v6_add_protocol(void) +{ /* Register notifier for inet6 address additions/deletions. */ register_inet6addr_notifier(&sctp_inet6addr_notifier); - rc = 0; -out: - return rc; -out_unregister_sctp_proto: - proto_unregister(&sctpv6_prot); - goto out; + + if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) + return -EAGAIN; + + return 0; } /* IPv6 specific exit support. */ void sctp_v6_exit(void) { - list_del(&sctp_ipv6_specific.list); - inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); inet6_unregister_protosw(&sctpv6_seqpacket_protosw); inet6_unregister_protosw(&sctpv6_stream_protosw); - unregister_inet6addr_notifier(&sctp_inet6addr_notifier); proto_unregister(&sctpv6_prot); + list_del(&sctp_ipv6_specific.list); +} + +/* Unregister with inet6 layer. 
*/ +void sctp_v6_del_protocol(void) +{ + inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); + unregister_inet6addr_notifier(&sctp_inet6addr_notifier); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c361deb6cea9..34bab36637ac 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -170,7 +170,7 @@ static void sctp_get_local_addr_list(void) struct sctp_af *af; read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); @@ -975,28 +975,14 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_sanity_check()) goto out; - status = proto_register(&sctp_prot, 1); - if (status) - goto out; - - /* Add SCTP to inet_protos hash table. */ - status = -EAGAIN; - if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) - goto err_add_protocol; - - /* Add SCTP(TCP and UDP style) to inetsw linked list. */ - inet_register_protosw(&sctp_seqpacket_protosw); - inet_register_protosw(&sctp_stream_protosw); - - /* Allocate a cache pools. */ + /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", sizeof(struct sctp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!sctp_bucket_cachep) - goto err_bucket_cachep; + goto out; sctp_chunk_cachep = kmem_cache_create("sctp_chunk", sizeof(struct sctp_chunk), @@ -1153,6 +1139,14 @@ SCTP_STATIC __init int sctp_init(void) INIT_LIST_HEAD(&sctp_address_families); sctp_register_af(&sctp_ipv4_specific); + status = proto_register(&sctp_prot, 1); + if (status) + goto err_proto_register; + + /* Register SCTP(UDP and TCP style) with socket layer. 
*/ + inet_register_protosw(&sctp_seqpacket_protosw); + inet_register_protosw(&sctp_stream_protosw); + status = sctp_v6_init(); if (status) goto err_v6_init; @@ -1166,19 +1160,39 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); - sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Register SCTP with inet layer. */ + if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) { + status = -EAGAIN; + goto err_add_protocol; + } + + /* Register SCTP with inet6 layer. */ + status = sctp_v6_add_protocol(); + if (status) + goto err_v6_add_protocol; + __unsafe(THIS_MODULE); status = 0; out: return status; +err_v6_add_protocol: + inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); +err_add_protocol: + sctp_free_local_addr_list(); + sock_release(sctp_ctl_socket); err_ctl_sock_init: sctp_v6_exit(); err_v6_init: + inet_unregister_protosw(&sctp_stream_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + proto_unregister(&sctp_prot); +err_proto_register: sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); free_pages((unsigned long)sctp_port_hashtable, @@ -1192,19 +1206,13 @@ err_ehash_alloc: sizeof(struct sctp_hashbucket))); err_ahash_alloc: sctp_dbg_objcnt_exit(); -err_init_proc: sctp_proc_exit(); +err_init_proc: cleanup_sctp_mibs(); err_init_mibs: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); -err_bucket_cachep: - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_seqpacket_protosw); - inet_unregister_protosw(&sctp_stream_protosw); -err_add_protocol: - proto_unregister(&sctp_prot); goto out; } @@ -1215,8 +1223,9 @@ SCTP_STATIC __exit void sctp_exit(void) * up all the remaining associations and all that memory. 
*/ - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Unregister with inet6/inet layers. */ + sctp_v6_del_protocol(); + inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1224,7 +1233,16 @@ SCTP_STATIC __exit void sctp_exit(void) /* Free the control endpoint. */ sock_release(sctp_ctl_socket); + /* Cleanup v6 initializations. */ sctp_v6_exit(); + + /* Unregister with socket layer. */ + inet_unregister_protosw(&sctp_stream_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); @@ -1236,16 +1254,13 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - kmem_cache_destroy(sctp_chunk_cachep); - kmem_cache_destroy(sctp_bucket_cachep); - sctp_dbg_objcnt_exit(); sctp_proc_exit(); cleanup_sctp_mibs(); - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_seqpacket_protosw); - inet_unregister_protosw(&sctp_stream_protosw); + kmem_cache_destroy(sctp_chunk_cachep); + kmem_cache_destroy(sctp_bucket_cachep); + proto_unregister(&sctp_prot); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index be783a3761c4..8d18f570c2e6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1939,7 +1939,6 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, * association. 
*/ if (!asoc->temp) { - int assoc_id; int error; asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams, @@ -1947,19 +1946,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, if (!asoc->ssnmap) goto clean_up; - retry: - if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) + error = sctp_assoc_set_id(asoc, gfp); + if (error) goto clean_up; - spin_lock_bh(&sctp_assocs_id_lock); - error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, 1, - &assoc_id); - spin_unlock_bh(&sctp_assocs_id_lock); - if (error == -EAGAIN) - goto retry; - else if (error) - goto clean_up; - - asoc->assoc_id = (sctp_assoc_t) assoc_id; } /* ADDIP Section 4.1 ASCONF Chunk Procedures diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b37a7adeb150..d9fad4f6ffc3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -862,6 +862,33 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error) sk->sk_err = error; } +/* Helper function to generate an association change event */ +static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands, + struct sctp_association *asoc, + u8 state) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_assoc_change(asoc, 0, state, 0, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, + NULL, GFP_ATOMIC); + if (ev) + sctp_ulpq_tail_event(&asoc->ulpq, ev); +} + +/* Helper function to generate an adaptation indication event */ +static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); + + if (ev) + sctp_ulpq_tail_event(&asoc->ulpq, ev); +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. 
@@ -1485,6 +1512,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SET_SK_ERR: sctp_cmd_set_sk_err(asoc, cmd->obj.error); break; + case SCTP_CMD_ASSOC_CHANGE: + sctp_cmd_assoc_change(commands, asoc, + cmd->obj.u8); + break; + case SCTP_CMD_ADAPTATION_IND: + sctp_cmd_adaptation_ind(commands, asoc); + break; + default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9e28a5d51200..f02ce3dddb7b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1656,7 +1656,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, struct sctp_association *new_asoc) { sctp_init_chunk_t *peer_init; - struct sctp_ulpevent *ev; struct sctp_chunk *repl; /* new_asoc is a brand-new association, so these are not yet @@ -1687,34 +1686,28 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, * D) IMPLEMENTATION NOTE: An implementation may choose to * send the Communication Up notification to the SCTP user * upon reception of a valid COOKIE ECHO chunk. + * + * Sadly, this needs to be implemented as a side-effect, because + * we are not guaranteed to have set the association id of the real + * association and so these notifications need to be delayed until + * the association id is allocated. */ - ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, - NULL, GFP_ATOMIC); - if (!ev) - goto nomem_ev; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_CHANGE, SCTP_U8(SCTP_COMM_UP)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaptation layer. 
+ * + * This also needs to be done as a side effect for the same reason as + * above. */ - if (asoc->peer.adaptation_ind) { - ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); - if (!ev) - goto nomem_ev; - - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - } + if (asoc->peer.adaptation_ind) + sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; -nomem_ev: - sctp_chunk_free(repl); nomem: return SCTP_DISPOSITION_NOMEM; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2fc0a92caa78..9f1a908776de 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,6 +972,7 @@ static int __sctp_connect(struct sock* sk, int walk_size = 0; union sctp_addr *sa_addr; void *addr_buf; + unsigned short port; sp = sctp_sk(sk); ep = sp->ep; @@ -992,6 +993,7 @@ static int __sctp_connect(struct sock* sk, while (walk_size < addrs_size) { sa_addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); + port = ntohs(sa_addr->v4.sin_port); /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. @@ -1005,6 +1007,12 @@ static int __sctp_connect(struct sock* sk, if (err) goto out_free; + /* Make sure the destination port is correctly set + * in all addresses. 
+ */ + if (asoc && asoc->peer.port && asoc->peer.port != port) + goto out_free; + memcpy(&to, sa_addr, af->sockaddr_len); /* Check if there already is a matching association on the @@ -5012,7 +5020,8 @@ pp_found: struct hlist_node *node; SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); - if (pp->fastreuse && sk->sk_reuse) + if (pp->fastreuse && sk->sk_reuse && + sk->sk_state != SCTP_SS_LISTENING) goto success; /* Run through the list of sockets bound to the port @@ -5029,7 +5038,8 @@ pp_found: struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; - if (reuse && sk2->sk_reuse) + if (reuse && sk2->sk_reuse && + sk2->sk_state != SCTP_SS_LISTENING) continue; if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, @@ -5050,9 +5060,13 @@ pp_not_found: * if sk->sk_reuse is too (that is, if the caller requested * SO_REUSEADDR on this socket -sk-). */ - if (hlist_empty(&pp->owner)) - pp->fastreuse = sk->sk_reuse ? 1 : 0; - else if (pp->fastreuse && !sk->sk_reuse) + if (hlist_empty(&pp->owner)) { + if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) + pp->fastreuse = 1; + else + pp->fastreuse = 0; + } else if (pp->fastreuse && + (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING)) pp->fastreuse = 0; /* We are set, so fill up all the data in the hash table @@ -5060,8 +5074,8 @@ pp_not_found: * sockets FIXME: Blurry, NPI (ipg). */ success: - inet_sk(sk)->num = snum; if (!sctp_sk(sk)->bind_hash) { + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &pp->owner); sctp_sk(sk)->bind_hash = pp; } @@ -5134,12 +5148,16 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) * This is not currently spelled out in the SCTP sockets * extensions draft, but follows the practice as seen in TCP * sockets. 
+ * + * Additionally, turn off fastreuse flag since we are not listening */ + sk->sk_state = SCTP_SS_LISTENING; if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } - sk->sk_state = SCTP_SS_LISTENING; + } else + sctp_sk(sk)->bind_hash->fastreuse = 0; + sctp_hash_endpoint(ep); return 0; } @@ -5177,11 +5195,13 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) * extensions draft, but follows the practice as seen in TCP * sockets. */ + sk->sk_state = SCTP_SS_LISTENING; if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } - sk->sk_state = SCTP_SS_LISTENING; + } else + sctp_sk(sk)->bind_hash->fastreuse = 0; + sk->sk_max_ack_backlog = backlog; sctp_hash_endpoint(ep); return 0; diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index cdcab9ca4c60..8ebfc4db7f51 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - pmap_clnt.o timer.o xdr.o \ + rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 104cbf4f769f..d158635de6c0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c @@ -123,9 +123,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, return GSS_S_COMPLETE; out_err: - if (md5cksum.data) - kfree(md5cksum.data); - token->data = NULL; token->len = 0; return GSS_S_FAILURE; @@ -152,7 +149,7 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, switch (cksumtype) { case CKSUMTYPE_HMAC_MD5: - cksumname = "md5"; + cksumname = "hmac(md5)"; break; default: dprintk("RPC: spkm3_make_checksum:" @@ -172,8 +169,12 @@ make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header, if (err) goto 
out; + err = crypto_hash_init(&desc); + if (err) + goto out; + sg_set_buf(sg, header, hdrlen); - crypto_hash_update(&desc, sg, 1); + crypto_hash_update(&desc, sg, sg->length); xdr_process_buf(body, body_offset, body->len - body_offset, spkm3_checksummer, &desc); @@ -184,5 +185,3 @@ out: return err ? GSS_S_FAILURE : 0; } - -EXPORT_SYMBOL(make_spkm3_checksum); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 396cdbe249d1..d8fbee40a19c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,8 +36,6 @@ #include <linux/sunrpc/metrics.h> -#define RPC_SLACK_SPACE (1024) /* total overkill */ - #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL #endif @@ -747,21 +745,38 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { + unsigned int slack = task->tk_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; - unsigned int bufsiz; + struct rpc_procinfo *proc = task->tk_msg.rpc_proc; dprint_status(task); + task->tk_status = 0; task->tk_action = call_bind; + if (req->rq_buffer) return; - /* FIXME: compute buffer requirements more exactly using - * auth->au_wslack */ - bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE; + if (proc->p_proc != 0) { + BUG_ON(proc->p_arglen == 0); + if (proc->p_decode != NULL) + BUG_ON(proc->p_replen == 0); + } - if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL) + /* + * Calculate the size (in quads) of the RPC call + * and reply headers, and convert both values + * to byte sizes. 
+ */ + req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen; + req->rq_callsize <<= 2; + req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen; + req->rq_rcvsize <<= 2; + + req->rq_buffer = xprt->ops->buf_alloc(task, + req->rq_callsize + req->rq_rcvsize); + if (req->rq_buffer != NULL) return; dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); @@ -788,6 +803,17 @@ rpc_task_force_reencode(struct rpc_task *task) task->tk_rqstp->rq_snd_buf.len = 0; } +static inline void +rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = len; +} + /* * 3. Encode arguments of an RPC call */ @@ -795,28 +821,17 @@ static void call_encode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct xdr_buf *sndbuf = &req->rq_snd_buf; - struct xdr_buf *rcvbuf = &req->rq_rcv_buf; - unsigned int bufsiz; kxdrproc_t encode; __be32 *p; dprint_status(task); - /* Default buffer setup */ - bufsiz = req->rq_bufsize >> 1; - sndbuf->head[0].iov_base = (void *)req->rq_buffer; - sndbuf->head[0].iov_len = bufsiz; - sndbuf->tail[0].iov_len = 0; - sndbuf->page_len = 0; - sndbuf->len = 0; - sndbuf->buflen = bufsiz; - rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz); - rcvbuf->head[0].iov_len = bufsiz; - rcvbuf->tail[0].iov_len = 0; - rcvbuf->page_len = 0; - rcvbuf->len = 0; - rcvbuf->buflen = bufsiz; + rpc_xdr_buf_init(&req->rq_snd_buf, + req->rq_buffer, + req->rq_callsize); + rpc_xdr_buf_init(&req->rq_rcv_buf, + (char *)req->rq_buffer + req->rq_callsize, + req->rq_rcvsize); /* Encode header and provided arguments */ encode = task->tk_msg.rpc_proc->p_encode; @@ -887,9 +902,11 @@ call_bind_status(struct rpc_task *task) task->tk_pid); break; case -EPROTONOSUPPORT: - dprintk("RPC: %5u remote rpcbind version 2 unavailable\n", + dprintk("RPC: %5u remote rpcbind version unavailable, 
retrying\n", task->tk_pid); - break; + task->tk_status = 0; + task->tk_action = call_bind; + return; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c deleted file mode 100644 index d9f765344589..000000000000 --- a/net/sunrpc/pmap_clnt.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * linux/net/sunrpc/pmap_clnt.c - * - * In-kernel RPC portmapper client. - * - * Portmapper supports version 2 of the rpcbind protocol (RFC 1833). - * - * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> - */ - -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/uio.h> -#include <linux/in.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/sched.h> - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_PMAP -#endif - -#define PMAP_SET 1 -#define PMAP_UNSET 2 -#define PMAP_GETPORT 3 - -struct portmap_args { - u32 pm_prog; - u32 pm_vers; - u32 pm_prot; - unsigned short pm_port; - struct rpc_xprt * pm_xprt; -}; - -static struct rpc_procinfo pmap_procedures[]; -static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); -static void pmap_getport_done(struct rpc_task *, void *); -static struct rpc_program pmap_program; - -static void pmap_getport_prepare(struct rpc_task *task, void *calldata) -{ - struct portmap_args *map = calldata; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = map, - .rpc_resp = &map->pm_port, - }; - - rpc_call_setup(task, &msg, 0); -} - -static inline struct portmap_args *pmap_map_alloc(void) -{ - return kmalloc(sizeof(struct portmap_args), GFP_NOFS); -} - -static inline void pmap_map_free(struct portmap_args *map) -{ - kfree(map); -} - -static void pmap_map_release(void *data) -{ - struct portmap_args *map = data; - - xprt_put(map->pm_xprt); - pmap_map_free(map); -} - -static const struct rpc_call_ops pmap_getport_ops = { - 
.rpc_call_prepare = pmap_getport_prepare, - .rpc_call_done = pmap_getport_done, - .rpc_release = pmap_map_release, -}; - -static inline void pmap_wake_portmap_waiters(struct rpc_xprt *xprt, int status) -{ - xprt_clear_binding(xprt); - rpc_wake_up_status(&xprt->binding, status); -} - -/** - * rpc_getport - obtain the port for a given RPC service on a given host - * @task: task that is waiting for portmapper request - * - * This one can be called for an ongoing RPC request, and can be used in - * an async (rpciod) context. - */ -void rpc_getport(struct rpc_task *task) -{ - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = task->tk_xprt; - struct sockaddr_in addr; - struct portmap_args *map; - struct rpc_clnt *pmap_clnt; - struct rpc_task *child; - int status; - - dprintk("RPC: %5u rpc_getport(%s, %u, %u, %d)\n", - task->tk_pid, clnt->cl_server, - clnt->cl_prog, clnt->cl_vers, xprt->prot); - - /* Autobind on cloned rpc clients is discouraged */ - BUG_ON(clnt->cl_parent != clnt); - - status = -EACCES; /* tell caller to check again */ - if (xprt_test_and_set_binding(xprt)) - goto bailout_nowake; - - /* Put self on queue before sending rpcbind request, in case - * pmap_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); - - /* Someone else may have bound if we slept */ - status = 0; - if (xprt_bound(xprt)) - goto bailout_nofree; - - status = -ENOMEM; - map = pmap_map_alloc(); - if (!map) - goto bailout_nofree; - map->pm_prog = clnt->cl_prog; - map->pm_vers = clnt->cl_vers; - map->pm_prot = xprt->prot; - map->pm_port = 0; - map->pm_xprt = xprt_get(xprt); - - rpc_peeraddr(clnt, (struct sockaddr *) &addr, sizeof(addr)); - pmap_clnt = pmap_create(clnt->cl_server, &addr, map->pm_prot, 0); - status = PTR_ERR(pmap_clnt); - if (IS_ERR(pmap_clnt)) - goto bailout; - - status = -EIO; - child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map); - if (IS_ERR(child)) - goto bailout_nofree; - 
rpc_put_task(child); - - task->tk_xprt->stat.bind_count++; - return; - -bailout: - pmap_map_free(map); - xprt_put(xprt); -bailout_nofree: - pmap_wake_portmap_waiters(xprt, status); -bailout_nowake: - task->tk_status = status; -} - -#ifdef CONFIG_ROOT_NFS -/** - * rpc_getport_external - obtain the port for a given RPC service on a given host - * @sin: address of remote peer - * @prog: RPC program number to bind - * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request - * - * This one is called from outside the RPC client in a synchronous task context. - */ -int rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) -{ - struct portmap_args map = { - .pm_prog = prog, - .pm_vers = vers, - .pm_prot = prot, - .pm_port = 0 - }; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[PMAP_GETPORT], - .rpc_argp = &map, - .rpc_resp = &map.pm_port, - }; - struct rpc_clnt *pmap_clnt; - char hostname[32]; - int status; - - dprintk("RPC: rpc_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", - NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - - sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); - pmap_clnt = pmap_create(hostname, sin, prot, 0); - if (IS_ERR(pmap_clnt)) - return PTR_ERR(pmap_clnt); - - /* Setup the call info struct */ - status = rpc_call_sync(pmap_clnt, &msg, 0); - - if (status >= 0) { - if (map.pm_port != 0) - return map.pm_port; - status = -EACCES; - } - return status; -} -#endif - -/* - * Portmapper child task invokes this callback via tk_exit. 
- */ -static void pmap_getport_done(struct rpc_task *child, void *data) -{ - struct portmap_args *map = data; - struct rpc_xprt *xprt = map->pm_xprt; - int status = child->tk_status; - - if (status < 0) { - /* Portmapper not available */ - xprt->ops->set_port(xprt, 0); - } else if (map->pm_port == 0) { - /* Requested RPC service wasn't registered */ - xprt->ops->set_port(xprt, 0); - status = -EACCES; - } else { - /* Succeeded */ - xprt->ops->set_port(xprt, map->pm_port); - xprt_set_bound(xprt); - status = 0; - } - - dprintk("RPC: %5u pmap_getport_done(status %d, port %u)\n", - child->tk_pid, status, map->pm_port); - - pmap_wake_portmap_waiters(xprt, status); -} - -/** - * rpc_register - set or unset a port registration with the local portmapper - * @prog: RPC program number to bind - * @vers: RPC version number to bind - * @prot: transport protocol to use to make this request - * @port: port value to register - * @okay: result code - * - * port == 0 means unregister, port != 0 means register. - */ -int rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) -{ - struct sockaddr_in sin = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; - struct portmap_args map = { - .pm_prog = prog, - .pm_vers = vers, - .pm_prot = prot, - .pm_port = port, - }; - struct rpc_message msg = { - .rpc_proc = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET], - .rpc_argp = &map, - .rpc_resp = okay, - }; - struct rpc_clnt *pmap_clnt; - int error = 0; - - dprintk("RPC: registering (%u, %u, %d, %u) with portmapper.\n", - prog, vers, prot, port); - - pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); - if (IS_ERR(pmap_clnt)) { - error = PTR_ERR(pmap_clnt); - dprintk("RPC: couldn't create pmap client. 
Error = %d\n", - error); - return error; - } - - error = rpc_call_sync(pmap_clnt, &msg, 0); - - if (error < 0) { - printk(KERN_WARNING - "RPC: failed to contact portmap (errno %d).\n", - error); - } - dprintk("RPC: registration status %d/%d\n", error, *okay); - - /* Client deleted automatically because cl_oneshot == 1 */ - return error; -} - -static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) -{ - struct rpc_create_args args = { - .protocol = proto, - .address = (struct sockaddr *)srvaddr, - .addrsize = sizeof(*srvaddr), - .servername = hostname, - .program = &pmap_program, - .version = RPC_PMAP_VERSION, - .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_ONESHOT | - RPC_CLNT_CREATE_NOPING), - }; - - srvaddr->sin_port = htons(RPC_PMAP_PORT); - if (!privileged) - args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; - return rpc_create(&args); -} - -/* - * XDR encode/decode functions for PMAP - */ -static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map) -{ - dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n", - map->pm_prog, map->pm_vers, - map->pm_prot, map->pm_port); - *p++ = htonl(map->pm_prog); - *p++ = htonl(map->pm_vers); - *p++ = htonl(map->pm_prot); - *p++ = htonl(map->pm_port); - - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); - return 0; -} - -static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp) -{ - *portp = (unsigned short) ntohl(*p++); - return 0; -} - -static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) -{ - *boolp = (unsigned int) ntohl(*p++); - return 0; -} - -static struct rpc_procinfo pmap_procedures[] = { -[PMAP_SET] = { - .p_proc = PMAP_SET, - .p_encode = (kxdrproc_t) xdr_encode_mapping, - .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_SET, - .p_name = "SET", - }, -[PMAP_UNSET] = { - .p_proc = PMAP_UNSET, - .p_encode = (kxdrproc_t) xdr_encode_mapping, 
- .p_decode = (kxdrproc_t) xdr_decode_bool, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_UNSET, - .p_name = "UNSET", - }, -[PMAP_GETPORT] = { - .p_proc = PMAP_GETPORT, - .p_encode = (kxdrproc_t) xdr_encode_mapping, - .p_decode = (kxdrproc_t) xdr_decode_port, - .p_bufsiz = 4, - .p_count = 1, - .p_statidx = PMAP_GETPORT, - .p_name = "GETPORT", - }, -}; - -static struct rpc_version pmap_version2 = { - .number = 2, - .nrprocs = 4, - .procs = pmap_procedures -}; - -static struct rpc_version * pmap_version[] = { - NULL, - NULL, - &pmap_version2 -}; - -static struct rpc_stat pmap_stats; - -static struct rpc_program pmap_program = { - .name = "portmap", - .number = RPC_PMAP_PROGRAM, - .nrvers = ARRAY_SIZE(pmap_version), - .version = pmap_version, - .stats = &pmap_stats, -}; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c new file mode 100644 index 000000000000..6c7aa8a1f0c6 --- /dev/null +++ b/net/sunrpc/rpcb_clnt.c @@ -0,0 +1,625 @@ +/* + * In-kernel rpcbind client supporting versions 2, 3, and 4 of the rpcbind + * protocol + * + * Based on RFC 1833: "Binding Protocols for ONC RPC Version 2" and + * RFC 3530: "Network File System (NFS) version 4 Protocol" + * + * Original: Gilles Quillard, Bull Open Source, 2005 <gilles.quillard@bull.net> + * Updated: Chuck Lever, Oracle Corporation, 2007 <chuck.lever@oracle.com> + * + * Descended from net/sunrpc/pmap_clnt.c, + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/errno.h> + +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/sched.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_BIND +#endif + +#define RPCBIND_PROGRAM (100000u) +#define RPCBIND_PORT (111u) + +enum { + RPCBPROC_NULL, + RPCBPROC_SET, + RPCBPROC_UNSET, + RPCBPROC_GETPORT, + RPCBPROC_GETADDR = 3, /* alias for GETPORT */ + RPCBPROC_DUMP, + RPCBPROC_CALLIT, + RPCBPROC_BCAST = 5, /* alias for CALLIT */ + RPCBPROC_GETTIME, + 
RPCBPROC_UADDR2TADDR, + RPCBPROC_TADDR2UADDR, + RPCBPROC_GETVERSADDR, + RPCBPROC_INDIRECT, + RPCBPROC_GETADDRLIST, + RPCBPROC_GETSTAT, +}; + +#define RPCB_HIGHPROC_2 RPCBPROC_CALLIT +#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR +#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT + +/* + * r_addr + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the + * US-ASCII string: + * + * h1.h2.h3.h4.p1.p2 + * + * The prefix, "h1.h2.h3.h4", is the standard textual form for + * representing an IPv4 address, which is always four octets long. + * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively, + * the first through fourth octets each converted to ASCII-decimal. + * Assuming big-endian ordering, p1 and p2 are, respectively, the first + * and second octets each converted to ASCII-decimal. For example, if a + * host, in big-endian order, has an address of 0x0A010307 and there is + * a service listening on, in big endian order, port 0x020F (decimal + * 527), then the complete universal address is "10.1.3.7.2.15". + * + * ... + * + * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the + * US-ASCII string: + * + * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2 + * + * The suffix "p1.p2" is the service port, and is computed the same way + * as with universal addresses for TCP and UDP over IPv4. The prefix, + * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for + * representing an IPv6 address as defined in Section 2.2 of [RFC2373]. + * Additionally, the two alternative forms specified in Section 2.2 of + * [RFC2373] are also acceptable. + * + * XXX: Currently this implementation does not explicitly convert the + * stored address to US-ASCII on non-ASCII systems. + */ +#define RPCB_MAXADDRLEN (128u) + +/* + * r_netid + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP + * over IPv4 the value of r_netid is the string "udp". + * + * ... 
+ * + * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP + * over IPv6 the value of r_netid is the string "udp6". + */ +#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ +#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ +#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ +#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ + +#define RPCB_MAXNETIDLEN (4u) + +/* + * r_owner + * + * The "owner" is allowed to unset a service in the rpcbind database. + * We always use the following (arbitrary) fixed string. + */ +#define RPCB_OWNER_STRING "rpcb" +#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) + +static void rpcb_getport_done(struct rpc_task *, void *); +extern struct rpc_program rpcb_program; + +struct rpcbind_args { + struct rpc_xprt * r_xprt; + + u32 r_prog; + u32 r_vers; + u32 r_prot; + unsigned short r_port; + char * r_netid; + char r_addr[RPCB_MAXADDRLEN]; + char * r_owner; +}; + +static struct rpc_procinfo rpcb_procedures2[]; +static struct rpc_procinfo rpcb_procedures3[]; + +static struct rpcb_info { + int rpc_vers; + struct rpc_procinfo * rpc_proc; +} rpcb_next_version[]; + +static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) +{ + struct rpcbind_args *map = calldata; + struct rpc_xprt *xprt = map->r_xprt; + struct rpc_message msg = { + .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc, + .rpc_argp = map, + .rpc_resp = &map->r_port, + }; + + rpc_call_setup(task, &msg, 0); +} + +static void rpcb_map_release(void *data) +{ + struct rpcbind_args *map = data; + + xprt_put(map->r_xprt); + kfree(map); +} + +static const struct rpc_call_ops rpcb_getport_ops = { + .rpc_call_prepare = rpcb_getport_prepare, + .rpc_call_done = rpcb_getport_done, + .rpc_release = rpcb_map_release, +}; + +static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) +{ + xprt_clear_binding(xprt); + rpc_wake_up_status(&xprt->binding, status); +} + +static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr 
*srvaddr, + int proto, int version, int privileged) +{ + struct rpc_create_args args = { + .protocol = proto, + .address = srvaddr, + .addrsize = sizeof(struct sockaddr_in), + .servername = hostname, + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, + .flags = (RPC_CLNT_CREATE_ONESHOT | + RPC_CLNT_CREATE_NOPING), + }; + + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + if (!privileged) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + return rpc_create(&args); +} + +/** + * rpcb_register - set or unset a port registration with the local rpcbind svc + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * @port: port value to register + * @okay: result code + * + * port == 0 means unregister, port != 0 means register. + * + * This routine supports only rpcbind version 2. + */ +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + struct rpcbind_args map = { + .r_prog = prog, + .r_vers = vers, + .r_prot = prot, + .r_port = port, + }; + struct rpc_message msg = { + .rpc_proc = &rpcb_procedures2[port ? + RPCBPROC_SET : RPCBPROC_UNSET], + .rpc_argp = &map, + .rpc_resp = okay, + }; + struct rpc_clnt *rpcb_clnt; + int error = 0; + + dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " + "rpcbind\n", (port ? 
"" : "un"), + prog, vers, prot, port); + + rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, + IPPROTO_UDP, 2, 1); + if (IS_ERR(rpcb_clnt)) + return PTR_ERR(rpcb_clnt); + + error = rpc_call_sync(rpcb_clnt, &msg, 0); + + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); + dprintk("RPC: registration status %d/%d\n", error, *okay); + + return error; +} + +#ifdef CONFIG_ROOT_NFS +/** + * rpcb_getport_external - obtain the port for an RPC service on a given host + * @sin: address of remote peer + * @prog: RPC program number to bind + * @vers: RPC version number to bind + * @prot: transport protocol to use to make this request + * + * Called from outside the RPC client in a synchronous task context. + * + * For now, this supports only version 2 queries, but is used only by + * mount_clnt for NFS_ROOT. + */ +int rpcb_getport_external(struct sockaddr_in *sin, __u32 prog, + __u32 vers, int prot) +{ + struct rpcbind_args map = { + .r_prog = prog, + .r_vers = vers, + .r_prot = prot, + .r_port = 0, + }; + struct rpc_message msg = { + .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], + .rpc_argp = &map, + .rpc_resp = &map.r_port, + }; + struct rpc_clnt *rpcb_clnt; + char hostname[40]; + int status; + + dprintk("RPC: rpcb_getport_external(%u.%u.%u.%u, %u, %u, %d)\n", + NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); + + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); + rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); + if (IS_ERR(rpcb_clnt)) + return PTR_ERR(rpcb_clnt); + + status = rpc_call_sync(rpcb_clnt, &msg, 0); + + if (status >= 0) { + if (map.r_port != 0) + return map.r_port; + status = -EACCES; + } + return status; +} +#endif + +/** + * rpcb_getport - obtain the port for a given RPC service on a given host + * @task: task that is waiting for portmapper request + * + * This one can be called for an ongoing RPC request, and can be used in + * an async (rpciod) 
context. + */ +void rpcb_getport(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + int bind_version; + struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_clnt *rpcb_clnt; + struct rpcbind_args *map; + struct rpc_task *child; + struct sockaddr addr; + int status; + + dprintk("RPC: %5u rpcb_getport(%s, %u, %u, %d)\n", + task->tk_pid, clnt->cl_server, + clnt->cl_prog, clnt->cl_vers, xprt->prot); + + /* Autobind on cloned rpc clients is discouraged */ + BUG_ON(clnt->cl_parent != clnt); + + if (xprt_test_and_set_binding(xprt)) { + status = -EACCES; /* tell caller to check again */ + dprintk("RPC: %5u rpcb_getport waiting for another binder\n", + task->tk_pid); + goto bailout_nowake; + } + + /* Put self on queue before sending rpcbind request, in case + * rpcb_getport_done completes before we return from rpc_run_task */ + rpc_sleep_on(&xprt->binding, task, NULL, NULL); + + /* Someone else may have bound if we slept */ + if (xprt_bound(xprt)) { + status = 0; + dprintk("RPC: %5u rpcb_getport already bound\n", task->tk_pid); + goto bailout_nofree; + } + + if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + xprt->bind_index = 0; + status = -EACCES; /* tell caller to try again later */ + dprintk("RPC: %5u rpcb_getport no more getport versions " + "available\n", task->tk_pid); + goto bailout_nofree; + } + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + + dprintk("RPC: %5u rpcb_getport trying rpcbind version %u\n", + task->tk_pid, bind_version); + + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); + if (!map) { + status = -ENOMEM; + dprintk("RPC: %5u rpcb_getport no memory available\n", + task->tk_pid); + goto bailout_nofree; + } + map->r_prog = clnt->cl_prog; + map->r_vers = clnt->cl_vers; + map->r_prot = xprt->prot; + map->r_port = 0; + map->r_xprt = xprt_get(xprt); + map->r_netid = (xprt->prot == IPPROTO_TCP) ?
RPCB_NETID_TCP : + RPCB_NETID_UDP; + memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), + sizeof(map->r_addr)); + map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u rpcb_getport rpcb_create failed, error %ld\n", + task->tk_pid, PTR_ERR(rpcb_clnt)); + goto bailout; + } + + child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + if (IS_ERR(child)) { + status = -EIO; + dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", + task->tk_pid); + goto bailout_nofree; + } + rpc_put_task(child); + + task->tk_xprt->stat.bind_count++; + return; + +bailout: + kfree(map); + xprt_put(xprt); +bailout_nofree: + rpcb_wake_rpcbind_waiters(xprt, status); +bailout_nowake: + task->tk_status = status; +} + +/* + * Rpcbind child task calls this callback via tk_exit. + */ +static void rpcb_getport_done(struct rpc_task *child, void *data) +{ + struct rpcbind_args *map = data; + struct rpc_xprt *xprt = map->r_xprt; + int status = child->tk_status; + + /* rpcbind server doesn't support this rpcbind protocol version */ + if (status == -EPROTONOSUPPORT) + xprt->bind_index++; + + if (status < 0) { + /* rpcbind server not available on remote host? 
*/ + xprt->ops->set_port(xprt, 0); + } else if (map->r_port == 0) { + /* Requested RPC service wasn't registered on remote host */ + xprt->ops->set_port(xprt, 0); + status = -EACCES; + } else { + /* Succeeded */ + xprt->ops->set_port(xprt, map->r_port); + xprt_set_bound(xprt); + status = 0; + } + + dprintk("RPC: %5u rpcb_getport_done(status %d, port %u)\n", + child->tk_pid, status, map->r_port); + + rpcb_wake_rpcbind_waiters(xprt, status); +} + +static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", + rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); + *p++ = htonl(rpcb->r_prog); + *p++ = htonl(rpcb->r_vers); + *p++ = htonl(rpcb->r_prot); + *p++ = htonl(rpcb->r_port); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + return 0; +} + +static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) +{ + *portp = (unsigned short) ntohl(*p++); + dprintk("RPC: rpcb_decode_getport result %u\n", + *portp); + return 0; +} + +static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) +{ + *boolp = (unsigned int) ntohl(*p++); + dprintk("RPC: rpcb_decode_set result %u\n", + *boolp); + return 0; +} + +static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", + rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); + *p++ = htonl(rpcb->r_prog); + *p++ = htonl(rpcb->r_vers); + + p = xdr_encode_string(p, rpcb->r_netid); + p = xdr_encode_string(p, rpcb->r_addr); + p = xdr_encode_string(p, rpcb->r_owner); + + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + return 0; +} + +static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, + unsigned short *portp) +{ + char *addr; + int addr_len, c, i, f, first, val; + + *portp = 0; + addr_len = (unsigned int) ntohl(*p++); + if (addr_len > RPCB_MAXADDRLEN) /* sanity */ + return -EINVAL; + + 
dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", + (char *) p); + + addr = (char *)p; + val = 0; + first = 1; + f = 1; + for (i = addr_len - 1; i > 0; i--) { + c = addr[i]; + if (c >= '0' && c <= '9') { + val += (c - '0') * f; + f *= 10; + } else if (c == '.') { + if (first) { + *portp = val; + val = first = 0; + f = 1; + } else { + *portp |= (val << 8); + break; + } + } + } + + dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); + return 0; +} + +#define RPCB_program_sz (1u) +#define RPCB_version_sz (1u) +#define RPCB_protocol_sz (1u) +#define RPCB_port_sz (1u) +#define RPCB_boolean_sz (1u) + +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) +#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) +#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_protocol_sz+RPCB_port_sz +#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ + RPCB_netid_sz+RPCB_addr_sz+ \ + RPCB_ownerstring_sz + +#define RPCB_setres_sz RPCB_boolean_sz +#define RPCB_getportres_sz RPCB_port_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + +#define PROC(proc, argtype, restype) \ + [RPCBPROC_##proc] = { \ + .p_proc = RPCBPROC_##proc, \ + .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ + .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ + .p_arglen = RPCB_##argtype##args_sz, \ + .p_replen = RPCB_##restype##res_sz, \ + .p_statidx = RPCBPROC_##proc, \ + .p_timer = 0, \ + .p_name = #proc, \ + } + +/* + * Not all rpcbind procedures described in RFC 1833 are implemented + * since the Linux kernel RPC code requires only these. 
+ */ +static struct rpc_procinfo rpcb_procedures2[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETADDR, mapping, getport), +}; + +static struct rpc_procinfo rpcb_procedures3[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETADDR, getaddr, getaddr), +}; + +static struct rpc_procinfo rpcb_procedures4[] = { + PROC(SET, mapping, set), + PROC(UNSET, mapping, set), + PROC(GETVERSADDR, getaddr, getaddr), +}; + +static struct rpcb_info rpcb_next_version[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 2, &rpcb_procedures2[RPCBPROC_GETPORT] }, + { 0, NULL }, +}; + +static struct rpc_version rpcb_version2 = { + .number = 2, + .nrprocs = RPCB_HIGHPROC_2, + .procs = rpcb_procedures2 +}; + +static struct rpc_version rpcb_version3 = { + .number = 3, + .nrprocs = RPCB_HIGHPROC_3, + .procs = rpcb_procedures3 +}; + +static struct rpc_version rpcb_version4 = { + .number = 4, + .nrprocs = RPCB_HIGHPROC_4, + .procs = rpcb_procedures4 +}; + +static struct rpc_version *rpcb_version[] = { + NULL, + NULL, + &rpcb_version2, + &rpcb_version3, + &rpcb_version4 +}; + +static struct rpc_stat rpcb_stats; + +struct rpc_program rpcb_program = { + .name = "rpcbind", + .number = RPCBIND_PROGRAM, + .nrvers = ARRAY_SIZE(rpcb_version), + .version = rpcb_version, + .stats = &rpcb_stats, +}; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6d87320074b1..4a53e94f8134 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -741,50 +741,53 @@ static void rpc_async_schedule(struct work_struct *work) * @task: RPC task that will use this buffer * @size: requested byte size * - * We try to ensure that some NFS reads and writes can always proceed - * by using a mempool when allocating 'small' buffers. + * To prevent rpciod from hanging, this allocator never sleeps, + * returning NULL if the request cannot be serviced immediately. 
+ * The caller can arrange to sleep in a way that is safe for rpciod. + * + * Most requests are 'small' (under 2KiB) and can be serviced from a + * mempool, ensuring that NFS reads and writes can always proceed, + * and that there is good locality of reference for these buffers. + * * In order to avoid memory starvation triggering more writebacks of - * NFS requests, we use GFP_NOFS rather than GFP_KERNEL. + * NFS requests, we avoid using GFP_KERNEL. */ -void * rpc_malloc(struct rpc_task *task, size_t size) +void *rpc_malloc(struct rpc_task *task, size_t size) { - struct rpc_rqst *req = task->tk_rqstp; - gfp_t gfp; + size_t *buf; + gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; - if (task->tk_flags & RPC_TASK_SWAPPER) - gfp = GFP_ATOMIC; + size += sizeof(size_t); + if (size <= RPC_BUFFER_MAXSIZE) + buf = mempool_alloc(rpc_buffer_mempool, gfp); else - gfp = GFP_NOFS; - - if (size > RPC_BUFFER_MAXSIZE) { - req->rq_buffer = kmalloc(size, gfp); - if (req->rq_buffer) - req->rq_bufsize = size; - } else { - req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp); - if (req->rq_buffer) - req->rq_bufsize = RPC_BUFFER_MAXSIZE; - } - return req->rq_buffer; + buf = kmalloc(size, gfp); + if (buf) + *buf = size; + dprintk("RPC: %5u allocated buffer of size %u at %p\n", task->tk_pid, size, buf); + return buf ? (void *) ++buf : NULL; } /** * rpc_free - free buffer allocated via rpc_malloc - * @task: RPC task with a buffer to be freed + * @buffer: buffer to free * */ -void rpc_free(struct rpc_task *task) +void rpc_free(void *buffer) { - struct rpc_rqst *req = task->tk_rqstp; + size_t size, *buf = (size_t *) buffer; - if (req->rq_buffer) { - if (req->rq_bufsize == RPC_BUFFER_MAXSIZE) - mempool_free(req->rq_buffer, rpc_buffer_mempool); - else - kfree(req->rq_buffer); - req->rq_buffer = NULL; - req->rq_bufsize = 0; - } + if (!buffer) + return; + buf--; + size = *buf; + + dprintk("RPC: freeing buffer of size %u at %p\n", + size, buf); + if (size <= RPC_BUFFER_MAXSIZE) + mempool_free(buf, 
rpc_buffer_mempool); + else + kfree(buf); } /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b4db53ff1435..b7503c103ae8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -757,7 +757,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) if (progp->pg_vers[i]->vs_hidden) continue; - error = rpc_register(progp->pg_prog, i, proto, port, &dummy); + error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); if (error < 0) break; if (port && !dummy) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 456a14510308..5b05b73e4c1d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -823,7 +823,6 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; - req->rq_bufsize = 0; req->rq_xid = xprt_alloc_xid(xprt); req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); @@ -855,7 +854,7 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(task); + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -928,6 +927,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si xprt->timer.data = (unsigned long) xprt; xprt->last_used = jiffies; xprt->cwnd = RPC_INITCWND; + xprt->bind_index = 0; rpc_init_wait_queue(&xprt->binding, "xprt_binding"); rpc_init_wait_queue(&xprt->pending, "xprt_pending"); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a5a32029e728..cc33c5880abb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1476,7 +1476,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, - .rpcbind = rpc_getport, + .rpcbind = rpcb_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc 
= rpc_malloc, @@ -1493,7 +1493,7 @@ static struct rpc_xprt_ops xs_udp_ops = { static struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, - .rpcbind = rpc_getport, + .rpcbind = rpcb_getport, .set_port = xs_set_port, .connect = xs_connect, .buf_alloc = rpc_malloc, diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 67bb29b44d1b..0ee6ded18f3a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -120,16 +120,18 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = dev_base; + struct net_device *dev, *pdev; struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; /* Find device with specified name */ - - while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) { - dev = dev->next; - } + dev = NULL; + for_each_netdev(pdev) + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { + dev = pdev; + break; + } if (!dev) return -ENODEV; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 263e34e45265..95271e8426a1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -579,7 +579,7 @@ static inline int xfrm_byidx_should_resize(int total) return 0; } -void xfrm_spd_getinfo(struct xfrm_spdinfo *si) +void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f3a61ebd8d65..9955ff4da0a2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -421,7 +421,7 @@ restart: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrm_sadinfo *si) +void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); si->sadcnt = xfrm_state_num; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index
4210d91624cd..b14c7e590c31 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -674,7 +674,9 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { - struct xfrm_spdinfo si; + struct xfrmk_spdinfo si; + struct xfrmu_spdinfo spc; + struct xfrmu_spdhinfo sph; struct nlmsghdr *nlh; u32 *f; @@ -685,23 +687,17 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; xfrm_spd_getinfo(&si); - - if (flags & XFRM_SPD_HMASK) - NLA_PUT_U32(skb, XFRMA_SPDHMASK, si.spdhcnt); - if (flags & XFRM_SPD_HMAX) - NLA_PUT_U32(skb, XFRMA_SPDHMAX, si.spdhmcnt); - if (flags & XFRM_SPD_ICNT) - NLA_PUT_U32(skb, XFRMA_SPDICNT, si.incnt); - if (flags & XFRM_SPD_OCNT) - NLA_PUT_U32(skb, XFRMA_SPDOCNT, si.outcnt); - if (flags & XFRM_SPD_FCNT) - NLA_PUT_U32(skb, XFRMA_SPDFCNT, si.fwdcnt); - if (flags & XFRM_SPD_ISCNT) - NLA_PUT_U32(skb, XFRMA_SPDISCNT, si.inscnt); - if (flags & XFRM_SPD_OSCNT) - NLA_PUT_U32(skb, XFRMA_SPDOSCNT, si.inscnt); - if (flags & XFRM_SPD_FSCNT) - NLA_PUT_U32(skb, XFRMA_SPDFSCNT, si.inscnt); + spc.incnt = si.incnt; + spc.outcnt = si.outcnt; + spc.fwdcnt = si.fwdcnt; + spc.inscnt = si.inscnt; + spc.outscnt = si.outscnt; + spc.fwdscnt = si.fwdscnt; + sph.spdhcnt = si.spdhcnt; + sph.spdhmcnt = si.spdhmcnt; + + NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); return nlmsg_end(skb, nlh); @@ -719,23 +715,8 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 seq = nlh->nlmsg_seq; int len = NLMSG_LENGTH(sizeof(u32)); - - if (*flags & XFRM_SPD_HMASK) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_HMAX) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_ICNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_OCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_FCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & 
XFRM_SPD_ISCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_OSCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_FSCNT) - len += RTA_SPACE(sizeof(u32)); + len += RTA_SPACE(sizeof(struct xfrmu_spdinfo)); + len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo)); r_skb = alloc_skb(len, GFP_ATOMIC); if (r_skb == NULL) @@ -749,7 +730,8 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { - struct xfrm_sadinfo si; + struct xfrmk_sadinfo si; + struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; u32 *f; @@ -761,12 +743,11 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) *f = flags; xfrm_sad_getinfo(&si); - if (flags & XFRM_SAD_HMASK) - NLA_PUT_U32(skb, XFRMA_SADHMASK, si.sadhcnt); - if (flags & XFRM_SAD_HMAX) - NLA_PUT_U32(skb, XFRMA_SADHMAX, si.sadhmcnt); - if (flags & XFRM_SAD_CNT) - NLA_PUT_U32(skb, XFRMA_SADCNT, si.sadcnt); + sh.sadhmcnt = si.sadhmcnt; + sh.sadhcnt = si.sadhcnt; + + NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt); + NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); return nlmsg_end(skb, nlh); @@ -784,12 +765,8 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 seq = nlh->nlmsg_seq; int len = NLMSG_LENGTH(sizeof(u32)); - if (*flags & XFRM_SAD_HMASK) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SAD_HMAX) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SAD_CNT) - len += RTA_SPACE(sizeof(u32)); + len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo)); + len += RTA_SPACE(sizeof(u32)); r_skb = alloc_skb(len, GFP_ATOMIC); |