Diffstat (limited to 'net')
59 files changed, 1629 insertions, 508 deletions
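Most of the hunks below convert open-coded atomic_read() calls on sk->sk_wmem_alloc and sk->sk_rmem_alloc to new accessor helpers, and account for sk_wmem_alloc now carrying an initial reference of 1 for the socket's lifetime (see the atomic_set(&sk->sk_wmem_alloc, 1) hunk in vcc_create below). The helpers themselves are added in include/net/sock.h, which falls outside this 'net'-limited diffstat; a minimal sketch of their 2.6.31-era form, for reference while reading the hunks:

static inline int sk_wmem_alloc_get(const struct sock *sk)
{
	/* subtract the extra reference held for the socket's lifetime */
	return atomic_read(&sk->sk_wmem_alloc) - 1;
}

static inline int sk_rmem_alloc_get(const struct sock *sk)
{
	return atomic_read(&sk->sk_rmem_alloc);
}

static inline int sk_has_allocations(const struct sock *sk)
{
	/* any send or receive memory still outstanding? */
	return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk);
}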
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index fd8e0847b254..80caad1a31a5 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -204,8 +204,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) "%02X %d\n", s->sk_type, ntohs(at->src_net), at->src_node, at->src_port, ntohs(at->dest_net), at->dest_node, at->dest_port, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b603cbacdc58..590b83963622 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -162,8 +162,7 @@ static void atalk_destroy_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; add_timer(&sk->sk_timer); } else @@ -175,8 +174,7 @@ static inline void atalk_destroy_socket(struct sock *sk) atalk_remove_socket(sk); skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { setup_timer(&sk->sk_timer, atalk_destroy_timer, (unsigned long)sk); sk->sk_timer.expires = jiffies + SOCK_DESTROY_TIME; @@ -1750,8 +1748,7 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { /* Protocol layer */ case TIOCOUTQ: { - long amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + long amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; diff --git a/net/atm/common.c b/net/atm/common.c index d34edbe754c8..c1c97936192c 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -62,15 +62,15 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc,unsigned int size) struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { + if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", - atomic_read(&sk->sk_wmem_alloc), size, + sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } - while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), - skb->truesize); + while (!(skb = alloc_skb(size, GFP_KERNEL))) + schedule(); + pr_debug("AlTx %d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; } @@ -145,7 +145,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family) memset(&vcc->local,0,sizeof(struct sockaddr_atmsvc)); memset(&vcc->remote,0,sizeof(struct sockaddr_atmsvc)); vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */ - atomic_set(&sk->sk_wmem_alloc, 0); + atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_rmem_alloc, 0); vcc->push = NULL; vcc->pop = NULL; diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 76ed3c8d26e6..4da8892ced5f 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -63,8 +63,7 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg error = -EINVAL; goto done; } - error = put_user(sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc), + error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), (int __user *) argp) ? 
-EFAULT : 0; goto done; case SIOCINQ: diff --git a/net/atm/proc.c b/net/atm/proc.c index e7b3b273907d..38de5ff61ecd 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -204,8 +204,8 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc) seq_printf(seq, "%3d", sk->sk_family); } seq_printf(seq, " %04lx %5d %7d/%7d %7d/%7d [%d]\n", vcc->flags, sk->sk_err, - atomic_read(&sk->sk_wmem_alloc), sk->sk_sndbuf, - atomic_read(&sk->sk_rmem_alloc), sk->sk_rcvbuf, + sk_wmem_alloc_get(sk), sk->sk_sndbuf, + sk_rmem_alloc_get(sk), sk->sk_rcvbuf, atomic_read(&sk->sk_refcnt)); } diff --git a/net/atm/raw.c b/net/atm/raw.c index b0a2d8cb6744..cbfcc71a17b1 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -33,7 +33,7 @@ static void atm_pop_raw(struct atm_vcc *vcc,struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("APopR (%d) %d -= %d\n", vcc->vci, - atomic_read(&sk->sk_wmem_alloc), skb->truesize); + sk_wmem_alloc_get(sk), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd9d06f291dc..da0f64f82b57 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -330,8 +330,7 @@ void ax25_destroy_socket(ax25_cb *ax25) } if (ax25->sk != NULL) { - if (atomic_read(&ax25->sk->sk_wmem_alloc) || - atomic_read(&ax25->sk->sk_rmem_alloc)) { + if (sk_has_allocations(ax25->sk)) { /* Defer: outstanding buffers */ setup_timer(&ax25->dtimer, ax25_destroy_timer, (unsigned long)ax25); @@ -1691,7 +1690,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; res = put_user(amount, (int __user *)argp); @@ -1781,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ); ax25_info.n2count = ax25->n2count; ax25_info.state = ax25->state; - ax25_info.rcv_q = atomic_read(&sk->sk_rmem_alloc); - ax25_info.snd_q = atomic_read(&sk->sk_wmem_alloc); + ax25_info.rcv_q = sk_rmem_alloc_get(sk); + ax25_info.snd_q = sk_wmem_alloc_get(sk); ax25_info.vs = ax25->vs; ax25_info.vr = ax25->vr; ax25_info.va = ax25->va; @@ -1922,8 +1922,8 @@ static int ax25_info_show(struct seq_file *seq, void *v) if (ax25->sk != NULL) { seq_printf(seq, " %d %d %lu\n", - atomic_read(&ax25->sk->sk_wmem_alloc), - atomic_read(&ax25->sk->sk_rmem_alloc), + sk_wmem_alloc_get(ax25->sk), + sk_rmem_alloc_get(ax25->sk), sock_i_ino(ax25->sk)); } else { seq_puts(seq, " * * *\n"); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 02b9baa1930b..0250e0600150 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -337,7 +337,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (sk->sk_state == BT_LISTEN) return -EINVAL; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *) arg); diff --git a/net/core/dev.c b/net/core/dev.c index 576a61574a93..baf2dc13a34a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3461,10 +3461,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast address changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe.
*/ - if (dev->uc_count > 0 && !dev->uc_promisc) { + if (dev->uc.count > 0 && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc_count == 0 && dev->uc_promisc) { + } else if (dev->uc.count == 0 && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } @@ -3483,9 +3483,8 @@ void dev_set_rx_mode(struct net_device *dev) /* hw addresses list handling functions */ -static int __hw_addr_add(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; int alloc_size; @@ -3493,7 +3492,7 @@ static int __hw_addr_add(struct list_head *list, int *delta, if (addr_len > MAX_ADDR_LEN) return -EINVAL; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && ha->type == addr_type) { ha->refcount++; @@ -3512,9 +3511,8 @@ static int __hw_addr_add(struct list_head *list, int *delta, ha->type = addr_type; ha->refcount = 1; ha->synced = false; - list_add_tail_rcu(&ha->list, list); - if (delta) - (*delta)++; + list_add_tail_rcu(&ha->list, &list->list); + list->count++; return 0; } @@ -3526,120 +3524,121 @@ static void ha_rcu_free(struct rcu_head *head) kfree(ha); } -static int __hw_addr_del(struct list_head *list, int *delta, - unsigned char *addr, int addr_len, - unsigned char addr_type) +static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, list, list) { + list_for_each_entry(ha, &list->list, list) { if (!memcmp(ha->addr, addr, addr_len) && (ha->type == addr_type || !addr_type)) { if (--ha->refcount) return 0; list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); - if (delta) - (*delta)--; + list->count--; return 0; } } return -ENOENT; } -static int __hw_addr_add_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { int err; struct netdev_hw_addr *ha, *ha2; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - err = __hw_addr_add(to_list, to_delta, ha->addr, - addr_len, type); + err = __hw_addr_add(to_list, ha->addr, addr_len, type); if (err) goto unroll; } return 0; unroll: - list_for_each_entry(ha2, from_list, list) { + list_for_each_entry(ha2, &from_list->list, list) { if (ha2 == ha) break; type = addr_type ? addr_type : ha2->type; - __hw_addr_del(to_list, to_delta, ha2->addr, - addr_len, type); + __hw_addr_del(to_list, ha2->addr, addr_len, type); } return err; } -static void __hw_addr_del_multiple(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int addr_len, +static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len, unsigned char addr_type) { struct netdev_hw_addr *ha; unsigned char type; - list_for_each_entry(ha, from_list, list) { + list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? 
addr_type : ha->type; - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, addr_type); } } -static int __hw_addr_sync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { int err = 0; struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (!ha->synced) { - err = __hw_addr_add(to_list, to_delta, ha->addr, + err = __hw_addr_add(to_list, ha->addr, addr_len, ha->type); if (err) break; ha->synced = true; ha->refcount++; } else if (ha->refcount == 1) { - __hw_addr_del(to_list, to_delta, ha->addr, - addr_len, ha->type); - __hw_addr_del(from_list, from_delta, ha->addr, - addr_len, ha->type); + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } return err; } -static void __hw_addr_unsync(struct list_head *to_list, int *to_delta, - struct list_head *from_list, int *from_delta, +static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, int addr_len) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, from_list, list) { + list_for_each_entry_safe(ha, tmp, &from_list->list, list) { if (ha->synced) { - __hw_addr_del(to_list, to_delta, ha->addr, + __hw_addr_del(to_list, ha->addr, addr_len, ha->type); ha->synced = false; - __hw_addr_del(from_list, from_delta, ha->addr, + __hw_addr_del(from_list, ha->addr, addr_len, ha->type); } } } - -static void __hw_addr_flush(struct list_head *list) +static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; - list_for_each_entry_safe(ha, tmp, list, list) { + list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); call_rcu(&ha->rcu_head, ha_rcu_free); } + list->count = 0; +} + +static void __hw_addr_init(struct netdev_hw_addr_list *list) +{ + INIT_LIST_HEAD(&list->list); + list->count = 0; } /* Device addresses handling functions */ @@ -3648,7 +3647,7 @@ static void dev_addr_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->dev_addr_list); + __hw_addr_flush(&dev->dev_addrs); dev->dev_addr = NULL; } @@ -3660,16 +3659,16 @@ static int dev_addr_init(struct net_device *dev) /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->dev_addr_list); + __hw_addr_init(&dev->dev_addrs); memset(addr, 0, sizeof(addr)); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, sizeof(addr), + err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), NETDEV_HW_ADDR_T_LAN); if (!err) { /* * Get the first (previously created) address from the list * and set dev_addr pointer to this location. */ - ha = list_first_entry(&dev->dev_addr_list, + ha = list_first_entry(&dev->dev_addrs.list, struct netdev_hw_addr, list); dev->dev_addr = ha->addr; } @@ -3694,8 +3693,7 @@ int dev_addr_add(struct net_device *dev, unsigned char *addr, ASSERT_RTNL(); - err = __hw_addr_add(&dev->dev_addr_list, NULL, addr, dev->addr_len, - addr_type); + err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3725,11 +3723,12 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr, * We can not remove the first address from the list because * dev->dev_addr points to that. 
*/ - ha = list_first_entry(&dev->dev_addr_list, struct netdev_hw_addr, list); + ha = list_first_entry(&dev->dev_addrs.list, + struct netdev_hw_addr, list); if (ha->addr == dev->dev_addr && ha->refcount == 1) return -ENOENT; - err = __hw_addr_del(&dev->dev_addr_list, NULL, addr, dev->addr_len, + err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); @@ -3757,8 +3756,7 @@ int dev_addr_add_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - err = __hw_addr_add_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); @@ -3784,15 +3782,14 @@ int dev_addr_del_multiple(struct net_device *to_dev, if (from_dev->addr_len != to_dev->addr_len) return -EINVAL; - __hw_addr_del_multiple(&to_dev->dev_addr_list, NULL, - &from_dev->dev_addr_list, + __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, to_dev->addr_len, addr_type); call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); return 0; } EXPORT_SYMBOL(dev_addr_del_multiple); -/* unicast and multicast addresses handling functions */ +/* multicast addresses handling functions */ int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int glbl) @@ -3868,8 +3865,8 @@ int dev_unicast_delete(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_del(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_del(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3892,8 +3889,8 @@ int dev_unicast_add(struct net_device *dev, void *addr) ASSERT_RTNL(); - err = __hw_addr_add(&dev->uc_list, &dev->uc_count, addr, - dev->addr_len, NETDEV_HW_ADDR_T_UNICAST); + err = __hw_addr_add(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST); if (!err) __dev_set_rx_mode(dev); return err; @@ -3966,8 +3963,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - err = __hw_addr_sync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); return err; @@ -3990,8 +3986,7 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return; - __hw_addr_unsync(&to->uc_list, &to->uc_count, - &from->uc_list, &from->uc_count, to->addr_len); + __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); } EXPORT_SYMBOL(dev_unicast_unsync); @@ -4000,15 +3995,14 @@ static void dev_unicast_flush(struct net_device *dev) { /* rtnl_mutex must be held here */ - __hw_addr_flush(&dev->uc_list); - dev->uc_count = 0; + __hw_addr_flush(&dev->uc); } static void dev_unicast_init(struct net_device *dev) { /* rtnl_mutex must be held here */ - INIT_LIST_HEAD(&dev->uc_list); + __hw_addr_init(&dev->uc); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c93435b0347..9e0597d189b0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -204,6 +204,10 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->end = skb->tail + size; kmemcheck_annotate_bitfield(skb, flags1); kmemcheck_annotate_bitfield(skb, flags2); +#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb->mac_header = ~0U; +#endif + /* make 
sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); @@ -665,7 +669,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; - new->mac_header += offset; + if (skb_mac_header_was_set(new)) + new->mac_header += offset; #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -847,7 +852,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->tail += off; skb->transport_header += off; skb->network_header += off; - skb->mac_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; @@ -939,7 +945,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; - n->mac_header += off; + if (skb_mac_header_was_set(skb)) + n->mac_header += off; #endif return n; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a5e3a593e472..d351b8db0df5 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1240,7 +1240,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return val; case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8121bf0029e3..2e1f836d4240 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -540,8 +540,7 @@ static void econet_destroy_timer(unsigned long data) { struct sock *sk=(struct sock *)data; - if (!atomic_read(&sk->sk_wmem_alloc) && - !atomic_read(&sk->sk_rmem_alloc)) { + if (!sk_has_allocations(sk)) { sk_free(sk); return; } @@ -579,8 +578,7 @@ static int econet_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); - if (atomic_read(&sk->sk_rmem_alloc) || - atomic_read(&sk->sk_wmem_alloc)) { + if (sk_has_allocations(sk)) { sk->sk_timer.data = (unsigned long)sk; sk->sk_timer.expires = jiffies + HZ; sk->sk_timer.function = econet_destroy_timer; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 882a927cefae..3bb6bdb1dac1 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -39,14 +39,6 @@ #include "af802154.h" -#define DBG_DUMP(data, len) { \ - int i; \ - pr_debug("function: %s: data: len %d:\n", __func__, len); \ - for (i = 0; i < len; i++) {\ - pr_debug("%02x: %02x\n", i, (data)[i]); \ - } \ -} - /* * Utility function for families */ @@ -302,10 +294,12 @@ static struct net_proto_family ieee802154_family_ops = { static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - DBG_DUMP(skb->data, skb->len); if (!netif_running(dev)) return -ENODEV; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); +#endif if (!net_eq(dev_net(dev), &init_net)) goto drop; diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1779677aed46..14d39840dd62 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -126,7 +126,8 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { 
- int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d1a39b1277d6..012cf5a68581 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -391,13 +391,8 @@ static inline void tnode_free(struct tnode *tn) static void tnode_free_safe(struct tnode *tn) { BUG_ON(IS_LEAF(tn)); - - if (node_parent((struct node *) tn)) { - tn->tnode_free = tnode_free_head; - tnode_free_head = tn; - } else { - tnode_free(tn); - } + tn->tnode_free = tnode_free_head; + tnode_free_head = tn; } static void tnode_free_flush(void) @@ -1009,7 +1004,7 @@ fib_find_node(struct trie *t, u32 key) return NULL; } -static struct node *trie_rebalance(struct trie *t, struct tnode *tn) +static void trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; t_key cindex, key; @@ -1033,12 +1028,13 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) } /* Handle last (top) tnode */ - if (IS_TNODE(tn)) { + if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - tnode_free_flush(); - } - return (struct node *)tn; + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); + + return; } /* only used from updater-side */ @@ -1186,7 +1182,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) /* Rebalance the trie */ - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); done: return fa_head; } @@ -1605,7 +1601,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) if (tp) { t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, NULL); - rcu_assign_pointer(t->trie, trie_rebalance(t, tp)); + trie_rebalance(t, tp); } else rcu_assign_pointer(t->trie, NULL); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b0b273503e2a..a706a47f4dbb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -156,10 +156,10 @@ static int inet_csk_diag_fill(struct sock *sk, r->idiag_inode = sock_i_ino(sk); if (minfo) { - minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_rmem = sk_rmem_alloc_get(sk); minfo->idiag_wmem = sk->sk_wmem_queued; minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); + minfo->idiag_tmem = sk_wmem_alloc_get(sk); } handler->idiag_get_info(sk, r, info); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3dc9171a272f..2979f14bb188 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -799,7 +799,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: { @@ -935,8 +936,8 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cd76b3cb7092..65b3a8b11a6c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,16 @@ restart: now = jiffies; if (!rt_caching(dev_net(rt->u.dst.dev))) { - rt_drop(rt); - return 0; + /* + * If 
we're not caching, just tell the caller we + * were successful and don't touch the route. The + * caller holds the sole reference to the cache entry, and + * it will be released when the caller is done with it. + * If we drop it here, the callers have no way to resolve routes + * when we're not caching. Instead, just point *rp at rt, so + * the caller gets a single use out of the route. + */ + goto report_and_exit; } rthp = &rt_hash_table[hash].chain; @@ -1217,6 +1225,8 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); + +report_and_exit: if (rp) *rp = rt; else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8f4158d7c9a6..80e3812837ad 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -840,7 +840,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } @@ -1721,8 +1722,8 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 36a090d87a3d..8b0b6f948063 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1130,7 +1130,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) switch(cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: @@ -1236,8 +1237,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fc333d854728..023beda6b224 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1061,8 +1061,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 1627050e29fd..417b0e309495 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1835,7 +1835,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; rc = put_user(amount, (int __user *)argp); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 5ed97ad0e2e3..576178482f89 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -280,8 +280,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) } seq_printf(seq, "%08X %08X %02X %03d\n", - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s),
s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); out: return 0; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 3eb5bcc75f99..5922febe25c4 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1762,7 +1762,8 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; if (put_user(amount, (unsigned int __user *)arg)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 656cbd195825..6be5f92d1094 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -54,6 +54,38 @@ static const u8 iprm_shutdown[8] = #define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ #define CB_TRGCLS_LEN (TRGCLS_SIZE) +#define __iucv_sock_wait(sk, condition, timeo, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + long __timeo = timeo; \ + ret = 0; \ + while (!(condition)) { \ + prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + if (!__timeo) { \ + ret = -EAGAIN; \ + break; \ + } \ + if (signal_pending(current)) { \ + ret = sock_intr_errno(__timeo); \ + break; \ + } \ + release_sock(sk); \ + __timeo = schedule_timeout(__timeo); \ + lock_sock(sk); \ + ret = sock_error(sk); \ + if (ret) \ + break; \ + } \ + finish_wait(sk->sk_sleep, &__wait); \ +} while (0) + +#define iucv_sock_wait(sk, condition, timeo) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __iucv_sock_wait(sk, condition, timeo, __ret); \ + __ret; \ +}) static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -238,6 +270,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) return msg->length; } +/** + * iucv_sock_in_state() - check for specific states + * @sk: sock structure + * @state: first iucv sk state + * @state2: second iucv sk state + * + * Returns true if the socket is in either the first or second state. + */ +static int iucv_sock_in_state(struct sock *sk, int state, int state2) +{ + return (sk->sk_state == state || sk->sk_state == state2); +} + +/** + * iucv_below_msglim() - function to check if messages can be sent + * @sk: sock structure + * + * Returns true if the send queue length is lower than the message limit. + * Always returns true if the socket is not connected (no iucv path for + * checking the message limit).
+ */ +static inline int iucv_below_msglim(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (sk->sk_state != IUCV_CONNECTED) + return 1; + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); +} + +/** + * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit + */ +static void iucv_sock_wake_msglim(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_all(sk->sk_sleep); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + read_unlock(&sk->sk_callback_lock); +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -329,7 +403,9 @@ static void iucv_sock_close(struct sock *sk) timeo = sk->sk_lingertime; else timeo = IUCV_DISCONN_TIMEOUT; - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + err = iucv_sock_wait(sk, + iucv_sock_in_state(sk, IUCV_CLOSED, 0), + timeo); } case IUCV_CLOSING: /* fall through */ @@ -510,39 +586,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - add_wait_queue(sk->sk_sleep, &wait); - while (sk->sk_state != state && sk->sk_state != state2) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return err; -} - /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -687,8 +730,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state != IUCV_CONNECTED) { - err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, - sock_sndtimeo(sk, flags & O_NONBLOCK)); + err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, + IUCV_DISCONN), + sock_sndtimeo(sk, flags & O_NONBLOCK)); } if (sk->sk_state == IUCV_DISCONN) { @@ -842,9 +886,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_message txmsg; struct cmsghdr *cmsg; int cmsg_done; + long timeo; char user_id[9]; char appl_id[9]; int err; + int noblock = msg->msg_flags & MSG_DONTWAIT; err = sock_error(sk); if (err) @@ -864,108 +910,119 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED) { - /* initialize defaults */ - cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; + /* Return if the socket is not in connected state */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ENOTCONN; + goto out; + } - /* iterate over control messages */ - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; - if (!CMSG_OK(msg, cmsg)) { - err = -EINVAL; - goto out; - } + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } + + if (cmsg->cmsg_level != SOL_IUCV) + continue; - if (cmsg->cmsg_level != SOL_IUCV) - continue; + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; - if (cmsg->cmsg_type & 
cmsg_done) { + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { err = -EINVAL; goto out; } - cmsg_done |= cmsg->cmsg_type; - - switch (cmsg->cmsg_type) { - case SCM_IUCV_TRGCLS: - if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { - err = -EINVAL; - goto out; - } - /* set iucv message target class */ - memcpy(&txmsg.class, - (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); - break; + break; - default: - err = -EINVAL; - goto out; - break; - } + default: + err = -EINVAL; + goto out; + break; } + } - /* allocate one skb for each iucv message: - * this is fine for SOCK_SEQPACKET (unless we want to support - * segmented records using the MSG_EOR flag), but - * for SOCK_STREAM we might want to improve it in future */ - if (!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) - goto out; + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ + skb = sock_alloc_send_skb(sk, len, noblock, &err); + if (!skb) + goto out; + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto fail; + } - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - err = -EFAULT; - goto fail; - } + /* wait if the outstanding message limit for the iucv path has been reached */ + timeo = sock_sndtimeo(sk, noblock); + err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo); + if (err) + goto fail; - /* increment and save iucv message tag for msg_completion cbk */ - txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); - skb_queue_tail(&iucv->send_skb_q, skb); + /* return -ECONNRESET if the socket is no longer connected */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ECONNRESET; + goto fail; + } - if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) - && skb->len <= 7) { - err = iucv_send_iprm(iucv->path, &txmsg, skb); + /* increment and save iucv message tag for msg_completion cbk */ + txmsg.tag = iucv->send_tag++; + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + skb_queue_tail(&iucv->send_skb_q, skb); - /* on success: there is no message_complete callback - * for an IPRMDATA msg; remove skb from send queue */ - if (err == 0) { - skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); - } + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); - /* this error should never happen since the - * IUCV_IPRMDATA path flag is set... sever path */ - if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); - skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; - goto fail; - } - } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); - if (err) { - if (err == 3) { - user_id[8] = 0; - memcpy(user_id, iucv->dst_user_id, 8); - appl_id[8] = 0; - memcpy(appl_id, iucv->dst_name, 8); - pr_err("Application %s on z/VM guest %s" - " exceeds message limit\n", - user_id, appl_id); - } + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set...
sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } - - } else { - err = -ENOTCONN; - goto out; + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) { + user_id[8] = 0; + memcpy(user_id, iucv->dst_user_id, 8); + appl_id[8] = 0; + memcpy(appl_id, iucv->dst_name, 8); + pr_err("Application %s on z/VM guest %s" + " exceeds message limit\n", + appl_id, user_id); + err = -EAGAIN; + } else + err = -EPIPE; + skb_unlink(skb, &iucv->send_skb_q); + goto fail; } release_sock(sk); @@ -1581,7 +1638,11 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - kfree_skb(this); + if (this) { + kfree_skb(this); + /* wake up any process waiting for sending */ + iucv_sock_wake_msglim(sk); + } } BUG_ON(!this); diff --git a/net/key/af_key.c b/net/key/af_key.c index 643c1be2d02e..dba9abd27f90 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3662,8 +3662,8 @@ static int pfkey_seq_show(struct seq_file *f, void *v) seq_printf(f ,"%p %-6d %-6u %-6u %-6u %-6lu\n", s, atomic_read(&s->sk_refcnt), - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), sock_i_uid(s), sock_i_ino(s) ); diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index d208b3396d94..f97be471fe2e 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -134,8 +134,8 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "@%02X ", llc->sap->laddr.lsap); llc_ui_format_mac(seq, llc->daddr.mac); seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap, - atomic_read(&sk->sk_wmem_alloc), - atomic_read(&sk->sk_rmem_alloc) - llc->copied_seq, + sk_wmem_alloc_get(sk), + sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, llc->link); diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 11c72311f35b..6c439cd5ccea 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -163,6 +163,29 @@ static const struct file_operations noack_ops = { .open = mac80211_open_file_generic }; +static ssize_t queues_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long flags; + char buf[IEEE80211_MAX_QUEUES * 20]; + int q, res = 0; + + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + for (q = 0; q < local->hw.queues; q++) + res += sprintf(buf + res, "%02d: %#.8lx/%d\n", q, + local->queue_stop_reasons[q], + __netif_subqueue_stopped(local->mdev, q)); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, res); +} + +static const struct file_operations queues_ops = { + .read = queues_read, + .open = mac80211_open_file_generic +}; + /* statistics stuff */ #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) 
\ @@ -298,6 +321,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(tsf); + DEBUGFS_ADD(queues); DEBUGFS_ADD_MODE(reset, 0200); DEBUGFS_ADD(noack); @@ -350,6 +374,7 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(wep_iv); DEBUGFS_DEL(tsf); + DEBUGFS_DEL(queues); DEBUGFS_DEL(reset); DEBUGFS_DEL(noack); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4dbc28964196..68eb5052179a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -783,6 +783,7 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *wep_iv; struct dentry *tsf; + struct dentry *queues; struct dentry *reset; struct dentry *noack; struct dentry *statistics; @@ -1100,7 +1101,6 @@ void ieee802_11_parse_elems(u8 *start, size_t len, u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, struct ieee802_11_elems *elems, u64 filter, u32 crc); -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d779c57a8220..aca22b00b6a3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1102,14 +1102,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; u32 changed = 0, config_changed = 0; - rcu_read_lock(); - - sta = sta_info_get(local, ifmgd->bssid); - if (!sta) { - rcu_read_unlock(); - return; - } - if (deauth) { ifmgd->direct_probe_tries = 0; ifmgd->auth_tries = 0; @@ -1120,7 +1112,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_lock(); + sta = sta_info_get(local, ifmgd->bssid); + if (sta) + ieee80211_sta_tear_down_BA_sessions(sta); + rcu_read_unlock(); bss = ieee80211_rx_bss_get(local, ifmgd->bssid, conf->channel->center_freq, @@ -1156,8 +1152,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->ssid, ifmgd->ssid_len); } - rcu_read_unlock(); - ieee80211_set_wmm_default(sdata); ieee80211_recalc_idle(local); @@ -2223,7 +2217,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) capa_mask, capa_val); if (bss) { - ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); + local->oper_channel = bss->cbss.channel; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + if (!(ifmgd->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(sdata, bss->ssid, bss->ssid_len); @@ -2445,6 +2442,14 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_DEAUTH_LEAVING); + if (ifmgd->ssid_len == 0) { + /* + * Only allow association to be started if a valid SSID + * is configured. + */ + return; + } + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) || ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); @@ -2476,6 +2481,10 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size ifmgd = &sdata->u.mgd; if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { + if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + /* * Do not use reassociation if SSID is changed (different ESS). 
*/ @@ -2500,6 +2509,11 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + if (compare_ether_addr(bssid, ifmgd->bssid) != 0 && + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, + WLAN_REASON_DEAUTH_LEAVING); + if (is_valid_ether_addr(bssid)) { memcpy(ifmgd->bssid, bssid, ETH_ALEN); ifmgd->flags |= IEEE80211_STA_BSSID_SET; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 66ce96a69f31..915e77769312 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -774,31 +774,6 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, dev_queue_xmit(skb); } -int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) -{ - int ret = -EINVAL; - struct ieee80211_channel *chan; - struct ieee80211_local *local = sdata->local; - - chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); - - if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - chan->flags & IEEE80211_CHAN_NO_IBSS) - return ret; - local->oper_channel = chan; - local->oper_channel_type = NL80211_CHAN_NO_HT; - - if (local->sw_scanning || local->hw_scanning) - ret = 0; - else - ret = ieee80211_hw_config( - local, IEEE80211_CONF_CHANGE_CHANNEL); - } - - return ret; -} - u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index d2d81b103341..1da81f456744 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -55,6 +55,8 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_freq *freq, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_channel *chan; if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); @@ -69,17 +71,38 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(sdata, + chan = ieee80211_get_channel(local->hw.wiphy, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; - if (div > 0) - return ieee80211_set_freq(sdata, freq->m / div); - else + if (div <= 0) return -EINVAL; + chan = ieee80211_get_channel(local->hw.wiphy, freq->m / div); } + + if (!chan) + return -EINVAL; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + return -EINVAL; + + /* + * no change except maybe auto -> fixed, ignore the HT + * setting so you can fix a channel you're on already + */ + if (local->oper_channel == chan) + return 0; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_req_auth(sdata); + + local->oper_channel = chan; + local->oper_channel_type = NL80211_CHAN_NO_HT; + ieee80211_hw_config(local, 0); + + return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8b6bbb3032b0..2936fa3b6dc8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1914,8 +1914,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) s->sk_protocol, nlk->pid, nlk->groups ? 
(u32)nlk->groups[0] : 0, - atomic_read(&s->sk_rmem_alloc), - atomic_read(&s->sk_wmem_alloc), + sk_rmem_alloc_get(s), + sk_wmem_alloc_get(s), nlk->cb, atomic_read(&s->sk_refcnt), atomic_read(&s->sk_drops) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 3be0e016ab7d..ce51ce012cda 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -286,8 +286,7 @@ void nr_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.function = nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; @@ -1206,7 +1205,7 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) long amount; lock_sock(sk); - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; release_sock(sk); @@ -1342,8 +1341,8 @@ static int nr_info_show(struct seq_file *seq, void *v) nr->n2count, nr->n2, nr->window, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); bh_unlock_sock(s); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4f76e5552d8e..ebe5718baa31 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1987,7 +1987,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, switch (cmd) { case SIOCOUTQ: { - int amount = atomic_read(&sk->sk_wmem_alloc); + int amount = sk_wmem_alloc_get(sk); + return put_user(amount, (int __user *)arg); } case SIOCINQ: diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 4e68ab439d5d..79693fe2001e 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -56,7 +56,6 @@ struct rfkill { u32 idx; bool registered; - bool suspended; bool persistent; const struct rfkill_ops *ops; @@ -224,7 +223,7 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) static void rfkill_event(struct rfkill *rfkill) { - if (!rfkill->registered || rfkill->suspended) + if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); @@ -270,6 +269,9 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) unsigned long flags; int err; + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return; + /* * Some platforms (...!) 
generate input events which affect the * _hard_ kill state -- whenever something tries to change the @@ -292,9 +294,6 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); - if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) - return; - err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); @@ -508,19 +507,32 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) { - rfkill->persistent = true; - } else { - if (prev != blocked && !hwblock) - schedule_work(&rfkill->uevent_work); + if (!rfkill->registered) + return blocked; - rfkill_led_trigger_event(rfkill); - } + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); +void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + + BUG_ON(!rfkill); + BUG_ON(rfkill->registered); + + spin_lock_irqsave(&rfkill->lock, flags); + __rfkill_set_sw_state(rfkill, blocked); + rfkill->persistent = true; + spin_unlock_irqrestore(&rfkill->lock, flags); +} +EXPORT_SYMBOL(rfkill_init_sw_state); + void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; @@ -598,6 +610,15 @@ static ssize_t rfkill_idx_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->idx); } +static ssize_t rfkill_persistent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%d\n", rfkill->persistent); +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -656,6 +677,7 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), + __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL @@ -718,8 +740,6 @@ static int rfkill_suspend(struct device *dev, pm_message_t state) rfkill_pause_polling(rfkill); - rfkill->suspended = true; - return 0; } @@ -728,10 +748,10 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; - cur = !!(rfkill->state & RFKILL_BLOCK_SW); - rfkill_set_block(rfkill, cur); - - rfkill->suspended = false; + if (!rfkill->persistent) { + cur = !!(rfkill->state & RFKILL_BLOCK_SW); + rfkill_set_block(rfkill, cur); + } rfkill_resume_polling(rfkill); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 877a7f65f707..6bd8e93869ed 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -356,8 +356,7 @@ void rose_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ setup_timer(&sk->sk_timer, rose_destroy_timer, (unsigned long)sk); @@ -1310,7 +1309,8 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { long amount; - amount = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; return put_user(amount, 
(unsigned int __user *) argp); @@ -1481,8 +1481,8 @@ static int rose_info_show(struct seq_file *seq, void *v) rose->hb / HZ, ax25_display_timer(&rose->idletimer) / (60 * HZ), rose->idle / (60 * HZ), - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index f8f047b61245..723964c3ee4f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -294,6 +294,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } @@ -327,6 +329,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, } police->tcf_qstats.overlimits++; + if (police->tcf_action == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcf_action; } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 266151ae85a3..18d85d259104 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -349,13 +349,13 @@ META_COLLECTOR(int_sk_type) META_COLLECTOR(int_sk_rmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_rmem_alloc); + dst->value = sk_rmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_wmem_alloc) { SKIP_NONLOCAL(skb); - dst->value = atomic_read(&skb->sk->sk_wmem_alloc); + dst->value = sk_wmem_alloc_get(skb->sk); } META_COLLECTOR(int_sk_omem_alloc) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f01e5d8a24f..35ba035970a2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -130,7 +130,7 @@ static inline int sctp_wspace(struct sctp_association *asoc) if (asoc->ep->sndbuf_policy) amt = asoc->sndbuf_used; else - amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); + amt = sk_wmem_alloc_get(asoc->base.sk); if (amt >= asoc->base.sk->sk_sndbuf) { if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) @@ -6523,7 +6523,7 @@ static int sctp_writeable(struct sock *sk) { int amt = 0; - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amt < 0) amt = 0; return amt; diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5369aa369b35..db73fd2a3f0e 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,5 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c new file mode 100644 index 000000000000..553621fb2c41 --- /dev/null +++ b/net/sunrpc/backchannel_rqst.c @@ -0,0 +1,281 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include <linux/tcp.h> +#include <linux/sunrpc/xprt.h> + +#ifdef RPC_DEBUG +#define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#if defined(CONFIG_NFS_V4_1) + +/* + * Helper routines that track the number of preallocation elements + * on the transport. + */ +static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) +{ + return xprt->bc_alloc_count > 0; +} + +static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + xprt->bc_alloc_count += n; +} + +static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + return xprt->bc_alloc_count -= n; +} + +/* + * Free the preallocated rpc_rqst structure and the memory + * buffers hanging off of it. + */ +static void xprt_free_allocation(struct rpc_rqst *req) +{ + struct xdr_buf *xbufp; + + dprintk("RPC: free allocations for req= %p\n", req); + BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + xbufp = &req->rq_private_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + xbufp = &req->rq_snd_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + list_del(&req->rq_bc_pa_list); + kfree(req); +} + +/* + * Preallocate up to min_reqs structures and related buffers for use + * by the backchannel. This function can be called multiple times + * when creating new sessions that use the same rpc_xprt. The + * preallocated buffers are added to the pool of resources used by + * the rpc_xprt. Any one of these resources may be used by an + * incoming callback request. It's up to the higher levels in the + * stack to enforce that the maximum number of session slots is not + * being exceeded. + * + * Some callback arguments can be large. For example, a pNFS server + * using multiple deviceids. The list can be unbounded, but the client + * has the ability to tell the server the maximum size of the callback + * requests. Each deviceID is 16 bytes, so allocate one page + * for the arguments to have enough room to receive a number of these + * deviceIDs. The NFS client indicates to the pNFS server that its + * callback requests can be up to 4096 bytes in size. + */ +int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + struct page *page_rcv = NULL, *page_snd = NULL; + struct xdr_buf *xbufp = NULL; + struct rpc_rqst *req, *tmp; + struct list_head tmp_list; + int i; + + dprintk("RPC: setup backchannel transport\n"); + + /* + * We use a temporary list to keep track of the preallocated + * buffers. Once we're done building the list we splice it + * into the backchannel preallocation list off of the rpc_xprt + * struct. This helps minimize the amount of time the list + * lock is held on the rpc_xprt struct. It also makes cleanup + * easier in case of memory allocation errors.
+ */ + INIT_LIST_HEAD(&tmp_list); + for (i = 0; i < min_reqs; i++) { + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (req == NULL) { + printk(KERN_ERR "Failed to create bc rpc_rqst\n"); + goto out_free; + } + + /* Add the allocated buffer to the tmp list */ + dprintk("RPC: adding req= %p\n", req); + list_add(&req->rq_bc_pa_list, &tmp_list); + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + page_rcv = alloc_page(GFP_KERNEL); + if (page_rcv == NULL) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + xbufp = &req->rq_rcv_buf; + xbufp->head[0].iov_base = page_address(page_rcv); + xbufp->head[0].iov_len = PAGE_SIZE; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = PAGE_SIZE; + xbufp->buflen = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + page_snd = alloc_page(GFP_KERNEL); + if (page_snd == NULL) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + + xbufp = &req->rq_snd_buf; + xbufp->head[0].iov_base = page_address(page_snd); + xbufp->head[0].iov_len = 0; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = 0; + xbufp->buflen = PAGE_SIZE; + } + + /* + * Add the temporary list to the backchannel preallocation list + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_splice(&tmp_list, &xprt->bc_pa_list); + xprt_inc_alloc_count(xprt, min_reqs); + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: setup backchannel transport done\n"); + return 0; + +out_free: + /* + * Memory allocation failed, free the temporary list + */ + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + xprt_free_allocation(req); + + dprintk("RPC: setup backchannel transport failed\n"); + return -1; +} +EXPORT_SYMBOL(xprt_setup_backchannel); + +/* + * Destroys the backchannel preallocated structures. + * Since these structures may have been allocated by multiple calls + * to xprt_setup_backchannel, we only destroy up to the maximum number + * of reqs specified by the caller. + * @xprt: the transport holding the preallocated structures + * @max_reqs: the maximum number of preallocated structures to destroy + */ +void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + struct rpc_rqst *req = NULL, *tmp = NULL; + + dprintk("RPC: destroy backchannel transport\n"); + + BUG_ON(max_reqs == 0); + spin_lock_bh(&xprt->bc_pa_lock); + xprt_dec_alloc_count(xprt, max_reqs); + list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + dprintk("RPC: req=%p\n", req); + xprt_free_allocation(req); + if (--max_reqs == 0) + break; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: backchannel list empty= %s\n", + list_empty(&xprt->bc_pa_list) ? "true" : "false"); +} +EXPORT_SYMBOL(xprt_destroy_backchannel); + +/* + * One or more rpc_rqst structures have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * one of these requests. Use xprt_free_bc_request to return it. + * + * We know that we're called in soft interrupt context, so grab the plain + * spin_lock: there is no need for the bottom-half variant. + * + * Return an available rpc_rqst, otherwise NULL if none are available. 
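For reference, the two per-request pages above end up laid out as mirror images: the receive buffer advertises a full page up front, while the send buffer starts empty and grows toward the same one-page ceiling. A hypothetical helper condensing those assignments (bc_xdr_buf_init is not in the patch; it assumes the xdr_buf was zeroed by kzalloc, as in the patch):

static void bc_xdr_buf_init(struct xdr_buf *buf, void *page_addr, size_t want)
{
	buf->head[0].iov_base = page_addr;
	buf->head[0].iov_len = want;	/* PAGE_SIZE for rcv, 0 for snd */
	buf->tail[0].iov_base = NULL;
	buf->tail[0].iov_len = 0;
	buf->page_len = 0;
	buf->len = want;		/* current contents */
	buf->buflen = PAGE_SIZE;	/* capacity in both cases */
}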
+ */ +struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + + dprintk("RPC: allocate a backchannel request\n"); + spin_lock(&xprt->bc_pa_lock); + if (!list_empty(&xprt->bc_pa_list)) { + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + list_del(&req->rq_bc_pa_list); + } else { + req = NULL; + } + spin_unlock(&xprt->bc_pa_lock); + + if (req != NULL) { + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + req->rq_reply_bytes_recvd = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(req->rq_private_buf)); + } + dprintk("RPC: backchannel req=%p\n", req); + return req; +} + +/* + * Return the preallocated rpc_rqst structure and XDR buffers + * associated with this rpc_task. + */ +void xprt_free_bc_request(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + dprintk("RPC: free backchannel req=%p\n", req); + + smp_mb__before_clear_bit(); + BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + smp_mb__after_clear_bit(); + + if (!xprt_need_to_requeue(xprt)) { + /* + * The last remaining session was destroyed while this + * entry was in use. Free the entry and don't attempt + * to add back to the list because there is no need to + * have any more preallocated entries. + */ + dprintk("RPC: Last session removed req=%p\n", req); + xprt_free_allocation(req); + return; + } + + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c new file mode 100644 index 000000000000..13f214f53120 --- /dev/null +++ b/net/sunrpc/bc_svc.c @@ -0,0 +1,81 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * The NFSv4.1 callback service helper routines. + * They implement the transport level processing required to send the + * reply over an existing open connection previously established by the client. 
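Taken together, the allocate/free pair above gives a simple borrow-and-return discipline on the pool. A hypothetical caller (handle_one_callback is illustrative only, not part of the patch):

static int handle_one_callback(struct rpc_xprt *xprt)
{
	struct rpc_rqst *req = xprt_alloc_bc_request(xprt);

	if (req == NULL)
		return -EAGAIN;	/* slot table exhausted; caller may disconnect */

	/* ... decode the call from req->rq_rcv_buf and service it ... */

	xprt_free_bc_request(req);	/* requeues, or frees if the last
					 * session went away meanwhile */
	return 0;
}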
+ */ + +#if defined(CONFIG_NFS_V4_1) + +#include <linux/module.h> + +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/sched.h> +#include <linux/sunrpc/bc_xprt.h> + +#define RPCDBG_FACILITY RPCDBG_SVCDSP + +void bc_release_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + dprintk("RPC: bc_release_request: task= %p\n", task); + + /* + * Release this request only if it's a backchannel + * preallocated request + */ + if (!bc_prealloc(req)) + return; + xprt_free_bc_request(req); +} + +/* Empty callback ops */ +static const struct rpc_call_ops nfs41_callback_ops = { +}; + + +/* + * Send the callback reply + */ +int bc_send(struct rpc_rqst *req) +{ + struct rpc_task *task; + int ret; + + dprintk("RPC: bc_send req= %p\n", req); + task = rpc_run_bc_task(req, &nfs41_callback_ops); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + BUG_ON(atomic_read(&task->tk_count) != 1); + ret = task->tk_status; + rpc_put_task(task); + } + dprintk("RPC: bc_send ret= %d\n", ret); + return ret; +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20029a79a5de..ff0c23053d2f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -488,7 +488,7 @@ static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) - delay = 30*HZ; + delay = round_jiffies_relative(30*HZ); if (list_empty(&cache_list)) delay = 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5abab094441f..5bc2f45bddf0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,7 +36,9 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> +#include <linux/sunrpc/bc_xprt.h> +#include "sunrpc.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -63,6 +65,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); +#if defined(CONFIG_NFS_V4_1) +static void call_bc_transmit(struct rpc_task *task); +#endif /* CONFIG_NFS_V4_1 */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -613,6 +618,50 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); +#if defined(CONFIG_NFS_V4_1) +/** + * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run + * rpc_execute against it + * @req: RPC request to send + * @tk_ops: RPC call ops + */ +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *tk_ops) +{ + struct rpc_task *task; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_task_setup task_setup_data = { + .callback_ops = tk_ops, + }; + + dprintk("RPC: rpc_run_bc_task req= %p\n", req); + /* + * Create an rpc_task to send the data + */ + task = rpc_new_task(&task_setup_data); + if (!task) { + xprt_free_bc_request(req); + goto out; + } + task->tk_rqstp = req; + + /* + * Set up the xdr_buf length. + * This also indicates that the buffer is XDR encoded already. 
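The length being set here is just the sum of the buffer's three regions; spelled out as a hypothetical helper (not in the patch):

static inline size_t xdr_buf_total_len(const struct xdr_buf *x)
{
	/* head kvec + page area + tail kvec; a nonzero total doubles as
	 * the "already XDR encoded" marker for the backchannel path. */
	return x->head[0].iov_len + x->page_len + x->tail[0].iov_len;
}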
+ */ + xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + + xbufp->tail[0].iov_len; + + task->tk_action = call_bc_transmit; + atomic_inc(&task->tk_count); + BUG_ON(atomic_read(&task->tk_count) != 2); + rpc_execute(task); + +out: + dprintk("RPC: rpc_run_bc_task: task= %p\n", task); + return task; +} +#endif /* CONFIG_NFS_V4_1 */ + void rpc_call_start(struct rpc_task *task) { @@ -695,6 +744,19 @@ void rpc_force_rebind(struct rpc_clnt *clnt) EXPORT_SYMBOL_GPL(rpc_force_rebind); /* + * Restart an (async) RPC call from the call_prepare state. + * Usually called from within the exit handler. + */ +void +rpc_restart_call_prepare(struct rpc_task *task) +{ + if (RPC_ASSASSINATED(task)) + return; + task->tk_action = rpc_prepare_task; +} +EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); + +/* * Restart an (async) RPC call. Usually called from within the * exit handler. */ @@ -1085,7 +1147,7 @@ call_transmit(struct rpc_task *task) * in order to allow access to the socket to other RPC requests. */ call_transmit_status(task); - if (task->tk_msg.rpc_proc->p_decode != NULL) + if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; rpc_wake_up_queued_task(&task->tk_xprt->pending, task); @@ -1120,6 +1182,72 @@ call_transmit_status(struct rpc_task *task) } } +#if defined(CONFIG_NFS_V4_1) +/* + * 5b. Send the backchannel RPC reply. On error, drop the reply. In + * addition, disconnect on connectivity errors. + */ +static void +call_bc_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + BUG_ON(task->tk_status != 0); + task->tk_status = xprt_prepare_transmit(task); + if (task->tk_status == -EAGAIN) { + /* + * Could not reserve the transport. Try again after the + * transport is released. + */ + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; + } + + task->tk_action = rpc_exit_task; + if (task->tk_status < 0) { + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + return; + } + + xprt_transmit(task); + xprt_end_transmit(task); + dprint_status(task); + switch (task->tk_status) { + case 0: + /* Success */ + break; + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -ETIMEDOUT: + /* + * Problem reaching the server. Disconnect and let the + * forechannel reestablish the connection. The server will + * have to retransmit the backchannel request and we'll + * reprocess it. Since these ops are idempotent, there's no + * need to cache our reply at this time. + */ + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + break; + default: + /* + * We were unable to reply and will have to drop the + * request. The server should reconnect and retransmit. + */ + BUG_ON(task->tk_status == -EAGAIN); + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + break; + } + rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +} +#endif /* CONFIG_NFS_V4_1 */ + /* * 6. 
Sort out the RPC call status */ @@ -1130,8 +1258,8 @@ call_status(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; int status; - if (req->rq_received > 0 && !req->rq_bytes_sent) - task->tk_status = req->rq_received; + if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) + task->tk_status = req->rq_reply_bytes_recvd; dprint_status(task); @@ -1248,7 +1376,7 @@ call_decode(struct rpc_task *task) /* * Ensure that we see all writes made by xprt_complete_rqst() - * before it changed req->rq_received. + * before it changed req->rq_reply_bytes_recvd. */ smp_rmb(); req->rq_rcv_buf.len = req->rq_private_buf.len; @@ -1289,7 +1417,7 @@ out_retry: task->tk_status = 0; /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { - req->rq_received = req->rq_rcv_buf.len = 0; + req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); @@ -1377,13 +1505,14 @@ rpc_verify_header(struct rpc_task *task) } if ((len -= 3) < 0) goto out_overflow; - p += 1; /* skip XID */ + p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); + task->tk_pid, __func__, n); goto out_garbage; } + if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) goto out_overflow; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff50a0546865..1102ce1251f7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare */ -static void rpc_prepare_task(struct rpc_task *task) +void rpc_prepare_task(struct rpc_task *task) { task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1ef6e46d9da2..1b4e6791ecf3 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -141,12 +141,14 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); void rpc_count_iostats(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats = task->tk_client->cl_metrics; + struct rpc_iostats *stats; struct rpc_iostats *op_metrics; long rtt, execute, queue; - if (!stats || !req) + if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; + + stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; @@ -154,7 +156,7 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += task->tk_bytes_sent; - op_metrics->om_bytes_recv += req->rq_received; + op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; queue = (long)req->rq_xtime - task->tk_start; if (queue < 0) diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h new file mode 100644 index 000000000000..5d9dd742264b --- /dev/null +++ b/net/sunrpc/sunrpc.h @@ -0,0 +1,37 @@ +/****************************************************************************** + +(c) 2008 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * Functions and macros used internally by RPC + */ + +#ifndef _NET_SUNRPC_SUNRPC_H +#define _NET_SUNRPC_SUNRPC_H + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + +#endif /* _NET_SUNRPC_SUNRPC_H */ + diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5ed8931dfe98..952f206ff307 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -25,6 +25,7 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/bc_xprt.h> #define RPCDBG_FACILITY RPCDBG_SVCDSP @@ -486,6 +487,10 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); +#if defined(CONFIG_NFS_V4_1) + svc_sock_destroy(serv->bc_xprt); +#endif /* CONFIG_NFS_V4_1 */ + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); @@ -970,20 +975,18 @@ svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) } /* - * Process the RPC request. + * Common routine for processing the RPC request. */ -int -svc_process(struct svc_rqst *rqstp) +static int +svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) { struct svc_program *progp; struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; - struct kvec * argv = &rqstp->rq_arg.head[0]; - struct kvec * resv = &rqstp->rq_res.head[0]; struct svc_serv *serv = rqstp->rq_server; kxdrproc_t xdr; __be32 *statp; - u32 dir, prog, vers, proc; + u32 prog, vers, proc; __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -993,19 +996,6 @@ svc_process(struct svc_rqst *rqstp) if (argv->iov_len < 6*4) goto err_short_len; - /* setup response xdr_buf. 
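rpc_reply_expected(), defined in sunrpc.h above, is what lets the transmit paths later in this patch treat one-way messages uniformly. Condensed from those call sites into one illustrative helper (after_transmit is not in the patch; xprt_timer and rpc_exit_task are the xprt.c/sched.c symbols the patch itself uses):

static void after_transmit(struct rpc_task *task, struct rpc_xprt *xprt)
{
	if (!rpc_reply_expected(task)) {
		/* No decoder means a one-way message: done at transmit. */
		task->tk_action = rpc_exit_task;
		return;
	}
	/* Otherwise wait on the pending queue for the matching reply. */
	rpc_sleep_on(&xprt->pending, task, xprt_timer);
}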
- * Initially it has just one page - */ - rqstp->rq_resused = 1; - resv->iov_base = page_address(rqstp->rq_respages[0]); - resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; - rqstp->rq_res.len = 0; - rqstp->rq_res.page_base = 0; - rqstp->rq_res.page_len = 0; - rqstp->rq_res.buflen = PAGE_SIZE; - rqstp->rq_res.tail[0].iov_base = NULL; - rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ @@ -1014,17 +1004,13 @@ svc_process(struct svc_rqst *rqstp) /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); - rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = svc_getnl(argv); vers = svc_getnl(argv); /* First words of reply: */ svc_putnl(resv, 1); /* REPLY */ - if (dir != 0) /* direction != CALL */ - goto err_bad_dir; if (vers != 2) /* RPC version number */ goto err_bad_rpc; @@ -1147,7 +1133,7 @@ svc_process(struct svc_rqst *rqstp) sendit: if (svc_authorise(rqstp)) goto dropit; - return svc_send(rqstp); + return 1; /* Caller can now send it */ dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ @@ -1161,12 +1147,6 @@ err_short_len: goto dropit; /* drop request */ -err_bad_dir: - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - - serv->sv_stats->rpcbadfmt++; - goto dropit; /* drop request */ - err_bad_rpc: serv->sv_stats->rpcbadfmt++; svc_putnl(resv, 1); /* REJECT */ @@ -1220,6 +1200,100 @@ err_bad: EXPORT_SYMBOL_GPL(svc_process); /* + * Process the RPC request. + */ +int +svc_process(struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct svc_serv *serv = rqstp->rq_server; + u32 dir; + int error; + + /* + * Setup response xdr_buf. 
+ * Initially it has just one page + */ + rqstp->rq_resused = 1; + resv->iov_base = page_address(rqstp->rq_respages[0]); + resv->iov_len = 0; + rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.len = 0; + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; + rqstp->rq_res.tail[0].iov_len = 0; + + rqstp->rq_xid = svc_getu32(argv); + + dir = svc_getnl(argv); + if (dir != 0) { + /* direction != CALL */ + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); + serv->sv_stats->rpcbadfmt++; + svc_drop(rqstp); + return 0; + } + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + return svc_send(rqstp); +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Process a backchannel RPC request that arrived over an existing + * outbound connection + */ +int +bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, + struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + int error; + + /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xprt = serv->bc_xprt; + rqstp->rq_xid = req->rq_xid; + rqstp->rq_prot = req->rq_xprt->prot; + rqstp->rq_server = serv; + + rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); + memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); + memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); + memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); + + /* reset result send buffer "put" position */ + resv->iov_len = 0; + + if (rqstp->rq_prot != IPPROTO_TCP) { + printk(KERN_ERR "No support for Non-TCP transports!\n"); + BUG(); + } + + /* + * Skip the next two words because they've already been + * processed in the transport + */ + svc_getu32(argv); /* XID */ + svc_getnl(argv); /* CALLDIR */ + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + return bc_send(req); +} +EXPORT_SYMBOL(bc_svc_process); +#endif /* CONFIG_NFS_V4_1 */ + +/* * Return (transport-specific) limit on the rpc payload. */ u32 svc_max_payload(const struct svc_rqst *rqstp) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c200d92e57e4..6f33d33cc064 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -11,6 +11,7 @@ #include <net/sock.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> +#include <linux/sunrpc/svcsock.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -1097,36 +1098,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, } EXPORT_SYMBOL_GPL(svc_find_xprt); -/* - * Format a buffer with a list of the active transports. A zero for - * the buflen parameter disables target buffer overflow checking. +static int svc_one_xprt_name(const struct svc_xprt *xprt, + char *pos, int remaining) +{ + int len; + + len = snprintf(pos, remaining, "%s %u\n", + xprt->xpt_class->xcl_name, + svc_xprt_local_port(xprt)); + if (len >= remaining) + return -ENAMETOOLONG; + return len; +} + +/** + * svc_xprt_names - format a buffer with a list of transport names + * @serv: pointer to an RPC service + * @buf: pointer to a buffer to be filled in + * @buflen: length of buffer to be filled in + * + * Fills in @buf with a string containing a list of transport names, + * each name terminated with '\n'. 
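Stepping back to bc_svc_process() above: it never allocates pages of its own; it aliases the rpc_rqst's preallocated buffers by value-copying the xdr_bufs in and out. A hypothetical condensation of that pairing (bc_borrow_bufs is illustrative only):

static void bc_borrow_bufs(struct svc_rqst *rqstp, struct rpc_rqst *req)
{
	/* Incoming call was already read into rq_rcv_buf; the reply is
	 * built directly in the pages behind rq_snd_buf. */
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
	rqstp->rq_res.head[0].iov_len = 0;	/* reset the "put" position */
	/* ... after processing, rq_res is copied back so bc_send() sees
	 * the reply's final iovec lengths. */
}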
+ * + * Returns positive length of the filled-in string on success; otherwise + * a negative errno value is returned if an error occurs. */ -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; - char xprt_str[64]; - int totlen = 0; - int len; + int len, totlen; + char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); + + pos = buf; + totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - len = snprintf(xprt_str, sizeof(xprt_str), - "%s %d\n", xprt->xpt_class->xcl_name, - svc_xprt_local_port(xprt)); - /* If the string was truncated, replace with error string */ - if (len >= sizeof(xprt_str)) - strcpy(xprt_str, "name-too-long\n"); - /* Don't overflow buffer */ - len = strlen(xprt_str); - if (buflen && (len + totlen >= buflen)) + len = svc_one_xprt_name(xprt, pos, buflen - totlen); + if (len < 0) { + *buf = '\0'; + totlen = len; + } + if (len <= 0) break; - strcpy(buf+totlen, xprt_str); + + pos += len; totlen += len; } + spin_unlock_bh(&serv->sv_lock); return totlen; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9d504234af4a..23128ee191ae 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -240,42 +240,76 @@ out: /* * Report socket names for nfsdfs */ -static int one_sock_name(char *buf, struct svc_sock *svsk) +static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { + const struct sock *sk = svsk->sk_sk; + const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp"; int len; - switch(svsk->sk_sk->sk_family) { - case AF_INET: - len = sprintf(buf, "ipv4 %s %pI4 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + switch (sk->sk_family) { + case PF_INET: + len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", + proto_name, + &inet_sk(sk)->rcv_saddr, + inet_sk(sk)->num); + break; + case PF_INET6: + len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", + proto_name, + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; default: - len = sprintf(buf, "*unknown-%d*\n", - svsk->sk_sk->sk_family); + len = snprintf(buf, remaining, "*unknown-%d*\n", + sk->sk_family); + } + + if (len >= remaining) { + *buf = '\0'; + return -ENAMETOOLONG; } return len; } -int -svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +/** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. 
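Both name formatters in this hunk lean on the same snprintf() contract: a return value greater than or equal to the space offered means the output was truncated. Reduced to its core (format_name_checked is an illustrative stand-in, not a function in the patch):

static int format_name_checked(char *pos, int remaining,
			       const char *name, unsigned int port)
{
	int len = snprintf(pos, remaining, "%s %u\n", name, port);

	if (len >= remaining)
		return -ENAMETOOLONG;	/* truncated; caller restores buf */
	return len;			/* bytes actually written */
}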
+ */ +int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; if (!serv) return 0; + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = one_sock_name(buf+len, svsk); - if (toclose && strcmp(toclose, buf+len) == 0) + int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); + if (onelen < 0) { + len = onelen; + break; + } + if (toclose && strcmp(toclose, buf + len) == 0) closesk = svsk; else len += onelen; } spin_unlock_bh(&serv->sv_lock); + if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... @@ -346,6 +380,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif } @@ -387,6 +422,15 @@ static void svc_write_space(struct sock *sk) } } +static void svc_tcp_write_space(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) + clear_bit(SOCK_NOSPACE, &sock->flags); + svc_write_space(sk); +} + /* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the @@ -427,13 +471,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@ -465,8 +510,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); @@ -980,25 +1025,16 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) static int svc_tcp_has_wspace(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; + struct svc_serv *serv = svsk->sk_xprt.xpt_server; int required; - int wspace; - /* - * Set the SOCK_NOSPACE flag before checking the available - * sock space. 
- */ + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required) + return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; - wspace = sk_stream_wspace(svsk->sk_sk); - - if (wspace < sk_stream_min_wspace(svsk->sk_sk)) - return 0; - if (required * 2 > wspace) - return 0; - - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 1; + return 0; } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, @@ -1054,7 +1090,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; - sk->sk_write_space = svc_write_space; + sk->sk_write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; @@ -1148,9 +1184,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) +/** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1190,7 +1236,7 @@ int svc_addsock(struct svc_serv *serv, sockfd_put(so); return err; } - return one_sock_name(name_return, svsk); + return svc_one_sock_name(svsk, name_return, len); } EXPORT_SYMBOL_GPL(svc_addsock); @@ -1327,3 +1373,42 @@ static void svc_sock_free(struct svc_xprt *xprt) sock_release(svsk->sk_sock); kfree(svsk); } + +/* + * Create a svc_xprt. + * + * For internal use only (e.g. nfsv4.1 backchannel). + * Callers should typically use the xpo_create() method. + */ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +{ + struct svc_sock *svsk; + struct svc_xprt *xprt = NULL; + + dprintk("svc: %s\n", __func__); + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + goto out; + + xprt = &svsk->sk_xprt; + if (prot == IPPROTO_TCP) + svc_xprt_init(&svc_tcp_class, xprt, serv); + else if (prot == IPPROTO_UDP) + svc_xprt_init(&svc_udp_class, xprt, serv); + else + BUG(); +out: + dprintk("svc: %s return %p\n", __func__, xprt); + return xprt; +} +EXPORT_SYMBOL_GPL(svc_sock_create); + +/* + * Destroy a svc_sock. + */ +void svc_sock_destroy(struct svc_xprt *xprt) +{ + if (xprt) + kfree(container_of(xprt, struct svc_sock, sk_xprt)); +} +EXPORT_SYMBOL_GPL(svc_sock_destroy); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 06ca058572f2..f412a852bc73 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -12,8 +12,9 @@ * - Next, the caller puts together the RPC message, stuffs it into * the request struct, and calls xprt_transmit(). * - xprt_transmit sends the message and installs the caller on the - * transport's wait list. At the same time, it installs a timer that - * is run after the packet's timeout has expired. + * transport's wait list. At the same time, if a reply is expected, + * it installs a timer that is run after the packet's timeout has + * expired. 
* - When a packet arrives, the data_ready handler walks the list of * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. @@ -46,6 +47,8 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/metrics.h> + +#include "sunrpc.h" + /* * Local variables */ @@ -192,8 +195,8 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); */ int xprt_reserve_xprt(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -803,9 +806,10 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) list_del_init(&req->rq_list); req->rq_private_buf.len = copied; - /* Ensure all writes are done before we update req->rq_received */ + /* Ensure all writes are done before we update + * req->rq_reply_bytes_recvd. */ smp_wmb(); - req->rq_received = copied; + req->rq_reply_bytes_recvd = copied; rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -820,7 +824,7 @@ static void xprt_timer(struct rpc_task *task) dprintk("RPC: %5u xprt_timer\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(task); } else @@ -842,8 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_received && !req->rq_bytes_sent) { - err = req->rq_received; + if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { + err = req->rq_reply_bytes_recvd; goto out_unlock; } if (!xprt->ops->reserve_xprt(task)) @@ -855,7 +859,7 @@ out_unlock: void xprt_end_transmit(struct rpc_task *task) { - xprt_release_write(task->tk_xprt, task); + xprt_release_write(task->tk_rqstp->rq_xprt, task); } /** @@ -872,8 +876,11 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_received) { - if (list_empty(&req->rq_list)) { + if (!req->rq_reply_bytes_recvd) { + if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { + /* + * Add to the list only if we're expecting a reply + */ spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, @@ -908,8 +915,13 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received) + else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { + /* + * Sleep on the pending queue since + * we're expecting a reply. + */ rpc_sleep_on(&xprt->pending, task, xprt_timer); + } spin_unlock_bh(&xprt->transport_lock); } @@ -982,11 +994,17 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) */ void xprt_release(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_rqst *req; + int is_bc_request; if (!(req = task->tk_rqstp)) return; + + /* Preallocated backchannel request? 
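The smp_wmb() in xprt_complete_rqst() above pairs with the smp_rmb() already noted in call_decode(); side by side, the publish/consume protocol on rq_reply_bytes_recvd looks like this (the two helpers are illustrative framings of the patch's logic, not new functions):

static void publish_reply(struct rpc_rqst *req, int copied)
{
	req->rq_private_buf.len = copied;
	smp_wmb();			/* order buf.len before recvd */
	req->rq_reply_bytes_recvd = copied;
}

static int reply_len_if_ready(struct rpc_rqst *req)
{
	if (!req->rq_reply_bytes_recvd)
		return 0;		/* nothing published yet */
	smp_rmb();			/* pairs with smp_wmb() above */
	return req->rq_private_buf.len;	/* now safe to read */
}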
*/ + is_bc_request = bc_prealloc(req); + + xprt = req->rq_xprt; rpc_count_iostats(task); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); @@ -999,10 +1017,19 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(req->rq_buffer); + if (!bc_prealloc(req)) + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); + + /* + * Early exit if this is a backchannel preallocated request. + * There is no need to have it added to the RPC slot list. + */ + if (is_bc_request) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %5u release request %p\n", task->tk_pid, req); @@ -1049,6 +1076,11 @@ found: INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_NFS_V4_1) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_NFS_V4_1 */ + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 42a6f9f20285..9e884383134f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -397,14 +397,14 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (!ch) return 0; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); + if (!xprt->sc_frmr_pg_list_len) sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, rpl_map, chl_map, ch_count, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6c2d61586551..83c73c4d017a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -34,6 +34,9 @@ #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> #include <linux/file.h> +#ifdef CONFIG_NFS_V4_1 +#include <linux/sunrpc/bc_xprt.h> +#endif #include <net/sock.h> #include <net/checksum.h> @@ -270,6 +273,13 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 6) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -956,7 +966,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 4)) { + if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_force_disconnect(xprt); return; @@ -991,33 +1001,77 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_flags |= TCP_RCV_READ_CALLDIR; transport->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", + dprintk("RPC: reading %s XID %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? 
"reply for" + : "request with", ntohl(transport->tcp_xid)); xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_calldir(struct sock_xprt *transport, + struct xdr_skb_reader *desc) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; + size_t len, used; + u32 offset; + __be32 calldir; + + /* + * We want transport->tcp_offset to be 8 at the end of this routine + * (4 bytes for the xid and 4 bytes for the call/reply flag). + * When this function is called for the first time, + * transport->tcp_offset is 4 (after having already read the xid). + */ + offset = transport->tcp_offset - sizeof(transport->tcp_xid); + len = sizeof(calldir) - offset; + dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); + used = xdr_skb_read_bits(desc, &calldir, len); + transport->tcp_offset += used; + if (used != len) + return; + transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; + /* + * We don't yet have the XDR buffer, so we will write the calldir + * out after we get the buffer from the 'struct rpc_rqst' + */ + if (ntohl(calldir) == RPC_REPLY) + transport->tcp_flags |= TCP_RPC_REPLY; + else + transport->tcp_flags &= ~TCP_RPC_REPLY; + dprintk("RPC: reading %s CALL/REPLY flag %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? + "reply for" : "request with", calldir); + xs_tcp_check_fraghdr(transport); +} + +static inline void xs_tcp_read_common(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc, + struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *rcvbuf; size_t len; ssize_t r; - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); - if (!req) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); - return; + rcvbuf = &req->rq_private_buf; + + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + /* + * Save the RPC direction in the XDR buffer + */ + __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? + htonl(RPC_REPLY) : 0; + + memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, + &calldir, sizeof(calldir)); + transport->tcp_copied += sizeof(calldir); + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; } - rcvbuf = &req->rq_private_buf; len = desc->count; if (len > transport->tcp_reclen - transport->tcp_offset) { struct xdr_skb_reader my_desc; @@ -1054,7 +1108,7 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea "tcp_offset = %u, tcp_reclen = %u\n", xprt, transport->tcp_copied, transport->tcp_offset, transport->tcp_reclen); - goto out; + return; } dprintk("RPC: XID %08x read %Zd bytes\n", @@ -1070,11 +1124,125 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } -out: + return; +} + +/* + * Finds the request corresponding to the RPC xid and invokes the common + * tcp read code to read the data. 
+ */ +static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, transport->tcp_xid); + if (!req) { + dprintk("RPC: XID %08x request not found!\n", + ntohl(transport->tcp_xid)); + spin_unlock(&xprt->transport_lock); + return -1; + } + + xs_tcp_read_common(xprt, desc, req); + + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) + xprt_complete_rqst(req->rq_task, transport->tcp_copied); + + spin_unlock(&xprt->transport_lock); + return 0; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Obtains an rpc_rqst previously allocated and invokes the common + * tcp read code to read the data. The result is placed in the callback + * queue. + * If we're unable to obtain the rpc_rqst we schedule the closing of the + * connection and return -1. + */ +static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + req = xprt_alloc_bc_request(xprt); + if (req == NULL) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + xprt_force_disconnect(xprt); + return -1; + } + + req->rq_xid = transport->tcp_xid; + dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); + xs_tcp_read_common(xprt, desc, req); + + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { + struct svc_serv *bc_serv = xprt->bc_serv; + + /* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after enqueuing the + * request. + */ + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + wake_up(&bc_serv->sv_cb_waitq); + } + + req->rq_private_buf.len = transport->tcp_copied; + + return 0; +} + +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + return (transport->tcp_flags & TCP_RPC_REPLY) ? + xs_tcp_read_reply(xprt, desc) : + xs_tcp_read_callback(xprt, desc); +} +#else +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + return xs_tcp_read_reply(xprt, desc); +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Read data off the transport. This can be either an RPC_CALL or an + * RPC_REPLY. Relay the processing to helper functions. + */ +static void xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + if (_xs_tcp_read_data(xprt, desc) == 0) + xs_tcp_check_fraghdr(transport); + else { + /* + * The transport_lock protects the request handling. + * There's no need to hold it to update the tcp_flags. 
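The producer side above enqueues on sv_cb_list and wakes sv_cb_waitq; the matching consumer lives in the NFSv4.1 callback service, outside this patch. It would drain the queue roughly like this hypothetical sketch:

static struct rpc_rqst *bc_dequeue_one(struct svc_serv *serv)
{
	struct rpc_rqst *req = NULL;

	/* Called from process context after waiting on sv_cb_waitq. */
	spin_lock_bh(&serv->sv_cb_lock);
	if (!list_empty(&serv->sv_cb_list)) {
		req = list_first_entry(&serv->sv_cb_list,
				       struct rpc_rqst, rq_bc_list);
		list_del(&req->rq_bc_list);
	}
	spin_unlock_bh(&serv->sv_cb_lock);
	return req;	/* NULL means spurious wakeup; wait again */
}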
+ */ + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + } } static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) @@ -1114,9 +1282,14 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns xs_tcp_read_xid(transport, &desc); continue; } + /* Read in the call/reply flag */ + if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) { + xs_tcp_read_calldir(transport, &desc); + continue; + } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_request(xprt, &desc); + xs_tcp_read_data(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ @@ -1792,6 +1965,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, */ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); + break; case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -2010,6 +2184,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, +#if defined(CONFIG_NFS_V4_1) + .release_request = bc_release_request, +#endif /* CONFIG_NFS_V4_1 */ .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9dcc6e7f96ec..36d4e44d6233 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1946,7 +1946,7 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCOUTQ: - amount = atomic_read(&sk->sk_wmem_alloc); + amount = sk_wmem_alloc_get(sk); err = put_user(amount, (int __user *)arg); break; case SIOCINQ: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24168560ebae..241bddd0b4f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1687,13 +1687,52 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* disallow mesh-specific things */ + if (params.plink_action) + err = -EINVAL; + break; + case NL80211_IFTYPE_STATION: + /* disallow everything but AUTHORIZED flag */ + if (params.plink_action) + err = -EINVAL; + if (params.vlan) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); if (err) goto out; @@ -1728,9 +1767,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; @@ -1745,9 +1781,11 @@ static int nl80211_new_station(struct sk_buff 
*skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - if (!params.aid || params.aid > IEEE80211_MAX_AID) - return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_AID]) { + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (!params.aid || params.aid > IEEE80211_MAX_AID) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = @@ -1762,13 +1800,39 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* all ok but must have AID */ + if (!params.aid) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.aid) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); if (err) goto out; @@ -1812,7 +1876,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ed80af8ca5fb..21cdc872004e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -332,14 +332,14 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) /* * Deferred destroy. */ -void x25_destroy_socket(struct sock *); +static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ static void x25_destroy_timer(unsigned long data) { - x25_destroy_socket((struct sock *)data); + x25_destroy_socket_from_timer((struct sock *)data); } /* @@ -349,12 +349,10 @@ static void x25_destroy_timer(unsigned long data) * will touch it and we are (fairly 8-) ) safe. 
* Not static as it's used by the timer */ -void x25_destroy_socket(struct sock *sk) +static void __x25_destroy_socket(struct sock *sk) { struct sk_buff *skb; - sock_hold(sk); - lock_sock(sk); x25_stop_heartbeat(sk); x25_stop_timer(sk); @@ -374,8 +372,7 @@ void x25_destroy_socket(struct sock *sk) kfree_skb(skb); } - if (atomic_read(&sk->sk_wmem_alloc) || - atomic_read(&sk->sk_rmem_alloc)) { + if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; sk->sk_timer.function = x25_destroy_timer; @@ -385,7 +382,22 @@ void x25_destroy_socket(struct sock *sk) /* drop last reference so sock_put will free */ __sock_put(sk); } +} + +void x25_destroy_socket_from_timer(struct sock *sk) +{ + sock_hold(sk); + bh_lock_sock(sk); + __x25_destroy_socket(sk); + bh_unlock_sock(sk); + sock_put(sk); +} +static void x25_destroy_socket(struct sock *sk) +{ + sock_hold(sk); + lock_sock(sk); + __x25_destroy_socket(sk); release_sock(sk); sock_put(sk); } @@ -1259,8 +1271,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case TIOCOUTQ: { - int amount = sk->sk_sndbuf - - atomic_read(&sk->sk_wmem_alloc); + int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (amount < 0) amount = 0; rc = put_user(amount, (unsigned int __user *)argp); diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c index 1afa44d25beb..0a04e62e0e18 100644 --- a/net/x25/x25_proc.c +++ b/net/x25/x25_proc.c @@ -163,8 +163,8 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v) devname, x25->lci & 0x0FFF, x25->state, x25->vs, x25->vr, x25->va, x25_display_timer(s) / HZ, x25->t2 / HZ, x25->t21 / HZ, x25->t22 / HZ, x25->t23 / HZ, - atomic_read(&s->sk_wmem_alloc), - atomic_read(&s->sk_rmem_alloc), + sk_wmem_alloc_get(s), + sk_rmem_alloc_get(s), s->sk_socket ? SOCK_INODE(s->sk_socket)->i_ino : 0L); out: return 0; diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d3e3e54db936..5c5db1a36399 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -113,7 +113,7 @@ static void x25_heartbeat_expiry(unsigned long param) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); - x25_destroy_socket(sk); + x25_destroy_socket_from_timer(sk); return; } break;
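One general takeaway from the x25 split above: lock_sock() can sleep, so a timer (softirq) callback must use bh_lock_sock() instead, and the patch therefore provides one wrapper per calling context around a common __destroy body. Condensed into an illustrative pair (names and the elided body are not from the patch):

static void destroy_from_timer(struct sock *sk)	/* softirq context */
{
	sock_hold(sk);
	bh_lock_sock(sk);	/* spinlock; never sleeps */
	/* ... common __destroy body ... */
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void destroy_from_process(struct sock *sk)	/* may sleep */
{
	sock_hold(sk);
	lock_sock(sk);		/* may sleep waiting for the socket owner */
	/* ... common __destroy body ... */
	release_sock(sk);
	sock_put(sk);
}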