diff options
author | James Morris <jmorris@namei.org> | 2009-02-06 01:01:45 +0100 |
---|---|---|
committer | James Morris <jmorris@namei.org> | 2009-02-06 01:01:45 +0100 |
commit | cb5629b10d64a8006622ce3a52bc887d91057d69 (patch) | |
tree | 7c06d8f30783115e3384721046258ce615b129c5 /net | |
parent | TPM: integrity fix (diff) | |
parent | seq_file: fix big-enough lseek() + read() (diff) | |
download | linux-cb5629b10d64a8006622ce3a52bc887d91057d69.tar.xz linux-cb5629b10d64a8006622ce3a52bc887d91057d69.zip |
Merge branch 'master' into next
Conflicts:
fs/namei.c
Manually merged per:
diff --cc fs/namei.c
index 734f2b5,bbc15c2..0000000
--- a/fs/namei.c
+++ b/fs/namei.c
@@@ -860,9 -848,8 +849,10 @@@ static int __link_path_walk(const char
nd->flags |= LOOKUP_CONTINUE;
err = exec_permission_lite(inode);
if (err == -EAGAIN)
- err = vfs_permission(nd, MAY_EXEC);
+ err = inode_permission(nd->path.dentry->d_inode,
+ MAY_EXEC);
+ if (!err)
+ err = ima_path_check(&nd->path, MAY_EXEC);
if (err)
break;
@@@ -1525,14 -1506,9 +1509,14 @@@ int may_open(struct path *path, int acc
flag &= ~O_TRUNC;
}
- error = vfs_permission(nd, acc_mode);
+ error = inode_permission(inode, acc_mode);
if (error)
return error;
+
- error = ima_path_check(&nd->path,
++ error = ima_path_check(path,
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+ if (error)
+ return error;
/*
* An append-only file must be opened in append mode for writing.
*/
Signed-off-by: James Morris <jmorris@namei.org>
Diffstat (limited to 'net')
116 files changed, 3687 insertions, 1047 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index dd86a1dc4cd0..e9db889d6222 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,46 +3,35 @@ #include <linux/if_vlan.h> #include "vlan.h" -struct vlan_hwaccel_cb { - struct net_device *dev; -}; - -static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) -{ - return (struct vlan_hwaccel_cb *)skb->cb; -} - /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); - - if (skb_bond_should_drop(skb)) { - dev_kfree_skb_any(skb); - return NET_RX_DROP; - } + if (skb_bond_should_drop(skb)) + goto drop; skb->vlan_tci = vlan_tci; - cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + if (!skb->dev) + goto drop; return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + +drop: + dev_kfree_skb_any(skb); + return NET_RX_DROP; } EXPORT_SYMBOL(__vlan_hwaccel_rx); int vlan_hwaccel_do_receive(struct sk_buff *skb) { - struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); - struct net_device *dev = cb->dev; + struct net_device *dev = skb->dev; struct net_device_stats *stats; + skb->dev = vlan_dev_info(dev)->real_dev; netif_nit_deliver(skb); - if (dev == NULL) { - kfree_skb(skb); - return -1; - } - skb->dev = dev; skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -73,10 +62,86 @@ struct net_device *vlan_dev_real_dev(const struct net_device *dev) { return vlan_dev_info(dev)->real_dev; } -EXPORT_SYMBOL_GPL(vlan_dev_real_dev); +EXPORT_SYMBOL(vlan_dev_real_dev); u16 vlan_dev_vlan_id(const struct net_device *dev) { return vlan_dev_info(dev)->vlan_id; } -EXPORT_SYMBOL_GPL(vlan_dev_vlan_id); +EXPORT_SYMBOL(vlan_dev_vlan_id); + +static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) +{ + struct sk_buff *p; + + if (skb_bond_should_drop(skb)) + goto drop; + + skb->vlan_tci = vlan_tci; + skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + if (!skb->dev) + goto drop; + + for (p = napi->gro_list; p; p = p->next) { + NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev; + NAPI_GRO_CB(p)->flush = 0; + } + + return dev_gro_receive(napi, skb); + +drop: + return 2; +} + +int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) +{ + int err = NET_RX_SUCCESS; + + switch (vlan_gro_common(napi, grp, vlan_tci, skb)) { + case -1: + return netif_receive_skb(skb); + + case 2: + err = NET_RX_DROP; + /* fall through */ + + case 1: + kfree_skb(skb); + break; + } + + return err; +} +EXPORT_SYMBOL(vlan_gro_receive); + +int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + int err = NET_RX_DROP; + + if (!skb) + goto out; + + err = NET_RX_SUCCESS; + + switch (vlan_gro_common(napi, grp, vlan_tci, skb)) { + case -1: + return netif_receive_skb(skb); + + case 2: + err = NET_RX_DROP; + /* fall through */ + + case 1: + napi_reuse_skb(napi, skb); + break; + } + +out: + return err; +} +EXPORT_SYMBOL(vlan_gro_frags); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 89a3bbdfca3f..4a19acd3a32b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -546,6 +546,18 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } +static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int err = 0; + + if (netif_device_present(real_dev) && ops->ndo_neigh_setup) + err = ops->ndo_neigh_setup(dev, pa); + + return err; +} + static void vlan_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; @@ -713,6 +725,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_neigh_setup = vlan_dev_neigh_setup, }; static const struct net_device_ops vlan_netdev_accel_ops = { @@ -728,6 +741,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = { .ndo_set_multicast_list = vlan_dev_set_rx_mode, .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, + .ndo_neigh_setup = vlan_dev_neigh_setup, }; void vlan_setup(struct net_device *dev) diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 0663f99e977a..7ed75c7bd5d1 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. diff --git a/net/9p/client.c b/net/9p/client.c index 821f1ec0b2c3..1eb580c38fbb 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -618,7 +618,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) return ERR_PTR(-ENOMEM); ret = p9_idpool_get(clnt->fidpool); - if (fid->fid < 0) { + if (ret < 0) { ret = -ENOSPC; goto error; } diff --git a/net/Kconfig b/net/Kconfig index 6ec2cce7c167..cdb8fdef6c4a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -24,14 +24,6 @@ if NET menu "Networking options" -config NET_NS - bool "Network namespace support" - default n - depends on EXPERIMENTAL && NAMESPACES - help - Allow user space to create what appear to be multiple instances - of the network stack. - config COMPAT_NET_DEV_OPS def_bool y @@ -254,6 +246,8 @@ source "net/mac80211/Kconfig" endif # WIRELESS +source "net/wimax/Kconfig" + source "net/rfkill/Kconfig" source "net/9p/Kconfig" diff --git a/net/Makefile b/net/Makefile index ba4460432b7c..0fcce89d7169 100644 --- a/net/Makefile +++ b/net/Makefile @@ -63,3 +63,4 @@ endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif +obj-$(CONFIG_WIMAX) += wimax/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index b03ff58e9308..89f99d3beb60 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -443,13 +443,14 @@ static void aarp_send_probe_phase1(struct atalk_iface *iface) { struct ifreq atreq; struct sockaddr_at *sa = (struct sockaddr_at *)&atreq.ifr_addr; + const struct net_device_ops *ops = iface->dev->netdev_ops; sa->sat_addr.s_node = iface->address.s_node; sa->sat_addr.s_net = ntohs(iface->address.s_net); /* We pass the Net:Node to the drivers/cards by a Device ioctl. */ - if (!(iface->dev->do_ioctl(iface->dev, &atreq, SIOCSIFADDR))) { - (void)iface->dev->do_ioctl(iface->dev, &atreq, SIOCGIFADDR); + if (!(ops->ndo_do_ioctl(iface->dev, &atreq, SIOCSIFADDR))) { + ops->ndo_do_ioctl(iface->dev, &atreq, SIOCGIFADDR); if (iface->address.s_net != htons(sa->sat_addr.s_net) || iface->address.s_node != sa->sat_addr.s_node) iface->status |= ATIF_PROBE_FAIL; diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index d20f8a40f36e..0d9e506f5d5a 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -165,7 +165,6 @@ struct bnep_session { struct socket *sock; struct net_device *dev; - struct net_device_stats stats; }; void bnep_net_setup(struct net_device *dev); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 70fea8bdb4e5..52a6ce0d772b 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -306,7 +306,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) struct sk_buff *nskb; u8 type; - s->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += skb->len; type = *(u8 *) skb->data; skb_pull(skb, 1); @@ -343,7 +343,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) * may not be modified and because of the alignment requirements. */ nskb = alloc_skb(2 + ETH_HLEN + skb->len, GFP_KERNEL); if (!nskb) { - s->stats.rx_dropped++; + dev->stats.rx_dropped++; kfree_skb(skb); return -ENOMEM; } @@ -378,14 +378,14 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb) skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len); kfree_skb(skb); - s->stats.rx_packets++; + dev->stats.rx_packets++; nskb->ip_summed = CHECKSUM_NONE; nskb->protocol = eth_type_trans(nskb, dev); netif_rx_ni(nskb); return 0; badframe: - s->stats.rx_errors++; + dev->stats.rx_errors++; kfree_skb(skb); return 0; } @@ -448,8 +448,8 @@ send: kfree_skb(skb); if (len > 0) { - s->stats.tx_bytes += len; - s->stats.tx_packets++; + s->dev->stats.tx_bytes += len; + s->dev->stats.tx_packets++; return 0; } diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index f897da6e0444..d7a0e9722def 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -55,12 +55,6 @@ static int bnep_net_close(struct net_device *dev) return 0; } -static struct net_device_stats *bnep_net_get_stats(struct net_device *dev) -{ - struct bnep_session *s = netdev_priv(dev); - return &s->stats; -} - static void bnep_net_set_mc_list(struct net_device *dev) { #ifdef CONFIG_BT_BNEP_MC_FILTER @@ -128,11 +122,6 @@ static void bnep_net_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int bnep_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - return -EINVAL; -} - #ifdef CONFIG_BT_BNEP_MC_FILTER static inline int bnep_net_mc_filter(struct sk_buff *skb, struct bnep_session *s) { @@ -217,6 +206,18 @@ static int bnep_net_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } +static const struct net_device_ops bnep_netdev_ops = { + .ndo_open = bnep_net_open, + .ndo_stop = bnep_net_close, + .ndo_start_xmit = bnep_net_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_multicast_list = bnep_net_set_mc_list, + .ndo_set_mac_address = bnep_net_set_mac_addr, + .ndo_tx_timeout = bnep_net_timeout, + .ndo_change_mtu = eth_change_mtu, + +}; + void bnep_net_setup(struct net_device *dev) { @@ -224,15 +225,7 @@ void bnep_net_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; ether_setup(dev); - - dev->open = bnep_net_open; - dev->stop = bnep_net_close; - dev->hard_start_xmit = bnep_net_xmit; - dev->get_stats = bnep_net_get_stats; - dev->do_ioctl = bnep_net_ioctl; - dev->set_mac_address = bnep_net_set_mac_addr; - dev->set_multicast_list = bnep_net_set_mc_list; + dev->netdev_ops = &bnep_netdev_ops; dev->watchdog_timeo = HZ * 2; - dev->tx_timeout = bnep_net_timeout; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a65e43a17fbb..cf754ace0b75 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -58,11 +58,11 @@ static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; static int brnf_call_ip6tables __read_mostly = 1; static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly = 1; -static int brnf_filter_pppoe_tagged __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 0; +static int brnf_filter_pppoe_tagged __read_mostly = 0; #else -#define brnf_filter_vlan_tagged 1 -#define brnf_filter_pppoe_tagged 1 +#define brnf_filter_vlan_tagged 0 +#define brnf_filter_pppoe_tagged 0 #endif static inline __be16 vlan_proto(const struct sk_buff *skb) @@ -686,8 +686,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; nf_bridge_pull_encap_header(skb); @@ -828,8 +831,11 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index fa108c46e851..820252aee81f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -79,18 +79,19 @@ static inline int ebt_do_match (struct ebt_entry_match *m, { par->match = m->u.match; par->matchinfo = m->data; - return m->u.match->match(skb, par); + return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH; } static inline int ebt_dev_check(char *entry, const struct net_device *device) { int i = 0; - const char *devname = device->name; + const char *devname; if (*entry == '\0') return 0; if (!device) return 1; + devname = device->name; /* 1 is the wildcard token */ while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) i++; diff --git a/net/can/af_can.c b/net/can/af_can.c index 3dadb338addd..fa417ca6cbe6 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -414,6 +414,12 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can * filter for error frames (CAN_ERR_FLAG bit set in mask). * + * The provided pointer to the sk_buff is guaranteed to be valid as long as + * the callback function is running. The callback function must *not* free + * the given sk_buff while processing it's task. When the given sk_buff is + * needed after the end of the callback function it must be cloned inside + * the callback function with skb_clone(). + * * Return: * 0 on success * -ENOMEM on missing cache mem to create subscription entry @@ -569,13 +575,8 @@ EXPORT_SYMBOL(can_rx_unregister); static inline void deliver(struct sk_buff *skb, struct receiver *r) { - struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); - - if (clone) { - clone->sk = skb->sk; - r->func(clone, r->data); - r->matches++; - } + r->func(skb, r->data); + r->matches++; } static int can_rcv_filter(struct dev_rcv_lists *d, struct sk_buff *skb) diff --git a/net/can/bcm.c b/net/can/bcm.c index da0d426c0ce4..b7c7d4651136 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -70,7 +70,7 @@ #define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO - "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; + "can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n"; MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -90,6 +90,7 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct timeval ival1, ival2; struct hrtimer timer, thrtimer; + struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int count; @@ -341,19 +342,15 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } -/* - * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +static void bcm_tx_timeout_tsklet(unsigned long data) { - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - enum hrtimer_restart ret = HRTIMER_NORESTART; + struct bcm_op *op = (struct bcm_op *)data; + struct bcm_msg_head msg_head; if (op->kt_ival1.tv64 && (op->count > 0)) { op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { - struct bcm_msg_head msg_head; /* create notification to user */ msg_head.opcode = TX_EXPIRED; @@ -372,20 +369,32 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival1); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); } else { if (op->kt_ival2.tv64) { /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); } } +} - return ret; +/* + * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions + */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + tasklet_schedule(&op->tsklet); + + return HRTIMER_NORESTART; } /* @@ -402,6 +411,9 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; + /* this element is not throttled anymore */ + data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV); + head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; @@ -420,37 +432,32 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct can_frame *lastdata, - struct can_frame *rxdata) + const struct can_frame *rxdata) { memcpy(lastdata, rxdata, CFSIZ); - /* mark as used */ - lastdata->can_dlc |= RX_RECV; + /* mark as used and throttled by default */ + lastdata->can_dlc |= (RX_RECV|RX_THR); - /* throtteling mode inactive OR data update already on the run ? */ - if (!op->kt_ival2.tv64 || hrtimer_callback_running(&op->thrtimer)) { + /* throtteling mode inactive ? */ + if (!op->kt_ival2.tv64) { /* send RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); + bcm_rx_changed(op, lastdata); return; } - if (hrtimer_active(&op->thrtimer)) { - /* mark as 'throttled' */ - lastdata->can_dlc |= RX_THR; + /* with active throttling timer we are just done here */ + if (hrtimer_active(&op->thrtimer)) return; - } - if (!op->kt_lastmsg.tv64) { - /* send first RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); - op->kt_lastmsg = ktime_get(); - return; - } + /* first receiption with enabled throttling mode */ + if (!op->kt_lastmsg.tv64) + goto rx_changed_settime; + /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { - /* mark as 'throttled' and start timer */ - lastdata->can_dlc |= RX_THR; + /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS); @@ -458,7 +465,8 @@ static void bcm_rx_update_and_send(struct bcm_op *op, } /* the gap was that big, that throttling was not needed here */ - bcm_rx_changed(op, rxdata); +rx_changed_settime: + bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } @@ -467,7 +475,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, int index, - struct can_frame *rxdata) + const struct can_frame *rxdata) { /* * no one uses the MSBs of can_dlc for comparation, @@ -511,14 +519,12 @@ static void bcm_rx_starttimer(struct bcm_op *op) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +static void bcm_rx_timeout_tsklet(unsigned long data) { - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + struct bcm_op *op = (struct bcm_op *)data; struct bcm_msg_head msg_head; + /* create notification to user */ msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; @@ -528,6 +534,17 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); +} + +/* + * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out + */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + /* schedule before NET_RX_SOFTIRQ */ + tasklet_hi_schedule(&op->tsklet); /* no restart of the timer is done here! */ @@ -541,9 +558,25 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) } /* + * bcm_rx_do_flush - helper for bcm_rx_thr_flush + */ +static inline int bcm_rx_do_flush(struct bcm_op *op, int update, int index) +{ + if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) { + if (update) + bcm_rx_changed(op, &op->last_frames[index]); + return 1; + } + return 0; +} + +/* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace + * + * update == 0 : just check if throttled data is available (any irq context) + * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op) +static int bcm_rx_thr_flush(struct bcm_op *op, int update) { int updated = 0; @@ -551,27 +584,25 @@ static int bcm_rx_thr_flush(struct bcm_op *op) int i; /* for MUX filter we start at index 1 */ - for (i = 1; i < op->nframes; i++) { - if ((op->last_frames) && - (op->last_frames[i].can_dlc & RX_THR)) { - op->last_frames[i].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[i]); - updated++; - } - } + for (i = 1; i < op->nframes; i++) + updated += bcm_rx_do_flush(op, update, i); } else { /* for RX_FILTER_ID and simple filter */ - if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) { - op->last_frames[0].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[0]); - updated++; - } + updated += bcm_rx_do_flush(op, update, 0); } return updated; } +static void bcm_rx_thr_tsklet(unsigned long data) +{ + struct bcm_op *op = (struct bcm_op *)data; + + /* push the changed data to the userspace */ + bcm_rx_thr_flush(op, 1); +} + /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -580,7 +611,9 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - if (bcm_rx_thr_flush(op)) { + tasklet_schedule(&op->thrtsklet); + + if (bcm_rx_thr_flush(op, 0)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -596,29 +629,21 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; - struct can_frame rxframe; + const struct can_frame *rxframe = (struct can_frame *)skb->data; int i; /* disable timeout */ hrtimer_cancel(&op->timer); - if (skb->len == sizeof(rxframe)) { - memcpy(&rxframe, skb->data, sizeof(rxframe)); - /* save rx timestamp */ - op->rx_stamp = skb->tstamp; - /* save originator for recvfrom() */ - op->rx_ifindex = skb->dev->ifindex; - /* update statistics */ - op->frames_abs++; - kfree_skb(skb); - - } else { - kfree_skb(skb); + if (op->can_id != rxframe->can_id) return; - } - if (op->can_id != rxframe.can_id) - return; + /* save rx timestamp */ + op->rx_stamp = skb->tstamp; + /* save originator for recvfrom() */ + op->rx_ifindex = skb->dev->ifindex; + /* update statistics */ + op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ @@ -628,16 +653,14 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_update_and_send(op, &op->last_frames[0], rxframe); + goto rx_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ - bcm_rx_cmp_to_index(op, 0, &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_cmp_to_index(op, 0, rxframe); + goto rx_starttimer; } if (op->nframes > 1) { @@ -649,15 +672,17 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) */ for (i = 1; i < op->nframes; i++) { - if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) == + if ((GET_U64(&op->frames[0]) & GET_U64(rxframe)) == (GET_U64(&op->frames[0]) & GET_U64(&op->frames[i]))) { - bcm_rx_cmp_to_index(op, i, &rxframe); + bcm_rx_cmp_to_index(op, i, rxframe); break; } } - bcm_rx_starttimer(op); } + +rx_starttimer: + bcm_rx_starttimer(op); } /* @@ -681,6 +706,12 @@ static void bcm_remove_op(struct bcm_op *op) hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); + if (op->tsklet.func) + tasklet_kill(&op->tsklet); + + if (op->thrtsklet.func) + tasklet_kill(&op->thrtsklet); + if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -891,6 +922,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_tx_timeout_handler; + /* initialize tasklet for tx countevent notification */ + tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, + (unsigned long) op); + /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1054,9 +1089,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_rx_timeout_handler; + /* initialize tasklet for rx timeout notification */ + tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, + (unsigned long) op); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->thrtimer.function = bcm_rx_thr_handler; + /* initialize tasklet for rx throttle handling */ + tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, + (unsigned long) op); + /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1102,7 +1145,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = ktime_set(0, 0); hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op); + bcm_rx_thr_flush(op, 1); } if ((op->flags & STARTTIMER) && op->kt_ival1.tv64) diff --git a/net/can/raw.c b/net/can/raw.c index 27aab63df467..0703cba4bf9f 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -99,13 +99,14 @@ static void raw_rcv(struct sk_buff *skb, void *data) struct raw_sock *ro = raw_sk(sk); struct sockaddr_can *addr; - if (!ro->recv_own_msgs) { - /* check the received tx sock reference */ - if (skb->sk == sk) { - kfree_skb(skb); - return; - } - } + /* check the received tx sock reference */ + if (!ro->recv_own_msgs && skb->sk == sk) + return; + + /* clone the given skb to be able to enqueue it into the rcv queue */ + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return; /* * Put the datagram to the queue so that raw_recvmsg() can diff --git a/net/core/dev.c b/net/core/dev.c index 09c66a449da6..5379b0c1190a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,9 @@ /* Instead of increasing this, you should create a hash table. */ #define MAX_GRO_SKBS 8 +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -167,25 +170,6 @@ static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ -#ifdef CONFIG_NET_DMA -struct net_dma { - struct dma_client client; - spinlock_t lock; - cpumask_t channel_mask; - struct dma_chan **channels; -}; - -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state); - -static struct net_dma net_dma = { - .client = { - .event_callback = netdev_dma_event, - }, -}; -#endif - /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. @@ -1104,6 +1088,11 @@ int dev_open(struct net_device *dev) dev->flags |= IFF_UP; /* + * Enable NET_DMA + */ + dmaengine_get(); + + /* * Initialize multicasting status */ dev_set_rx_mode(dev); @@ -1180,6 +1169,11 @@ int dev_close(struct net_device *dev) */ call_netdevice_notifiers(NETDEV_DOWN, dev); + /* + * Shutdown NET_DMA + */ + dmaengine_put(); + return 0; } @@ -1540,7 +1534,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); - if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + struct net_device *dev = skb->dev; + struct ethtool_drvinfo info = {}; + + if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) + dev->ethtool_ops->get_drvinfo(dev, &info); + + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " + "ip_summed=%d", + info.driver, dev ? dev->features : 0L, + skb->sk ? skb->sk->sk_route_caps : 0L, + skb->len, skb->data_len, skb->ip_summed); + if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -2345,7 +2351,7 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (!skb_shinfo(skb)->frag_list) + if (NAPI_GRO_CB(skb)->count == 1) goto out; rcu_read_lock(); @@ -2365,6 +2371,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: + skb_shinfo(skb)->gso_size = 0; __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2383,7 +2390,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2392,10 +2399,14 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; + int free; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + goto normal; + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; @@ -2408,14 +2419,18 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->free = 0; for (p = napi->gro_list; p; p = p->next) { count++; - NAPI_GRO_CB(p)->same_flow = - p->mac_len == mac_len && - !memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len); - NAPI_GRO_CB(p)->flush = 0; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + if (p->mac_len != mac_len || + memcmp(skb_mac_header(p), skb_mac_header(skb), + mac_len)) + NAPI_GRO_CB(p)->same_flow = 0; } pp = ptype->gro_receive(&napi->gro_list, skb); @@ -2427,6 +2442,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; + free = NAPI_GRO_CB(skb)->free; if (pp) { struct sk_buff *nskb = *pp; @@ -2446,17 +2462,119 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) } NAPI_GRO_CB(skb)->count = 1; + skb_shinfo(skb)->gso_size = skb->len; skb->next = napi->gro_list; napi->gro_list = skb; ok: - return NET_RX_SUCCESS; + return free; normal: - return netif_receive_skb(skb); + return -1; +} +EXPORT_SYMBOL(dev_gro_receive); + +static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff *p; + + for (p = napi->gro_list; p; p = p->next) { + NAPI_GRO_CB(p)->same_flow = 1; + NAPI_GRO_CB(p)->flush = 0; + } + + return dev_gro_receive(napi, skb); +} + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 1: + kfree_skb(skb); + break; + } + + return NET_RX_SUCCESS; } EXPORT_SYMBOL(napi_gro_receive); +void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) +{ + __skb_pull(skb, skb_headlen(skb)); + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + + napi->skb = skb; +} +EXPORT_SYMBOL(napi_reuse_skb); + +struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, + struct napi_gro_fraginfo *info) +{ + struct net_device *dev = napi->dev; + struct sk_buff *skb = napi->skb; + + napi->skb = NULL; + + if (!skb) { + skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); + if (!skb) + goto out; + + skb_reserve(skb, NET_IP_ALIGN); + } + + BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + skb_shinfo(skb)->nr_frags = info->nr_frags; + memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); + + skb->data_len = info->len; + skb->len += info->len; + skb->truesize += info->len; + + if (!pskb_may_pull(skb, ETH_HLEN)) { + napi_reuse_skb(napi, skb); + skb = NULL; + goto out; + } + + skb->protocol = eth_type_trans(skb, dev); + + skb->ip_summed = info->ip_summed; + skb->csum = info->csum; + +out: + return skb; +} +EXPORT_SYMBOL(napi_fraginfo_skb); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + int err = NET_RX_DROP; + + if (!skb) + goto out; + + err = NET_RX_SUCCESS; + + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 0: + goto out; + } + + napi_reuse_skb(napi, skb); + +out: + return err; +} +EXPORT_SYMBOL(napi_gro_frags); + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2535,11 +2653,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, { INIT_LIST_HEAD(&napi->poll_list); napi->gro_list = NULL; + napi->skb = NULL; napi->poll = poll; napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); -#ifdef CONFIG_NETPOLL napi->dev = dev; +#ifdef CONFIG_NETPOLL spin_lock_init(&napi->poll_lock); napi->poll_owner = -1; #endif @@ -2552,6 +2671,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); + kfree(napi->skb); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; @@ -2635,14 +2755,7 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (!cpus_empty(net_dma.channel_mask)) { - int chan_idx; - for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) { - struct dma_chan *chan = net_dma.channels[chan_idx]; - if (chan) - dma_async_memcpy_issue_pending(chan); - } - } + dma_issue_pending_all(); #endif return; @@ -4331,6 +4444,45 @@ err_uninit: } /** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + +/** * register_netdev - register a network device * @dev: device to register * @@ -4833,122 +4985,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#ifdef CONFIG_NET_DMA -/** - * net_dma_rebalance - try to maintain one DMA channel per CPU - * @net_dma: DMA client and associated data (lock, channels, channel_mask) - * - * This is called when the number of channels allocated to the net_dma client - * changes. The net_dma client tries to have one DMA channel per CPU. - */ - -static void net_dma_rebalance(struct net_dma *net_dma) -{ - unsigned int cpu, i, n, chan_idx; - struct dma_chan *chan; - - if (cpus_empty(net_dma->channel_mask)) { - for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); - return; - } - - i = 0; - cpu = first_cpu(cpu_online_map); - - for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) { - chan = net_dma->channels[chan_idx]; - - n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) - + (i < (num_online_cpus() % - cpus_weight(net_dma->channel_mask)) ? 1 : 0)); - - while(n) { - per_cpu(softnet_data, cpu).net_dma = chan; - cpu = next_cpu(cpu, cpu_online_map); - n--; - } - i++; - } -} - -/** - * netdev_dma_event - event callback for the net_dma_client - * @client: should always be net_dma_client - * @chan: DMA channel for the event - * @state: DMA state to be handled - */ -static enum dma_state_client -netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_state state) -{ - int i, found = 0, pos = -1; - struct net_dma *net_dma = - container_of(client, struct net_dma, client); - enum dma_state_client ack = DMA_DUP; /* default: take no action */ - - spin_lock(&net_dma->lock); - switch (state) { - case DMA_RESOURCE_AVAILABLE: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - break; - } else if (net_dma->channels[i] == NULL && pos < 0) - pos = i; - - if (!found && pos >= 0) { - ack = DMA_ACK; - net_dma->channels[pos] = chan; - cpu_set(pos, net_dma->channel_mask); - net_dma_rebalance(net_dma); - } - break; - case DMA_RESOURCE_REMOVED: - for (i = 0; i < nr_cpu_ids; i++) - if (net_dma->channels[i] == chan) { - found = 1; - pos = i; - break; - } - - if (found) { - ack = DMA_ACK; - cpu_clear(pos, net_dma->channel_mask); - net_dma->channels[i] = NULL; - net_dma_rebalance(net_dma); - } - break; - default: - break; - } - spin_unlock(&net_dma->lock); - - return ack; -} - -/** - * netdev_dma_register - register the networking subsystem as a DMA client - */ -static int __init netdev_dma_register(void) -{ - net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), - GFP_KERNEL); - if (unlikely(!net_dma.channels)) { - printk(KERN_NOTICE - "netdev_dma: no memory for net_dma.channels\n"); - return -ENOMEM; - } - spin_lock_init(&net_dma.lock); - dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); - dma_async_client_register(&net_dma.client); - dma_async_client_chan_request(&net_dma.client); - return 0; -} - -#else -static int __init netdev_dma_register(void) { return -ENODEV; } -#endif /* CONFIG_NET_DMA */ /** * netdev_increment_features - increment feature set by one @@ -5168,8 +5204,6 @@ static int __init net_dev_init(void) if (register_pernet_device(&default_device_ops)) goto out; - netdev_dma_register(); - open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 55cffad2f328..55151faaf90c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -341,8 +341,8 @@ again: rv = register_pernet_operations(first_device, ops); if (rv < 0) ida_remove(&net_generic_ids, *id); - mutex_unlock(&net_mutex); out: + mutex_unlock(&net_mutex); return rv; } EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8d0abb26433..da74b844f4ea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -73,17 +73,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly; static void sock_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct sk_buff *skb = (struct sk_buff *) buf->private; - - kfree_skb(skb); + put_page(buf->page); } static void sock_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - struct sk_buff *skb = (struct sk_buff *) buf->private; - - skb_get(skb); + get_page(buf->page); } static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -1334,9 +1330,19 @@ fault: */ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) { - struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private; + put_page(spd->pages[i]); +} - kfree_skb(skb); +static inline struct page *linear_to_page(struct page *page, unsigned int len, + unsigned int offset) +{ + struct page *p = alloc_pages(GFP_KERNEL, 0); + + if (!p) + return NULL; + memcpy(page_address(p) + offset, page_address(page) + offset, len); + + return p; } /* @@ -1344,16 +1350,23 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int len, unsigned int offset, - struct sk_buff *skb) + struct sk_buff *skb, int linear) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; + if (linear) { + page = linear_to_page(page, len, offset); + if (!page) + return 1; + } else + get_page(page); + spd->pages[spd->nr_pages] = page; spd->partial[spd->nr_pages].len = len; spd->partial[spd->nr_pages].offset = offset; - spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb); spd->nr_pages++; + return 0; } @@ -1369,7 +1382,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd) + struct splice_pipe_desc *spd, int linear) { if (!*len) return 1; @@ -1392,7 +1405,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, flen, poff, skb)) + if (spd_fill_page(spd, page, flen, poff, skb, linear)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1419,7 +1432,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd)) + offset, len, skb, spd, 1)) return 1; /* @@ -1429,7 +1442,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd)) + offset, len, skb, spd, 0)) return 1; } @@ -1442,7 +1455,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, * the frag list, if such a thing exists. We'd probably need to recurse to * handle that cleanly. */ -int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, +int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { @@ -1455,16 +1468,6 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; - struct sk_buff *skb; - - /* - * I'd love to avoid the clone here, but tcp_read_sock() - * ignores reference counts and unconditonally kills the sk_buff - * on return from the actor. - */ - skb = skb_clone(__skb, GFP_KERNEL); - if (unlikely(!skb)) - return -ENOMEM; /* * __skb_splice_bits() only fails if the output has no room left, @@ -1488,15 +1491,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, } done: - /* - * drop our reference to the clone, the pipe consumption will - * drop the rest. - */ - kfree_skb(skb); - if (spd.nr_pages) { + struct sock *sk = skb->sk; int ret; - struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -2215,10 +2212,10 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, return 0; next_skb: - block_limit = skb_headlen(st->cur_skb); + block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; if (abs_offset < block_limit) { - *data = st->cur_skb->data + abs_offset; + *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } @@ -2253,13 +2250,14 @@ next_skb: st->frag_data = NULL; } - if (st->cur_skb->next) { - st->cur_skb = st->cur_skb->next; + if (st->root_skb == st->cur_skb && + skb_shinfo(st->root_skb)->frag_list) { + st->cur_skb = skb_shinfo(st->root_skb)->frag_list; st->frag_idx = 0; goto next_skb; - } else if (st->root_skb == st->cur_skb && - skb_shinfo(st->root_skb)->frag_list) { - st->cur_skb = skb_shinfo(st->root_skb)->frag_list; + } else if (st->cur_skb->next) { + st->cur_skb = st->cur_skb->next; + st->frag_idx = 0; goto next_skb; } @@ -2588,12 +2586,30 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *nskb; unsigned int headroom; unsigned int hlen = p->data - skb_mac_header(p); + unsigned int len = skb->len; - if (hlen + p->len + skb->len >= 65536) + if (hlen + p->len + len >= 65536) return -E2BIG; if (skb_shinfo(p)->frag_list) goto merge; + else if (!skb_headlen(p) && !skb_headlen(skb) && + skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < + MAX_SKB_FRAGS) { + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, + skb_shinfo(skb)->frags, + skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); + + skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; + skb_shinfo(skb)->nr_frags = 0; + + skb->truesize -= skb->data_len; + skb->len -= skb->data_len; + skb->data_len = 0; + + NAPI_GRO_CB(skb)->free = 1; + goto done; + } headroom = skb_headroom(p); nskb = netdev_alloc_skb(p->dev, headroom); @@ -2613,6 +2629,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; + skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; skb_header_release(p); nskb->prev = p; @@ -2627,14 +2644,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: - NAPI_GRO_CB(p)->count++; p->prev->next = skb; p->prev = skb; skb_header_release(skb); - p->data_len += skb->len; - p->truesize += skb->len; - p->len += skb->len; +done: + NAPI_GRO_CB(p)->count++; + p->data_len += len; + p->truesize += len; + p->len += len; NAPI_GRO_CB(skb)->same_flow = 1; return 0; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 5dbfe5fdc0d6..8379496de82b 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -191,7 +191,7 @@ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); return ret; } @@ -272,7 +272,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -314,7 +314,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -380,7 +380,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -458,7 +458,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return ret; } @@ -687,7 +687,7 @@ err_pg: nla_nest_cancel(dcbnl_skb, pg_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; @@ -949,7 +949,7 @@ err_bcn: nla_nest_cancel(dcbnl_skb, bcn_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 7aa2a7acc7ec..ad6dffd9070e 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,7 +1,6 @@ menuconfig IP_DCCP tristate "The DCCP Protocol (EXPERIMENTAL)" depends on INET && EXPERIMENTAL - select IP_DCCP_CCID2 ---help--- Datagram Congestion Control Protocol (RFC 4340) @@ -25,9 +24,6 @@ config INET_DCCP_DIAG def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m -config IP_DCCP_ACKVEC - bool - source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index f4f8793aafff..2991efcc8dea 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -2,14 +2,23 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o +# +# CCID algorithms to be used by dccp.ko +# +# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency +dccp-y += ccids/ccid2.o ackvec.o +dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o +dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \ + ccids/lib/tfrc_equation.o \ + ccids/lib/packet_history.o \ + ccids/lib/loss_interval.o + dccp_ipv4-y := ipv4.o # build dccp_ipv6 as module whenever either IPv6 or DCCP is a module obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o -dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o - obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o @@ -17,5 +26,3 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o dccp_probe-y := probe.o - -obj-y += ccids/ diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 4ccee030524e..45f95e55f873 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -84,7 +84,6 @@ struct dccp_ackvec_record { struct sock; struct sk_buff; -#ifdef CONFIG_IP_DCCP_ACKVEC extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); @@ -106,52 +105,4 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { return av->av_vec_len; } -#else /* CONFIG_IP_DCCP_ACKVEC */ -static inline int dccp_ackvec_init(void) -{ - return 0; -} - -static inline void dccp_ackvec_exit(void) -{ -} - -static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) -{ - return NULL; -} - -static inline void dccp_ackvec_free(struct dccp_ackvec *av) -{ -} - -static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) -{ - return -1; -} - -static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno) -{ -} - -static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u64 *ackno, const u8 opt, - const u8 *value, const u8 len) -{ - return -1; -} - -static inline int dccp_insert_option_ackvec(const struct sock *sk, - const struct sk_buff *skb) -{ - return -1; -} - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) -{ - return 0; -} -#endif /* CONFIG_IP_DCCP_ACKVEC */ #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index bcc643f992ae..f3e9ba1cfd01 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -12,56 +12,70 @@ */ #include "ccid.h" +#include "ccids/lib/tfrc.h" -static u8 builtin_ccids[] = { - DCCPC_CCID2, /* CCID2 is supported by default */ -#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) - DCCPC_CCID3, +static struct ccid_operations *ccids[] = { + &ccid2_ops, +#ifdef CONFIG_IP_DCCP_CCID3 + &ccid3_ops, #endif }; -static struct ccid_operations *ccids[CCID_MAX]; -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) -static atomic_t ccids_lockct = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(ccids_lock); - -/* - * The strategy is: modifications ccids vector are short, do not sleep and - * veeery rare, but read access should be free of any exclusive locks. - */ -static void ccids_write_lock(void) +static struct ccid_operations *ccid_by_number(const u8 id) { - spin_lock(&ccids_lock); - while (atomic_read(&ccids_lockct) != 0) { - spin_unlock(&ccids_lock); - yield(); - spin_lock(&ccids_lock); - } + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + if (ccids[i]->ccid_id == id) + return ccids[i]; + return NULL; } -static inline void ccids_write_unlock(void) +/* check that up to @array_len members in @ccid_array are supported */ +bool ccid_support_check(u8 const *ccid_array, u8 array_len) { - spin_unlock(&ccids_lock); + while (array_len > 0) + if (ccid_by_number(ccid_array[--array_len]) == NULL) + return false; + return true; } -static inline void ccids_read_lock(void) +/** + * ccid_get_builtin_ccids - Populate a list of built-in CCIDs + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. + */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) { - atomic_inc(&ccids_lockct); - smp_mb__after_atomic_inc(); - spin_unlock_wait(&ccids_lock); + *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + + for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1) + (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id; + return 0; } -static inline void ccids_read_unlock(void) +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) { - atomic_dec(&ccids_lockct); -} + u8 *ccid_array, array_len; + int err = 0; -#else -#define ccids_write_lock() do { } while(0) -#define ccids_write_unlock() do { } while(0) -#define ccids_read_lock() do { } while(0) -#define ccids_read_unlock() do { } while(0) -#endif + if (len < ARRAY_SIZE(ccids)) + return -EINVAL; + + if (ccid_get_builtin_ccids(&ccid_array, &array_len)) + return -ENOBUFS; + + if (put_user(array_len, optlen) || + copy_to_user(optval, ccid_array, array_len)) + err = -EFAULT; + + kfree(ccid_array); + return err; +} static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { @@ -93,48 +107,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } -/* check that up to @array_len members in @ccid_array are supported */ -bool ccid_support_check(u8 const *ccid_array, u8 array_len) -{ - u8 i, j, found; - - for (i = 0, found = 0; i < array_len; i++, found = 0) { - for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) - found = (ccid_array[i] == builtin_ccids[j]); - if (!found) - return false; - } - return true; -} - -/** - * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array - * @ccid_array: pointer to copy into - * @array_len: value to return length into - * This function allocates memory - caller must see that it is freed after use. - */ -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) -{ - *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); - if (*ccid_array == NULL) - return -ENOBUFS; - *array_len = ARRAY_SIZE(builtin_ccids); - return 0; -} - -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - if (len < sizeof(builtin_ccids)) - return -EINVAL; - - if (put_user(sizeof(builtin_ccids), optlen) || - copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) - return -EFAULT; - return 0; -} - -int ccid_register(struct ccid_operations *ccid_ops) +static int ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -152,79 +125,40 @@ int ccid_register(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - ccids_write_lock(); - err = -EEXIST; - if (ccids[ccid_ops->ccid_id] == NULL) { - ccids[ccid_ops->ccid_id] = ccid_ops; - err = 0; - } - ccids_write_unlock(); - if (err != 0) - goto out_free_tx_slab; - - pr_info("CCID: Registered CCID %d (%s)\n", + pr_info("CCID: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); + err = 0; out: return err; -out_free_tx_slab: - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); - ccid_ops->ccid_hc_tx_slab = NULL; - goto out; out_free_rx_slab: ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; goto out; } -EXPORT_SYMBOL_GPL(ccid_register); - -int ccid_unregister(struct ccid_operations *ccid_ops) +static void ccid_deactivate(struct ccid_operations *ccid_ops) { - ccids_write_lock(); - ccids[ccid_ops->ccid_id] = NULL; - ccids_write_unlock(); - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); ccid_ops->ccid_hc_tx_slab = NULL; ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Unregistered CCID %d (%s)\n", + pr_info("CCID: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); - return 0; } -EXPORT_SYMBOL_GPL(ccid_unregister); - -struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) +struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx) { - struct ccid_operations *ccid_ops; + struct ccid_operations *ccid_ops = ccid_by_number(id); struct ccid *ccid = NULL; - ccids_read_lock(); -#ifdef CONFIG_MODULES - if (ccids[id] == NULL) { - /* We only try to load if in process context */ - ccids_read_unlock(); - if (gfp & GFP_ATOMIC) - goto out; - request_module("net-dccp-ccid-%d", id); - ccids_read_lock(); - } -#endif - ccid_ops = ccids[id]; if (ccid_ops == NULL) - goto out_unlock; - - if (!try_module_get(ccid_ops->ccid_owner)) - goto out_unlock; - - ccids_read_unlock(); + goto out; ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab : - ccid_ops->ccid_hc_tx_slab, gfp); + ccid_ops->ccid_hc_tx_slab, gfp_any()); if (ccid == NULL) - goto out_module_put; + goto out; ccid->ccid_ops = ccid_ops; if (rx) { memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size); @@ -239,53 +173,57 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) } out: return ccid; -out_unlock: - ccids_read_unlock(); - goto out; out_free_ccid: kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab : ccid_ops->ccid_hc_tx_slab, ccid); ccid = NULL; -out_module_put: - module_put(ccid_ops->ccid_owner); goto out; } -EXPORT_SYMBOL_GPL(ccid_new); - -static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) +void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) { - struct ccid_operations *ccid_ops; - - if (ccid == NULL) - return; - - ccid_ops = ccid->ccid_ops; - if (rx) { - if (ccid_ops->ccid_hc_rx_exit != NULL) - ccid_ops->ccid_hc_rx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_rx_slab, ccid); - } else { - if (ccid_ops->ccid_hc_tx_exit != NULL) - ccid_ops->ccid_hc_tx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_rx_exit != NULL) + ccid->ccid_ops->ccid_hc_rx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid); } - ccids_read_lock(); - if (ccids[ccid_ops->ccid_id] != NULL) - module_put(ccid_ops->ccid_owner); - ccids_read_unlock(); } -void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) +void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) { - ccid_delete(ccid, sk, 1); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_tx_exit != NULL) + ccid->ccid_ops->ccid_hc_tx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid); + } } -EXPORT_SYMBOL_GPL(ccid_hc_rx_delete); - -void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) +int __init ccid_initialize_builtins(void) { - ccid_delete(ccid, sk, 0); + int i, err = tfrc_lib_init(); + + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) { + err = ccid_activate(ccids[i]); + if (err) + goto unwind_registrations; + } + return 0; + +unwind_registrations: + while(--i >= 0) + ccid_deactivate(ccids[i]); + tfrc_lib_exit(); + return err; } -EXPORT_SYMBOL_GPL(ccid_hc_tx_delete); +void ccid_cleanup_builtins(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + ccid_deactivate(ccids[i]); + tfrc_lib_exit(); +} diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 18f69423a708..facedd20b531 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -29,7 +29,6 @@ struct tcp_info; * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) * @ccid_name: alphabetical identifier string for @ccid_id - * @ccid_owner: module which implements/owns this CCID * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket * @@ -48,7 +47,6 @@ struct ccid_operations { unsigned char ccid_id; __u32 ccid_ccmps; const char *ccid_name; - struct module *ccid_owner; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; __u32 ccid_hc_rx_obj_size, @@ -90,8 +88,13 @@ struct ccid_operations { int __user *optlen); }; -extern int ccid_register(struct ccid_operations *ccid_ops); -extern int ccid_unregister(struct ccid_operations *ccid_ops); +extern struct ccid_operations ccid2_ops; +#ifdef CONFIG_IP_DCCP_CCID3 +extern struct ccid_operations ccid3_ops; +#endif + +extern int ccid_initialize_builtins(void); +extern void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; @@ -108,8 +111,7 @@ extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, char __user *, int __user *); -extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, - gfp_t gfp); +extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 12275943eab8..4b5db44970aa 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,80 +1,51 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)" depends on EXPERIMENTAL -config IP_DCCP_CCID2 - tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" - def_tristate IP_DCCP - select IP_DCCP_ACKVEC - ---help--- - CCID 2, TCP-like Congestion Control, denotes Additive Increase, - Multiplicative Decrease (AIMD) congestion control with behavior - modelled directly on TCP, including congestion window, slow start, - timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum - bandwidth over the long term, consistent with the use of end-to-end - congestion control, but halves its congestion window in response to - each congestion event. This leads to the abrupt rate changes - typical of TCP. Applications should use CCID 2 if they prefer - maximum bandwidth utilization to steadiness of rate. This is often - the case for applications that are not playing their data directly - to the user. For example, a hypothetical application that - transferred files over DCCP, using application-level retransmissions - for lost packets, would prefer CCID 2 to CCID 3. On-line games may - also prefer CCID 2. See RFC 4341 for further details. - - CCID2 is the default CCID used by DCCP. - config IP_DCCP_CCID2_DEBUG - bool "CCID2 debugging messages" - depends on IP_DCCP_CCID2 - ---help--- - Enable CCID2-specific debugging messages. + bool "CCID-2 debugging messages" + ---help--- + Enable CCID-2 specific debugging messages. - When compiling CCID2 as a module, this debugging output can - additionally be toggled by setting the ccid2_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid2_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. config IP_DCCP_CCID3 - tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" - def_tristate IP_DCCP - select IP_DCCP_TFRC_LIB + bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + def_bool y if (IP_DCCP = y || IP_DCCP = m) ---help--- - CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to be reasonably fair when competing for bandwidth with TCP-like flows, where a flow is "reasonably fair" if its sending rate is generally within a factor of two of the sending rate of a TCP flow under the same conditions. However, TFRC has a much lower variation of - throughput over time compared with TCP, which makes CCID 3 more - suitable than CCID 2 for applications such streaming media where a + throughput over time compared with TCP, which makes CCID-3 more + suitable than CCID-2 for applications such streaming media where a relatively smooth sending rate is of importance. - CCID 3 is further described in RFC 4342, + CCID-3 is further described in RFC 4342, http://www.ietf.org/rfc/rfc4342.txt The TFRC congestion control algorithms were initially described in - RFC 3448. + RFC 5348. This text was extracted from RFC 4340 (sec. 10.2), http://www.ietf.org/rfc/rfc4340.txt - - To compile this CCID as a module, choose M here: the module will be - called dccp_ccid3. - If in doubt, say M. + If in doubt, say N. config IP_DCCP_CCID3_DEBUG - bool "CCID3 debugging messages" - depends on IP_DCCP_CCID3 - ---help--- - Enable CCID3-specific debugging messages. + bool "CCID-3 debugging messages" + depends on IP_DCCP_CCID3 + ---help--- + Enable CCID-3 specific debugging messages. - When compiling CCID3 as a module, this debugging output can - additionally be toggled by setting the ccid3_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid3_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. config IP_DCCP_CCID3_RTO int "Use higher bound for nofeedback timer" @@ -108,12 +79,8 @@ config IP_DCCP_CCID3_RTO therefore not be performed on WANs. config IP_DCCP_TFRC_LIB - tristate - default n + def_bool y if IP_DCCP_CCID3 config IP_DCCP_TFRC_DEBUG - bool - depends on IP_DCCP_TFRC_LIB - default y if IP_DCCP_CCID3_DEBUG - + def_bool y if IP_DCCP_CCID3_DEBUG endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile deleted file mode 100644 index 438f20bccff7..000000000000 --- a/net/dccp/ccids/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o - -dccp_ccid3-y := ccid3.o - -obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o - -dccp_ccid2-y := ccid2.o - -obj-y += lib/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c9ea19a4d85e..d235294ace23 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -768,10 +768,9 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } } -static struct ccid_operations ccid2 = { +struct ccid_operations ccid2_ops = { .ccid_id = DCCPC_CCID2, .ccid_name = "TCP-like", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), .ccid_hc_tx_init = ccid2_hc_tx_init, .ccid_hc_tx_exit = ccid2_hc_tx_exit, @@ -784,22 +783,5 @@ static struct ccid_operations ccid2 = { #ifdef CONFIG_IP_DCCP_CCID2_DEBUG module_param(ccid2_debug, bool, 0644); -MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages"); #endif - -static __init int ccid2_module_init(void) -{ - return ccid_register(&ccid2); -} -module_init(ccid2_module_init); - -static __exit void ccid2_module_exit(void) -{ - ccid_unregister(&ccid2); -} -module_exit(ccid2_module_exit); - -MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>"); -MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-2"); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3b8bd7ca6761..a27b7f4c19c5 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -940,10 +940,9 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, return 0; } -static struct ccid_operations ccid3 = { +struct ccid_operations ccid3_ops = { .ccid_id = DCCPC_CCID3, .ccid_name = "TCP-Friendly Rate Control", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), .ccid_hc_tx_init = ccid3_hc_tx_init, .ccid_hc_tx_exit = ccid3_hc_tx_exit, @@ -964,23 +963,5 @@ static struct ccid_operations ccid3 = { #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, bool, 0644); -MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages"); #endif - -static __init int ccid3_module_init(void) -{ - return ccid_register(&ccid3); -} -module_init(ccid3_module_init); - -static __exit void ccid3_module_exit(void) -{ - ccid_unregister(&ccid3); -} -module_exit(ccid3_module_exit); - -MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>, " - "Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); -MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-3"); diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile deleted file mode 100644 index 68c93e3d89dc..000000000000 --- a/net/dccp/ccids/lib/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o - -dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5b3ce0688c5c..4d1e40127264 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -60,7 +60,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) lh->ring[LIH_INDEX(lh->counter)] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) { @@ -121,7 +120,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) return (lh->i_mean < old_i_mean); } -EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, @@ -169,7 +167,6 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, } return 1; } -EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); int __init tfrc_li_init(void) { diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6cc108afdc3b..b7785b3581ec 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -94,7 +94,6 @@ int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) *headp = entry; return 0; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_add); void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) { @@ -109,7 +108,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) *headp = NULL; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, const ktime_t now) @@ -127,7 +125,6 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, return rtt; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); /* @@ -172,7 +169,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, tfrc_rx_hist_entry_from_skb(entry, skb, ndp); } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) @@ -189,7 +185,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { @@ -390,7 +385,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } return is_new_loss; } -EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { @@ -412,7 +406,6 @@ out_free: } return -ENOBUFS; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { @@ -424,7 +417,6 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) h->ring[i] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against @@ -495,4 +487,3 @@ keep_ref_for_next_time: return sample; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 185916218e07..4902029854d8 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -1,20 +1,18 @@ /* - * TFRC: main module holding the pieces of the TFRC library together + * TFRC library initialisation * * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include <linux/module.h> -#include <linux/moduleparam.h> #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; module_param(tfrc_debug, bool, 0644); -MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); +MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif -static int __init tfrc_module_init(void) +int __init tfrc_lib_init(void) { int rc = tfrc_li_init(); @@ -38,18 +36,9 @@ out: return rc; } -static void __exit tfrc_module_exit(void) +void tfrc_lib_exit(void) { tfrc_rx_packet_history_exit(); tfrc_tx_packet_history_exit(); tfrc_li_exit(); } - -module_init(tfrc_module_init); -module_exit(tfrc_module_exit); - -MODULE_AUTHOR("Gerrit Renker <gerrit@erg.abdn.ac.uk>, " - "Ian McDonald <ian.mcdonald@jandi.co.nz>, " - "Arnaldo Carvalho de Melo <acme@redhat.com>"); -MODULE_DESCRIPTION("DCCP TFRC library"); -MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed9857527acf..e9720b143275 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -17,7 +17,8 @@ #include <linux/types.h> #include <linux/math64.h> #include "../../dccp.h" -/* internal includes that this module exports: */ + +/* internal includes that this library exports: */ #include "loss_interval.h" #include "packet_history.h" @@ -66,4 +67,12 @@ extern void tfrc_rx_packet_history_exit(void); extern int tfrc_li_init(void); extern void tfrc_li_exit(void); + +#ifdef CONFIG_IP_DCCP_TFRC_LIB +extern int tfrc_lib_init(void); +extern void tfrc_lib_exit(void); +#else +#define tfrc_lib_init() (0) +#define tfrc_lib_exit() +#endif #endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 2f20a29cffe4..c5d3a9e5a5a4 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -659,8 +659,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) return scaled_div32(result, f); } -EXPORT_SYMBOL_GPL(tfrc_calc_x); - /** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * @@ -693,5 +691,3 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } - -EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0bc4c9a02e19..f2230fc168e1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -432,10 +432,8 @@ static inline int dccp_ack_pending(const struct sock *sk) { const struct dccp_sock *dp = dccp_sk(sk); return dp->dccps_timestamp_echo != 0 || -#ifdef CONFIG_IP_DCCP_ACKVEC (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || -#endif inet_csk_ack_scheduled(sk); } diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 30f9fb76b921..4152308958ab 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -34,7 +34,7 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any()); + struct ccid *new_ccid = ccid_new(ccid, sk, rx); if (new_ccid == NULL) return -ENOMEM; @@ -1214,8 +1214,6 @@ const char *dccp_feat_typename(const u8 type) return NULL; } -EXPORT_SYMBOL_GPL(dccp_feat_typename); - const char *dccp_feat_name(const u8 feat) { static const char *feature_names[] = { @@ -1240,6 +1238,4 @@ const char *dccp_feat_name(const u8 feat) return feature_names[feat]; } - -EXPORT_SYMBOL_GPL(dccp_feat_name); #endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 5eb443f656c1..7648f316310f 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -741,5 +741,3 @@ u32 dccp_sample_rtt(struct sock *sk, long delta) return delta; } - -EXPORT_SYMBOL_GPL(dccp_sample_rtt); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1747ccae8e8d..945b4d5d23b3 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1118,9 +1118,15 @@ static int __init dccp_init(void) if (rc) goto out_ackvec_exit; + rc = ccid_initialize_builtins(); + if (rc) + goto out_sysctl_exit; + dccp_timestamping_init(); out: return rc; +out_sysctl_exit: + dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: @@ -1143,6 +1149,7 @@ out_free_percpu: static void __exit dccp_fini(void) { + ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, get_order(dccp_hashinfo.bhash_size * diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a3a410d20da0..a68fd79e9eca 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -286,6 +286,42 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_sset_count = dsa_slave_get_sset_count, }; +#ifdef CONFIG_NET_DSA_TAG_DSA +static const struct net_device_ops dsa_netdev_ops = { + .ndo_open = dsa_slave_open, + .ndo_stop = dsa_slave_close, + .ndo_start_xmit = dsa_xmit, + .ndo_change_rx_flags = dsa_slave_change_rx_flags, + .ndo_set_rx_mode = dsa_slave_set_rx_mode, + .ndo_set_multicast_list = dsa_slave_set_rx_mode, + .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_do_ioctl = dsa_slave_ioctl, +}; +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA +static const struct net_device_ops edsa_netdev_ops = { + .ndo_open = dsa_slave_open, + .ndo_stop = dsa_slave_close, + .ndo_start_xmit = edsa_xmit, + .ndo_change_rx_flags = dsa_slave_change_rx_flags, + .ndo_set_rx_mode = dsa_slave_set_rx_mode, + .ndo_set_multicast_list = dsa_slave_set_rx_mode, + .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_do_ioctl = dsa_slave_ioctl, +}; +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER +static const struct net_device_ops trailer_netdev_ops = { + .ndo_open = dsa_slave_open, + .ndo_stop = dsa_slave_close, + .ndo_start_xmit = trailer_xmit, + .ndo_change_rx_flags = dsa_slave_change_rx_flags, + .ndo_set_rx_mode = dsa_slave_set_rx_mode, + .ndo_set_multicast_list = dsa_slave_set_rx_mode, + .ndo_set_mac_address = dsa_slave_set_mac_address, + .ndo_do_ioctl = dsa_slave_ioctl, +}; +#endif /* slave device setup *******************************************************/ struct net_device * @@ -306,32 +342,27 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); slave_dev->tx_queue_len = 0; + switch (ds->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): - slave_dev->hard_start_xmit = dsa_xmit; + slave_dev->netdev_ops = &dsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): - slave_dev->hard_start_xmit = edsa_xmit; + slave_dev->netdev_ops = &edsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER case htons(ETH_P_TRAILER): - slave_dev->hard_start_xmit = trailer_xmit; + slave_dev->netdev_ops = &trailer_netdev_ops; break; #endif default: BUG(); } - slave_dev->open = dsa_slave_open; - slave_dev->stop = dsa_slave_close; - slave_dev->change_rx_flags = dsa_slave_change_rx_flags; - slave_dev->set_rx_mode = dsa_slave_set_rx_mode; - slave_dev->set_multicast_list = dsa_slave_set_rx_mode; - slave_dev->set_mac_address = dsa_slave_set_mac_address; - slave_dev->do_ioctl = dsa_slave_ioctl; + SET_NETDEV_DEV(slave_dev, parent); slave_dev->vlan_features = master->vlan_features; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 42a0f3dd3fd6..d722013c1cae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1268,6 +1268,9 @@ __be32 __init root_nfs_parse_addr(char *name) static int __init ip_auto_config(void) { __be32 addr; +#ifdef IPCONFIG_DYNAMIC + int retries = CONF_OPEN_RETRIES; +#endif #ifdef CONFIG_PROC_FS proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); @@ -1304,9 +1307,6 @@ static int __init ip_auto_config(void) #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC - - int retries = CONF_OPEN_RETRIES; - if (ic_dynamic() < 0) { ic_close_devs(); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c9224310ebae..52cb6939d093 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -93,13 +93,8 @@ ipt_local_out_hook(unsigned int hook, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_filter: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } - return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 69f2c4287146..3929d20b9e45 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -132,12 +132,8 @@ ipt_local_hook(unsigned int hook, /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_mangle: ignoring short SOCK_RAW " - "packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } /* Save things which could affect route */ mark = skb->mark; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 8faebfe638f1..7f65d18333e3 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -65,12 +65,8 @@ ipt_local_hook(unsigned int hook, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_raw: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_raw); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 36f3be3cc428..a52a35f4a584 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -96,12 +96,8 @@ ipt_local_out_hook(unsigned int hook, { /* Somebody is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk(KERN_INFO "iptable_security: ignoring short " - "SOCK_RAW packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index b2141e11575e..4beb04fac588 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -145,11 +145,8 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1fd3ef7718b6..2a8bee26f43d 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -20,7 +20,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_log.h> -static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f28acf11fc67..76b148bcb0dc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -522,8 +522,13 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct tcp_splice_state *tss = rd_desc->arg.data; + int ret; - return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), + tss->flags); + if (ret > 0) + rd_desc->count -= ret; + return ret; } static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) @@ -531,6 +536,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) /* Store TCP splice context information in read_descriptor_t. */ read_descriptor_t rd_desc = { .arg.data = tss, + .count = tss->len, }; return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); @@ -580,10 +586,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, else if (!ret) { if (spliced) break; - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { @@ -615,11 +617,13 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, tss.len -= ret; spliced += ret; + if (!timeo) + break; release_sock(sk); lock_sock(sk); if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || + (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } @@ -1317,7 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if ((available < target) && (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && !sysctl_tcp_low_latency && - __get_cpu_var(softnet_data).net_dma) { + dma_find_channel(DMA_MEMCPY)) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); @@ -1527,7 +1531,7 @@ do_prequeue: if (!(flags & MSG_TRUNC)) { #ifdef CONFIG_NET_DMA if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = get_softnet_dma(); + tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); if (tp->ucopy.dma_chan) { tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( @@ -1632,7 +1636,6 @@ skip_copy: /* Safe to free early-copied skbs now */ __skb_queue_purge(&sk->sk_async_wait_queue); - dma_chan_put(tp->ucopy.dma_chan); tp->ucopy.dma_chan = NULL; } if (tp->ucopy.pinned_list) { @@ -2387,7 +2390,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) unsigned int seq; __be32 delta; unsigned int oldlen; - unsigned int len; + unsigned int mss; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2403,10 +2406,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) oldlen = (u16)~skb->len; __skb_pull(skb, thlen); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - int mss; if (unlikely(type & ~(SKB_GSO_TCPV4 | @@ -2417,7 +2423,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; - mss = skb_shinfo(skb)->gso_size; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; @@ -2428,8 +2433,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (IS_ERR(segs)) goto out; - len = skb_shinfo(skb)->gso_size; - delta = htonl(oldlen + (thlen + len)); + delta = htonl(oldlen + (thlen + mss)); skb = segs; th = tcp_hdr(skb); @@ -2445,7 +2449,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) csum_fold(csum_partial(skb_transport_header(skb), thlen, skb->csum)); - seq += len; + seq += mss; skb = skb->next; th = tcp_hdr(skb); @@ -2519,9 +2523,7 @@ found: flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); total = p->len; - mss = total; - if (skb_shinfo(p)->frag_list) - mss = skb_shinfo(p)->frag_list->len; + mss = skb_shinfo(p)->gso_size; flush |= skb->len > mss || skb->len <= 0; flush |= ntohl(th2->seq) + total != ntohl(th->seq); @@ -2548,6 +2550,7 @@ out: return pp; } +EXPORT_SYMBOL(tcp_gro_receive); int tcp_gro_complete(struct sk_buff *skb) { @@ -2557,7 +2560,6 @@ int tcp_gro_complete(struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) @@ -2565,6 +2567,7 @@ int tcp_gro_complete(struct sk_buff *skb) return 0; } +EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 99b7ecbe8893..a6961d75c7ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5005,7 +5005,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, return 0; if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = get_softnet_dma(); + tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9d839fa9331e..19d7b429a262 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1594,7 +1594,7 @@ process: #ifdef CONFIG_NET_DMA struct tcp_sock *tp = tcp_sk(sk); if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = get_softnet_dma(); + tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); if (tp->ucopy.dma_chan) ret = tcp_v4_do_rcv(sk, skb); else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab0581eba..1ab180bad72a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, + bitmap); + else + return 1; + } return 0; } @@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = (((u64)rand * remaining) >> 32) + low; + /* + * force rand to be an odd multiple of UDP_HTABLE_SIZE + */ + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, + saddr_comp); + + snum = first; + /* + * Iterate on all possible values of snum for this hash. + * Using steps of an odd multiple of UDP_HTABLE_SIZE + * give us randomization and full range coverage. + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { @@ -992,9 +1015,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); + atomic_inc(&sk->sk_drops); + } goto drop; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e92ad8455c63..f9afb452249c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4250,7 +4250,7 @@ static struct addrconf_sysctl_table .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = proc_dointvec, }, #endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 437b750b98fd..c802bc1658a8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -672,8 +672,7 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) EXPORT_SYMBOL_GPL(ipv6_opt_accepted); -static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, - int proto) +static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) { struct inet6_protocol *ops = NULL; @@ -704,7 +703,7 @@ static struct inet6_protocol *ipv6_gso_pull_exthdrs(struct sk_buff *skb, __skb_pull(skb, len); } - return ops; + return proto; } static int ipv6_gso_send_check(struct sk_buff *skb) @@ -721,7 +720,9 @@ static int ipv6_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + ops = rcu_dereference(inet6_protos[ + ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); + if (likely(ops && ops->gso_send_check)) { skb_reset_transport_header(skb); err = ops->gso_send_check(skb); @@ -757,7 +758,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); - ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + ops = rcu_dereference(inet6_protos[ + ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); + if (likely(ops && ops->gso_segment)) { skb_reset_transport_header(skb); segs = ops->gso_segment(skb, features); @@ -777,11 +780,112 @@ out: return segs; } +struct ipv6_gro_cb { + struct napi_gro_cb napi; + int proto; +}; + +#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb) + +static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct inet6_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct ipv6hdr *iph; + unsigned int nlen; + int flush = 1; + int proto; + __wsum csum; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = ipv6_hdr(skb); + __skb_pull(skb, sizeof(*iph)); + + flush += ntohs(iph->payload_len) != skb->len; + + rcu_read_lock(); + proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); + iph = ipv6_hdr(skb); + IPV6_GRO_CB(skb)->proto = proto; + ops = rcu_dereference(inet6_protos[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + flush--; + skb_reset_transport_header(skb); + nlen = skb_network_header_len(skb); + + for (p = *head; p; p = p->next) { + struct ipv6hdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ipv6_hdr(p); + + /* All fields must match except length. */ + if (nlen != skb_network_header_len(p) || + memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) || + memcmp(&iph->nexthdr, &iph2->nexthdr, + nlen - offsetof(struct ipv6hdr, nexthdr))) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + + csum = skb->csum; + skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + + pp = ops->gro_receive(head, skb); + + skb->csum = csum; + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int ipv6_gro_complete(struct sk_buff *skb) +{ + struct inet6_protocol *ops; + struct ipv6hdr *iph = ipv6_hdr(skb); + int err = -ENOSYS; + + iph->payload_len = htons(skb->len - skb_network_offset(skb) - + sizeof(*iph)); + + rcu_read_lock(); + ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + static struct packet_type ipv6_packet_type = { .type = __constant_htons(ETH_P_IPV6), .func = ipv6_rcv, .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = ipv6_gro_complete, }; static int __init ipv6_packet_init(void) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4f433847d95f..36dff8807183 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -443,10 +443,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) goto relookup_failed; - if (ip6_dst_lookup(sk, &dst2, &fl)) + if (ip6_dst_lookup(sk, &dst2, &fl2)) goto relookup_failed; - err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 29c7c99e69f7..52ee1dced2ff 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -298,6 +298,10 @@ static void fib6_dump_end(struct netlink_callback *cb) struct fib6_walker_t *w = (void*)cb->args[2]; if (w) { + if (cb->args[4]) { + cb->args[4] = 0; + fib6_walker_unlink(w); + } cb->args[2] = 0; kfree(w); } @@ -330,15 +334,12 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, read_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; + if (res <= 0) { + fib6_walker_unlink(w); + cb->args[4] = 0; } - fib6_walker_unlink(w); - cb->args[4] = 0; } -end: + return res; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 936f48946e20..f171e8dbac91 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -255,6 +255,7 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -316,7 +317,6 @@ int ip6_mc_input(struct sk_buff *skb) } if (skb2) { - skb2->dev = skb2->dst->dev; ip6_mr_input(skb2); } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3c51b2d827f4..228be551e9c1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -48,6 +48,7 @@ #include <linux/pim.h> #include <net/addrconf.h> #include <linux/netfilter_ipv6.h> +#include <net/ip6_checksum.h> /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. @@ -365,7 +366,9 @@ static int pim6_rcv(struct sk_buff *skb) pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || - (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + sizeof(*pim), IPPROTO_PIM, + csum_partial((void *)pim, sizeof(*pim), 0)) && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; @@ -392,7 +395,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); @@ -481,6 +484,7 @@ static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; + struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; @@ -513,6 +517,10 @@ static int mif6_delete(struct net *net, int vifi) dev_set_allmulti(dev, -1); + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding--; + if (v->flags & MIFF_REGISTER) unregister_netdevice(dev); @@ -622,6 +630,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; + struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ @@ -662,6 +671,10 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) return -EINVAL; } + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding++; + /* * Fill in the VIF structures */ @@ -838,8 +851,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, skb->dst = dst_clone(pkt->dst); skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_pull(skb, sizeof(struct ipv6hdr)); } if (net->ipv6.mroute6_sk == NULL) { @@ -1222,8 +1233,10 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(net->ipv6.mroute6_sk == NULL)) + if (likely(net->ipv6.mroute6_sk == NULL)) { net->ipv6.mroute6_sk = sk; + net->ipv6.devconf_all->mc_forwarding++; + } else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1242,6 +1255,7 @@ int ip6mr_sk_done(struct sock *sk) if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); net->ipv6.mroute6_sk = NULL; + net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); mroute_clean_tables(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index eeeaad2e8b5c..40f324655e24 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -404,7 +404,7 @@ sticky_done: else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) goto e_inval; - if (copy_from_user(&pkt, optval, optlen)) { + if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { retv = -EFAULT; break; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index bd52151d31e9..c455cf4ee756 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -26,7 +26,7 @@ #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_log.h> -static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 18c486cf4987..9c574235c905 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad rt = ip6_rt_copy(ort); if (rt) { + struct neighbour *neigh; + int attempts = !in_softirq(); + if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) @@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad } #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + retry: + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { + struct net *net = dev_net(rt->rt6i_dev); + int saved_rt_min_interval = + net->ipv6.sysctl.ip6_rt_gc_min_interval; + int saved_rt_elasticity = + net->ipv6.sysctl.ip6_rt_gc_elasticity; + + if (attempts-- > 0) { + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; + + ip6_dst_gc(net->ipv6.ip6_dst_ops); + + net->ipv6.sysctl.ip6_rt_gc_elasticity = + saved_rt_elasticity; + net->ipv6.sysctl.ip6_rt_gc_min_interval = + saved_rt_min_interval; + goto retry; + } + + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table overflow.\n"); + dst_free(&rt->u.dst); + return NULL; + } + rt->rt6i_nexthop = neigh; } @@ -763,7 +794,7 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); @@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dev_hold(dev); if (neigh) neigh_hold(neigh); - else + else { neigh = ndisc_get_neigh(dev, addr); + if (IS_ERR(neigh)) + neigh = NULL; + } rt->rt6i_dev = dev; rt->rt6i_idev = idev; @@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct neighbour *neigh; if (rt == NULL) return ERR_PTR(-ENOMEM); @@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (rt->rt6i_nexthop == NULL) { + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { dst_free(&rt->u.dst); - return ERR_PTR(-ENOMEM); + + /* We are casting this because that is the return + * value type. But an errno encoded pointer is the + * same regardless of the underlying pointer type, + * and that's what we are returning. So this is OK. + */ + return (struct rt6_info *) neigh; } + rt->rt6i_nexthop = neigh; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -2710,7 +2752,7 @@ int __init ip6_route_init(void) kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN, NULL); if (!ip6_dst_ops_template.kmem_cachep) - goto out;; + goto out; ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 9048fe7e7ea7..a031034720b4 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -128,7 +128,7 @@ static struct ctl_table_header *ip6_header; int ipv6_sysctl_register(void) { - int err = -ENOMEM;; + int err = -ENOMEM; ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); if (ip6_header == NULL) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e8b8337a8310..e5b85d45bee8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,7 +101,7 @@ static void tcp_v6_hash(struct sock *sk) } } -static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len, +static __inline__ __sum16 tcp_v6_check(int len, struct in6_addr *saddr, struct in6_addr *daddr, __wsum base) @@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) if (skb) { struct tcphdr *th = tcp_hdr(skb); - th->check = tcp_v6_check(th, skb->len, + th->check = tcp_v6_check(skb->len, &treq->loc_addr, &treq->rmt_addr, csum_partial(th, skb->len, skb->csum)); @@ -942,6 +942,41 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) return 0; } +struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + /* fall through */ + case CHECKSUM_NONE: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp6_gro_receive); + +int tcp6_gro_complete(struct sk_buff *skb) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), + &iph->saddr, &iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + + return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp6_gro_complete); + static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst) { @@ -1429,14 +1464,14 @@ out: static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr, + if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; } } - skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len, + skb->csum = ~csum_unfold(tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0)); @@ -1640,7 +1675,7 @@ process: #ifdef CONFIG_NET_DMA struct tcp_sock *tp = tcp_sk(sk); if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) - tp->ucopy.dma_chan = get_softnet_dma(); + tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY); if (tp->ucopy.dma_chan) ret = tcp_v6_do_rcv(sk, skb); else @@ -2062,6 +2097,8 @@ static struct inet6_protocol tcpv6_protocol = { .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index e4e2caeb9d82..086d5ef098fd 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -371,9 +371,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) IRDA_DEBUG(2, "%s()\n", __func__ ); line = tty->index; - if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) { + if (line >= IRCOMM_TTY_PORTS) return -ENODEV; - } /* Check if instance already exists */ self = hashbin_lock_find(ircomm_tty, line, NULL); @@ -405,6 +404,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * Force TTY into raw mode by default which is usually what * we want for IrCOMM and IrLPT. This way applications will * not have to twiddle with printcap etc. + * + * Note this is completely usafe and doesn't work properly */ tty->termios->c_iflag = 0; tty->termios->c_oflag = 0; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index af3192d2a5a3..eb8a2a0b6eb7 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -494,7 +494,21 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, if (err) { iucv_path_free(iucv->path); iucv->path = NULL; - err = -ECONNREFUSED; + switch (err) { + case 0x0b: /* Target communicator is not logged on */ + err = -ENETUNREACH; + break; + case 0x0d: /* Max connections for this guest exceeded */ + case 0x0e: /* Max connections for target guest exceeded */ + err = -EAGAIN; + break; + case 0x0f: /* Missing IUCV authorization */ + err = -EACCES; + break; + default: + err = -ECONNREFUSED; + break; + } goto done; } @@ -507,6 +521,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, release_sock(sk); return -ECONNREFUSED; } + + if (err) { + iucv_path_sever(iucv->path, NULL); + iucv_path_free(iucv->path); + iucv->path = NULL; + } + done: release_sock(sk); return err; @@ -1021,12 +1042,14 @@ static int iucv_callback_connreq(struct iucv_path *path, ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1034,6 +1057,7 @@ static int iucv_callback_connreq(struct iucv_path *path, nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); if (!nsk) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1057,6 +1081,8 @@ static int iucv_callback_connreq(struct iucv_path *path, err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); + iucv_sock_kill(nsk); goto fail; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 8f57d4f4328a..a35240f61ec3 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -50,7 +50,6 @@ #include <asm/ebcdic.h> #include <asm/io.h> #include <asm/s390_ext.h> -#include <asm/s390_rdev.h> #include <asm/smp.h> /* @@ -517,6 +516,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; + get_online_cpus(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -524,19 +524,17 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ - goto out_path; + goto out; put_online_cpus(); return 0; - -out_path: - put_online_cpus(); - kfree(iucv_path_table); out: + kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); return rc; } @@ -551,8 +549,9 @@ static void iucv_disable(void) { get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); - put_online_cpus(); kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); } static int __cpuinit iucv_cpu_notify(struct notifier_block *self, @@ -589,10 +588,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: + if (!iucv_path_table) + break; smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: + if (!iucv_path_table) + break; cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) @@ -1692,7 +1695,7 @@ static int __init iucv_init(void) rc = register_external_interrupt(0x4000, iucv_external_interrupt); if (rc) goto out; - iucv_root = s390_root_dev_register("iucv"); + iucv_root = root_device_register("iucv"); if (IS_ERR(iucv_root)) { rc = PTR_ERR(iucv_root); goto out_int; @@ -1736,7 +1739,7 @@ out_free: kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; } - s390_root_dev_unregister(iucv_root); + root_device_unregister(iucv_root); out_int: unregister_external_interrupt(0x4000, iucv_external_interrupt); out: @@ -1766,7 +1769,7 @@ static void __exit iucv_exit(void) kfree(iucv_irq_data[cpu]); iucv_irq_data[cpu] = NULL; } - s390_root_dev_unregister(iucv_root); + root_device_unregister(iucv_root); bus_unregister(&iucv_bus); unregister_external_interrupt(0x4000, iucv_external_interrupt); } diff --git a/net/key/af_key.c b/net/key/af_key.c index f8bd8df5e257..7dcbde3ea7d9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1285,6 +1285,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } + memset(&natt->encap_oa, 0, sizeof(natt->encap_oa)); } err = xfrm_init_state(x); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 5f510a13b9f0..c5c0c5271096 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -469,7 +469,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata; u16 start_seq_num; u8 *state; - int ret; + int ret = 0; if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5abbc3f07dd6..b9074824862a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -699,7 +699,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return 0; /* Setting ad-hoc mode on non-IBSS channel is not supported. */ - if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) + if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS && + type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 929ba542fd72..1159bdb4119c 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -107,6 +107,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, sta->flags = WLAN_STA_AUTHORIZED; sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + rate_control_rate_init(sta); return sta; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5ba721b6a399..2b890af01ba4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -620,8 +620,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, if (use_short_slot != bss_conf->use_short_slot) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) { - printk(KERN_DEBUG "%s: switched to %s slot" - " (BSSID=%s)\n", + printk(KERN_DEBUG "%s: switched to %s slot time" + " (BSSID=%pM)\n", sdata->dev->name, use_short_slot ? "short" : "long", ifsta->bssid); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 2b3b490a6073..3824990d340b 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -395,13 +395,15 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct minstrel_rate *mr_ctl; + struct ieee80211_local *local = hw_to_local(mp->hw); + struct ieee80211_rate *ctl_rate; unsigned int i, n = 0; unsigned int t_slot = 9; /* FIXME: get real slot time */ mi->lowest_rix = rate_lowest_index(sband, sta); - mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)]; - mi->sp_ack_dur = mr_ctl->ack_time; + ctl_rate = &sband->bitrates[mi->lowest_rix]; + mi->sp_ack_dur = ieee80211_frame_duration(local, 10, ctl_rate->bitrate, + !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); for (i = 0; i < sband->n_bitrates; i++) { struct minstrel_rate *mr = &mi->r[n]; @@ -416,7 +418,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, mr->rix = i; mr->bitrate = sband->bitrates[i].bitrate / 5; - calc_rate_durations(mi, hw_to_local(mp->hw), mr, + calc_rate_durations(mi, local, mr, &sband->bitrates[i]); /* calculate maximum number of retransmissions before diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dc2606d0ae77..e49a5b99cf10 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -195,7 +195,6 @@ struct sta_ampdu_mlme { * @tx_packets: number of RX/TX MSDUs * @tx_bytes: number of bytes transmitted to this STA * @tx_fragments: number of transmitted MPDUs - * @last_txrate: description of the last used transmit rate * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a4af3a124cce..4278e545638f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1307,8 +1307,10 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) if (is_multicast_ether_addr(hdr->addr3)) memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); else - if (mesh_nexthop_lookup(skb, osdata)) - return 0; + if (mesh_nexthop_lookup(skb, osdata)) { + dev_put(odev); + return 0; + } if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, fwded_frames); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7e83f74cd5de..90ce9ddb9451 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -469,7 +469,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp) { - struct nf_conn *ct = NULL; + struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -551,7 +551,7 @@ init_conntrack(struct net *net, } ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) { + if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 00e8c27130ff..c32a7e8e3a1b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -831,13 +831,16 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); nfnl_unlock(); if (request_module("nf-nat-ipv4") < 0) { nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); return -EOPNOTSUPP; } nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); rcu_read_lock(); if (nfnetlink_parse_nat_setup_hook) return -EAGAIN; @@ -1134,7 +1137,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) + if (IS_ERR(ct)) return -ENOMEM; if (!cda[CTA_TIMEOUT]) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 89837a4eef76..bfbf521f6ea5 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -273,6 +273,10 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } @@ -289,6 +293,10 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 29375ba8db73..93acaa59d108 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -243,6 +243,17 @@ static struct xt_match xt_time_mt_reg __read_mostly = { static int __init time_mt_init(void) { + int minutes = sys_tz.tz_minuteswest; + + if (minutes < 0) /* east of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is +%02d%02d\n", + -minutes / 60, -minutes % 60); + else /* west of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is -%02d%02d\n", + minutes / 60, minutes % 60); + return xt_register_match(&xt_time_mt_reg); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3e1191cecaf0..1d3dd30099df 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -225,6 +225,7 @@ void genl_unregister_mc_group(struct genl_family *family, __genl_unregister_mc_group(family, grp); genl_unlock(); } +EXPORT_SYMBOL(genl_unregister_mc_group); static void genl_unregister_mc_groups(struct genl_family *family) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5f94db2f3e9e..1fc4a7885c41 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -77,6 +77,7 @@ #include <linux/poll.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/mutex.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -175,6 +176,7 @@ struct packet_sock { #endif struct packet_type prot_hook; spinlock_t bind_lock; + struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ auxdata:1, origdev:1; @@ -220,13 +222,13 @@ static void *packet_lookup_frame(struct packet_sock *po, unsigned int position, h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size); switch (po->tp_version) { case TPACKET_V1: - if (status != h.h1->tp_status ? TP_STATUS_USER : - TP_STATUS_KERNEL) + if (status != (h.h1->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL)) return NULL; break; case TPACKET_V2: - if (status != h.h2->tp_status ? TP_STATUS_USER : - TP_STATUS_KERNEL) + if (status != (h.h2->tp_status ? TP_STATUS_USER : + TP_STATUS_KERNEL)) return NULL; break; } @@ -1069,6 +1071,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) */ spin_lock_init(&po->bind_lock); + mutex_init(&po->pg_vec_lock); po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET) @@ -1865,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing synchronize_net(); err = -EBUSY; + mutex_lock(&po->pg_vec_lock); if (closing || atomic_read(&po->mapped) == 0) { err = 0; #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) @@ -1886,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing if (atomic_read(&po->mapped)) printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); } + mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); if (was_running && !po->running) { @@ -1918,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st size = vma->vm_end - vma->vm_start; - lock_sock(sk); + mutex_lock(&po->pg_vec_lock); if (po->pg_vec == NULL) goto out; if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) @@ -1941,7 +1946,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st err = 0; out: - release_sock(sk); + mutex_unlock(&po->pg_vec_lock); return err; } #endif diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index b0ceac2d6cd1..6a91a32a80c1 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -227,6 +227,13 @@ static int gprs_set_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops gprs_netdev_ops = { + .ndo_open = gprs_open, + .ndo_stop = gprs_close, + .ndo_start_xmit = gprs_xmit, + .ndo_change_mtu = gprs_set_mtu, +}; + static void gprs_setup(struct net_device *dev) { dev->features = NETIF_F_FRAGLIST; @@ -237,11 +244,8 @@ static void gprs_setup(struct net_device *dev) dev->addr_len = 0; dev->tx_queue_len = 10; + dev->netdev_ops = &gprs_netdev_ops; dev->destructor = free_netdev; - dev->open = gprs_open; - dev->stop = gprs_close; - dev->hard_start_xmit = gprs_xmit; /* mandatory */ - dev->change_mtu = gprs_set_mtu; } /* diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 3c94f76d5525..3eaa39403c13 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -54,10 +54,10 @@ static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; static bool rfkill_epo_lock_active; +#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger(struct rfkill *rfkill, enum rfkill_state state) { -#ifdef CONFIG_RFKILL_LEDS struct led_trigger *led = &rfkill->led_trigger; if (!led->name) @@ -66,10 +66,8 @@ static void rfkill_led_trigger(struct rfkill *rfkill, led_trigger_event(led, LED_OFF); else led_trigger_event(led, LED_FULL); -#endif /* CONFIG_RFKILL_LEDS */ } -#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger_activate(struct led_classdev *led) { struct rfkill *rfkill = container_of(led->trigger, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 05d178008cbc..07372f60bee3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -638,8 +638,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, break; n->next = *ins; - wmb(); + tcf_tree_lock(tp); *ins = n; + tcf_tree_unlock(tp); *arg = (unsigned long)n; return 0; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5070643ce534..2f0f0b04d3fb 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -661,12 +661,13 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level) +static psched_time_t htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of 1 to simplify things when jiffy is going to be incremented too soon */ - unsigned long stop_at = jiffies + 2; + unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; @@ -685,8 +686,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - /* too much load - let's continue on next jiffie */ - return q->now + PSCHED_TICKS_PER_SEC / HZ; + /* too much load - let's continue on next jiffie (including above) */ + return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -845,6 +846,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; + unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); @@ -857,6 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (!sch->q.qlen) goto fin; q->now = psched_get_time(); + start_at = jiffies; next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; @@ -866,14 +869,14 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) psched_time_t event; if (q->now >= q->near_ev_cache[level]) { - event = htb_do_events(q, level); + event = htb_do_events(q, level, start_at); if (!event) event = q->now + PSCHED_TICKS_PER_SEC; q->near_ev_cache[level] = event; } else event = q->near_ev_cache[level]; - if (event && next_event > event) + if (next_event > event) next_event = event; m = ~q->row_mask[level]; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f3965df00559..33133d27b539 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -435,7 +435,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; q->perturb_timer.function = sfq_perturbation; - q->perturb_timer.data = (unsigned long)sch;; + q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); for (i = 0; i < SFQ_HASH_DIVISOR; i++) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index cfc8e7caba62..ec697cebb63b 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -289,9 +289,9 @@ restart: do { struct net_device *slave = qdisc_dev(q); - struct netdev_queue *slave_txq; + struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); + const struct net_device_ops *slave_ops = slave->netdev_ops; - slave_txq = netdev_get_tx_queue(slave, 0); if (slave_txq->qdisc_sleeping != q) continue; if (__netif_subqueue_stopped(slave, subq) || @@ -305,7 +305,7 @@ restart: if (__netif_tx_trylock(slave_txq)) { if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && - slave->hard_start_xmit(skb, slave) == 0) { + slave_ops->ndo_start_xmit(skb, slave) == 0) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); @@ -420,6 +420,14 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) return 0; } +static const struct net_device_ops teql_netdev_ops = { + .ndo_open = teql_master_open, + .ndo_stop = teql_master_close, + .ndo_start_xmit = teql_master_xmit, + .ndo_get_stats = teql_master_stats, + .ndo_change_mtu = teql_master_mtu, +}; + static __init void teql_master_setup(struct net_device *dev) { struct teql_master *master = netdev_priv(dev); @@ -436,11 +444,7 @@ static __init void teql_master_setup(struct net_device *dev) ops->destroy = teql_destroy; ops->owner = THIS_MODULE; - dev->open = teql_master_open; - dev->hard_start_xmit = teql_master_xmit; - dev->stop = teql_master_close; - dev->get_stats = teql_master_stats; - dev->change_mtu = teql_master_mtu; + dev->netdev_ops = &teql_netdev_ops; dev->type = ARPHRD_VOID; dev->mtu = 1500; dev->tx_queue_len = 100; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 52db5f60daa0..56935bbc1496 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -141,8 +141,8 @@ void sctp_auth_destroy_keys(struct list_head *keys) /* Compare two byte vectors as numbers. Return values * are: * 0 - vectors are equal - * < 0 - vector 1 is smaller then vector2 - * > 0 - vector 1 is greater then vector2 + * < 0 - vector 1 is smaller than vector2 + * > 0 - vector 1 is greater than vector2 * * Algorithm is: * This is performed by selecting the numerically smaller key vector... @@ -489,7 +489,7 @@ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) return 0; out_err: - /* Clean up any successfull allocations */ + /* Clean up any successful allocations */ sctp_auth_destroy_hmacs(ep->auth_hmacs); return -ENOMEM; } diff --git a/net/sctp/input.c b/net/sctp/input.c index bf612d954d41..2e4a8646dbc3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -249,6 +249,19 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); + if (sk != rcvr->sk) { + /* Our cached sk is different from the rcvr->sk. This is + * because migrate()/accept() may have moved the association + * to a new socket and released all the sockets. So now we + * are holding a lock on the old socket while the user may + * be doing something with the new socket. Switch our veiw + * of the current sk. + */ + sctp_bh_unlock_sock(sk); + sk = rcvr->sk; + sctp_bh_lock_sock(sk); + } + if (sock_owned_by_user(sk)) { SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); diff --git a/net/sctp/output.c b/net/sctp/output.c index c3f417f7ec6e..73639355157e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -324,14 +324,16 @@ append: switch (chunk->chunk_hdr->type) { case SCTP_CID_DATA: retval = sctp_packet_append_data(packet, chunk); + if (SCTP_XMIT_OK != retval) + goto finish; /* Disallow SACK bundling after DATA. */ packet->has_sack = 1; /* Disallow AUTH bundling after DATA */ packet->has_auth = 1; /* Let it be knows that packet has DATA in it */ packet->has_data = 1; - if (SCTP_XMIT_OK != retval) - goto finish; + /* timestamp the chunk for rtx purposes */ + chunk->sent_at = jiffies; break; case SCTP_CID_COOKIE_ECHO: packet->has_cookie_echo = 1; @@ -470,7 +472,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) } else chunk->resent = 1; - chunk->sent_at = jiffies; has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 247ebc95c1e5..bc411c896216 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -929,7 +929,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } /* Finally, transmit new packets. */ - start_timer = 0; while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. @@ -1028,7 +1027,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, start_timer-1); + sctp_transport_reset_timers(transport, 0); q->empty = 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 1c4e5d6c29c0..3a0cd075914f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4268,9 +4268,9 @@ nomem: /* * Handle a protocol violation when the chunk length is invalid. - * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if it's length is set to be smaller then the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of sctp_sack_chunk_t. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -4300,7 +4300,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen( /* * Handle a protocol violation when the parameter length is invalid. - * "Invalid" length is identified as smaller then the minimal length a + * "Invalid" length is identified as smaller than the minimal length a * given parameter can be. */ static sctp_disposition_t sctp_sf_violation_paramlen( diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b14a8f33e42d..ff0a8f88de04 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2717,7 +2717,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o paths++; } - /* Only validate asocmaxrxt if we have more then + /* Only validate asocmaxrxt if we have more than * one path/transport. We do this because path * retransmissions are only counted when we have more * then one path. diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 35c73e82553a..9bd64565021a 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -227,7 +227,7 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ bitmap_zero(map->tsn_map, map->len); } else { - /* If the gap is smaller then the map size, + /* If the gap is smaller than the map size, * shift the map by 'gap' bits and update further. */ bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); diff --git a/net/socket.c b/net/socket.c index 2c730fc718ab..35dd7371752a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1214,7 +1214,7 @@ int sock_create_kern(int family, int type, int protocol, struct socket **res) return __sock_create(&init_net, family, type, protocol, res, 1); } -asmlinkage long sys_socket(int family, int type, int protocol) +SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { int retval; struct socket *sock; @@ -1255,8 +1255,8 @@ out_release: * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, - int __user *usockvec) +SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, + int __user *, usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1313,13 +1313,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, goto out_fd1; } - err = audit_fd_pair(fd1, fd2); - if (err < 0) { - fput(newfile1); - fput(newfile2); - goto out_fd; - } - + audit_fd_pair(fd1, fd2); fd_install(fd1, newfile1); fd_install(fd2, newfile2); /* fd1 and fd2 may be already another descriptors. @@ -1349,7 +1343,6 @@ out_fd2: out_fd1: put_filp(newfile2); sock_release(sock2); -out_fd: put_unused_fd(fd1); put_unused_fd(fd2); goto out; @@ -1363,7 +1356,7 @@ out_fd: * the protocol layer (having also checked the address is ok). */ -asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) +SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1392,7 +1385,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) * ready for listening. */ -asmlinkage long sys_listen(int fd, int backlog) +SYSCALL_DEFINE2(listen, int, fd, int, backlog) { struct socket *sock; int err, fput_needed; @@ -1425,8 +1418,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen, int, flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1509,8 +1502,8 @@ out_fd: goto out_put; } -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen) { return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } @@ -1527,8 +1520,8 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, - int addrlen) +SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, + int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1559,8 +1552,8 @@ out: * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1590,8 +1583,8 @@ out: * name to user space. */ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1622,9 +1615,9 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, - unsigned flags, struct sockaddr __user *addr, - int addr_len) +SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned, flags, struct sockaddr __user *, addr, + int, addr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1667,7 +1660,8 @@ out: * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, + unsigned, flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } @@ -1678,9 +1672,9 @@ asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) * sender address from kernel to user space. */ -asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, - unsigned flags, struct sockaddr __user *addr, - int __user *addr_len) +SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, + unsigned, flags, struct sockaddr __user *, addr, + int __user *, addr_len) { struct socket *sock; struct iovec iov; @@ -1732,8 +1726,8 @@ asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, * to pass the user mode parameter for the protocols to sort out. */ -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, int, optlen) { int err, fput_needed; struct socket *sock; @@ -1766,8 +1760,8 @@ out_put: * to pass a user mode parameter for the protocols to sort out. */ -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) { int err, fput_needed; struct socket *sock; @@ -1796,7 +1790,7 @@ out_put: * Shutdown a socket. */ -asmlinkage long sys_shutdown(int fd, int how) +SYSCALL_DEFINE2(shutdown, int, fd, int, how) { int err, fput_needed; struct socket *sock; @@ -1822,7 +1816,7 @@ asmlinkage long sys_shutdown(int fd, int how) * BSD sendmsg interface */ -asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1928,8 +1922,8 @@ out: * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, - unsigned int flags) +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -2052,7 +2046,7 @@ static const unsigned char nargs[19]={ * it is set by the callees. */ -asmlinkage long sys_socketcall(int call, unsigned long __user *args) +SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) { unsigned long a[6]; unsigned long a0, a1; @@ -2065,9 +2059,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) if (copy_from_user(a, args, nargs[call])) return -EFAULT; - err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); - if (err) - return err; + audit_socketcall(nargs[call] / sizeof(unsigned long), a); a0 = a[0]; a1 = a[1]; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig new file mode 100644 index 000000000000..5592883e1e4a --- /dev/null +++ b/net/sunrpc/Kconfig @@ -0,0 +1,78 @@ +config SUNRPC + tristate + +config SUNRPC_GSS + tristate + +config SUNRPC_XPRT_RDMA + tristate + depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + default SUNRPC && INFINIBAND + help + This option allows the NFS client and server to support + an RDMA-enabled transport. + + To compile RPC client RDMA transport support as a module, + choose M here: the module will be called xprtrdma. + + If unsure, say N. + +config SUNRPC_REGISTER_V4 + bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + default n + help + Sun added support for registering RPC services at an IPv6 + address by creating two new versions of the rpcbind protocol + (RFC 1833). + + This option enables support in the kernel RPC server for + registering kernel RPC services via version 4 of the rpcbind + protocol. If you enable this option, you must run a portmapper + daemon that supports rpcbind protocol version 4. + + Serving NFS over IPv6 from knfsd (the kernel's NFS server) + requires that you enable this option and use a portmapper that + supports rpcbind version 4. + + If unsure, say N to get traditional behavior (register kernel + RPC services using only rpcbind version 2). Distributions + using the legacy Linux portmapper daemon must say N here. + +config RPCSEC_GSS_KRB5 + tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + select CRYPTO_CBC + help + Choose Y here to enable Secure RPC using the Kerberos version 5 + GSS-API mechanism (RFC 1964). + + Secure RPC calls with Kerberos require an auxiliary user-space + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. In addition, user-space + Kerberos support should be installed. + + If unsure, say N. + +config RPCSEC_GSS_SPKM3 + tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + select CRYPTO_CAST5 + select CRYPTO_CBC + help + Choose Y here to enable Secure RPC using the SPKM3 public key + GSS-API mechansim (RFC 2025). + + Secure RPC calls with SPKM3 require an auxiliary userspace + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. + + If unsure, say N. diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index c9966713282a..4735caad26ed 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -98,7 +98,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, return new; } -EXPORT_SYMBOL(sunrpc_cache_lookup); +EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); static void queue_loose(struct cache_detail *detail, struct cache_head *ch); @@ -173,7 +173,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_put(old, detail); return tmp; } -EXPORT_SYMBOL(sunrpc_cache_update); +EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); /* @@ -245,7 +245,7 @@ int cache_check(struct cache_detail *detail, cache_put(h, detail); return rv; } -EXPORT_SYMBOL(cache_check); +EXPORT_SYMBOL_GPL(cache_check); /* * caches need to be periodically cleaned. @@ -373,7 +373,7 @@ int cache_register(struct cache_detail *cd) schedule_delayed_work(&cache_cleaner, 0); return 0; } -EXPORT_SYMBOL(cache_register); +EXPORT_SYMBOL_GPL(cache_register); void cache_unregister(struct cache_detail *cd) { @@ -399,7 +399,7 @@ void cache_unregister(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -EXPORT_SYMBOL(cache_unregister); +EXPORT_SYMBOL_GPL(cache_unregister); /* clean cache tries to find something to clean * and cleans it. @@ -514,7 +514,7 @@ void cache_flush(void) while (cache_clean() != -1) cond_resched(); } -EXPORT_SYMBOL(cache_flush); +EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { @@ -523,7 +523,7 @@ void cache_purge(struct cache_detail *detail) cache_flush(); detail->flush_time = 1; } -EXPORT_SYMBOL(cache_purge); +EXPORT_SYMBOL_GPL(cache_purge); /* @@ -988,7 +988,7 @@ void qword_add(char **bpp, int *lp, char *str) *bpp = bp; *lp = len; } -EXPORT_SYMBOL(qword_add); +EXPORT_SYMBOL_GPL(qword_add); void qword_addhex(char **bpp, int *lp, char *buf, int blen) { @@ -1017,7 +1017,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen) *bpp = bp; *lp = len; } -EXPORT_SYMBOL(qword_addhex); +EXPORT_SYMBOL_GPL(qword_addhex); static void warn_no_listener(struct cache_detail *detail) { @@ -1140,7 +1140,7 @@ int qword_get(char **bpp, char *dest, int bufsize) *dest = '\0'; return len; } -EXPORT_SYMBOL(qword_get); +EXPORT_SYMBOL_GPL(qword_get); /* diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 192453248870..577385a4a5dc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -522,8 +522,6 @@ rpc_get_inode(struct super_block *sb, int mode) if (!inode) return NULL; inode->i_mode = mode; - inode->i_uid = inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { case S_IFDIR: diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 50b049c6598a..085372ef4feb 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -106,7 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { seq_putc(seq, '\n'); } } -EXPORT_SYMBOL(svc_seq_show); +EXPORT_SYMBOL_GPL(svc_seq_show); /** * rpc_alloc_iostats - allocate an rpc_iostats structure @@ -249,14 +249,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) { return do_register(statp->program->pg_name, statp, fops); } -EXPORT_SYMBOL(svc_proc_register); +EXPORT_SYMBOL_GPL(svc_proc_register); void svc_proc_unregister(const char *name) { remove_proc_entry(name, proc_net_rpc); } -EXPORT_SYMBOL(svc_proc_unregister); +EXPORT_SYMBOL_GPL(svc_proc_unregister); void rpc_proc_init(void) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 54c98d876847..c51fed4d1af1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -431,7 +431,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, { return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); } -EXPORT_SYMBOL(svc_create); +EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, @@ -450,7 +450,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, return serv; } -EXPORT_SYMBOL(svc_create_pooled); +EXPORT_SYMBOL_GPL(svc_create_pooled); /* * Destroy an RPC service. Should be called with appropriate locking to @@ -492,7 +492,7 @@ svc_destroy(struct svc_serv *serv) kfree(serv->sv_pools); kfree(serv); } -EXPORT_SYMBOL(svc_destroy); +EXPORT_SYMBOL_GPL(svc_destroy); /* * Allocate an RPC server's buffer space. @@ -567,7 +567,7 @@ out_thread: out_enomem: return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(svc_prepare_thread); +EXPORT_SYMBOL_GPL(svc_prepare_thread); /* * Choose a pool in which to create a new thread, for svc_set_num_threads @@ -689,7 +689,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return error; } -EXPORT_SYMBOL(svc_set_num_threads); +EXPORT_SYMBOL_GPL(svc_set_num_threads); /* * Called from a server thread as it's exiting. Caller must hold the BKL or @@ -717,7 +717,7 @@ svc_exit_thread(struct svc_rqst *rqstp) if (serv) svc_destroy(serv); } -EXPORT_SYMBOL(svc_exit_thread); +EXPORT_SYMBOL_GPL(svc_exit_thread); #ifdef CONFIG_SUNRPC_REGISTER_V4 @@ -1231,7 +1231,7 @@ err_bad: svc_putnl(resv, ntohl(rpc_stat)); goto sendit; } -EXPORT_SYMBOL(svc_process); +EXPORT_SYMBOL_GPL(svc_process); /* * Return (transport-specific) limit on the rpc payload. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bf5b5cdafebf..e588df5d6b34 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -440,7 +440,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space) svc_xprt_enqueue(xprt); } } -EXPORT_SYMBOL(svc_reserve); +EXPORT_SYMBOL_GPL(svc_reserve); static void svc_xprt_release(struct svc_rqst *rqstp) { @@ -448,6 +448,9 @@ static void svc_xprt_release(struct svc_rqst *rqstp) rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); + kfree(rqstp->rq_deferred); + rqstp->rq_deferred = NULL; + svc_free_res_pages(rqstp); rqstp->rq_res.page_len = 0; rqstp->rq_res.page_base = 0; @@ -498,7 +501,7 @@ void svc_wake_up(struct svc_serv *serv) spin_unlock_bh(&pool->sp_lock); } } -EXPORT_SYMBOL(svc_wake_up); +EXPORT_SYMBOL_GPL(svc_wake_up); int svc_port_is_privileged(struct sockaddr *sin) { @@ -515,8 +518,10 @@ int svc_port_is_privileged(struct sockaddr *sin) } /* - * Make sure that we don't have too many active connections. If we - * have, something must be dropped. + * Make sure that we don't have too many active connections. If we have, + * something must be dropped. It's not clear what will happen if we allow + * "too many" connections, but when dealing with network-facing software, + * we have to code defensively. Here we do that by imposing hard limits. * * There's no point in trying to do random drop here for DoS * prevention. The NFS clients does 1 reconnect in 15 seconds. An @@ -525,19 +530,27 @@ int svc_port_is_privileged(struct sockaddr *sin) * The only somewhat efficient mechanism would be if drop old * connections from the same IP first. But right now we don't even * record the client IP in svc_sock. + * + * single-threaded services that expect a lot of clients will probably + * need to set sv_maxconn to override the default value which is based + * on the number of threads */ static void svc_check_conn_limits(struct svc_serv *serv) { - if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { + unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : + (serv->sv_nrthreads+3) * 20; + + if (serv->sv_tmpcnt > limit) { struct svc_xprt *xprt = NULL; spin_lock_bh(&serv->sv_lock); if (!list_empty(&serv->sv_tempsocks)) { if (net_ratelimit()) { /* Try to help the admin */ printk(KERN_NOTICE "%s: too many open " - "connections, consider increasing the " - "number of nfsd threads\n", - serv->sv_name); + "connections, consider increasing %s\n", + serv->sv_name, serv->sv_maxconn ? + "the max number of connections." : + "the number of threads."); } /* * Always select the oldest connection. It's not fair, @@ -730,7 +743,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) serv->sv_stats->netcnt++; return len; } -EXPORT_SYMBOL(svc_recv); +EXPORT_SYMBOL_GPL(svc_recv); /* * Drop request @@ -740,7 +753,7 @@ void svc_drop(struct svc_rqst *rqstp) dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); svc_xprt_release(rqstp); } -EXPORT_SYMBOL(svc_drop); +EXPORT_SYMBOL_GPL(svc_drop); /* * Return reply to client. @@ -837,6 +850,11 @@ static void svc_age_temp_xprts(unsigned long closure) void svc_delete_xprt(struct svc_xprt *xprt) { struct svc_serv *serv = xprt->xpt_server; + struct svc_deferred_req *dr; + + /* Only do this once */ + if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) + return; dprintk("svc: svc_delete_xprt(%p)\n", xprt); xprt->xpt_ops->xpo_detach(xprt); @@ -851,12 +869,16 @@ void svc_delete_xprt(struct svc_xprt *xprt) * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) { - BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2); - if (test_bit(XPT_TEMP, &xprt->xpt_flags)) - serv->sv_tmpcnt--; + if (test_bit(XPT_TEMP, &xprt->xpt_flags)) + serv->sv_tmpcnt--; + + for (dr = svc_deferred_dequeue(xprt); dr; + dr = svc_deferred_dequeue(xprt)) { svc_xprt_put(xprt); + kfree(dr); } + + svc_xprt_put(xprt); spin_unlock_bh(&serv->sv_lock); } @@ -902,17 +924,19 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) container_of(dreq, struct svc_deferred_req, handle); struct svc_xprt *xprt = dr->xprt; - if (too_many) { + spin_lock(&xprt->xpt_lock); + set_bit(XPT_DEFERRED, &xprt->xpt_flags); + if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) { + spin_unlock(&xprt->xpt_lock); + dprintk("revisit canceled\n"); svc_xprt_put(xprt); kfree(dr); return; } dprintk("revisit queued\n"); dr->xprt = NULL; - spin_lock(&xprt->xpt_lock); list_add(&dr->handle.recent, &xprt->xpt_deferred); spin_unlock(&xprt->xpt_lock); - set_bit(XPT_DEFERRED, &xprt->xpt_flags); svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 8a73cbb16052..e64109b02aee 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -57,13 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) rqstp->rq_authop = aops; return aops->accept(rqstp, authp); } -EXPORT_SYMBOL(svc_authenticate); +EXPORT_SYMBOL_GPL(svc_authenticate); int svc_set_client(struct svc_rqst *rqstp) { return rqstp->rq_authop->set_client(rqstp); } -EXPORT_SYMBOL(svc_set_client); +EXPORT_SYMBOL_GPL(svc_set_client); /* A request, which was authenticated, has now executed. * Time to finalise the credentials and verifier @@ -95,7 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) spin_unlock(&authtab_lock); return rv; } -EXPORT_SYMBOL(svc_auth_register); +EXPORT_SYMBOL_GPL(svc_auth_register); void svc_auth_unregister(rpc_authflavor_t flavor) @@ -105,7 +105,7 @@ svc_auth_unregister(rpc_authflavor_t flavor) authtab[flavor] = NULL; spin_unlock(&authtab_lock); } -EXPORT_SYMBOL(svc_auth_unregister); +EXPORT_SYMBOL_GPL(svc_auth_unregister); /************************************************** * 'auth_domains' are stored in a hash table indexed by name. @@ -132,7 +132,7 @@ void auth_domain_put(struct auth_domain *dom) spin_unlock(&auth_domain_lock); } } -EXPORT_SYMBOL(auth_domain_put); +EXPORT_SYMBOL_GPL(auth_domain_put); struct auth_domain * auth_domain_lookup(char *name, struct auth_domain *new) @@ -157,10 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new) spin_unlock(&auth_domain_lock); return new; } -EXPORT_SYMBOL(auth_domain_lookup); +EXPORT_SYMBOL_GPL(auth_domain_lookup); struct auth_domain *auth_domain_find(char *name) { return auth_domain_lookup(name, NULL); } -EXPORT_SYMBOL(auth_domain_find); +EXPORT_SYMBOL_GPL(auth_domain_find); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 82240e6127b2..5c865e2d299e 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -64,7 +64,7 @@ struct auth_domain *unix_domain_find(char *name) rv = auth_domain_lookup(name, &new->h); } } -EXPORT_SYMBOL(unix_domain_find); +EXPORT_SYMBOL_GPL(unix_domain_find); static void svcauth_unix_domain_release(struct auth_domain *dom) { @@ -358,7 +358,7 @@ int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom) else return -ENOMEM; } -EXPORT_SYMBOL(auth_unix_add_addr); +EXPORT_SYMBOL_GPL(auth_unix_add_addr); int auth_unix_forget_old(struct auth_domain *dom) { @@ -370,7 +370,7 @@ int auth_unix_forget_old(struct auth_domain *dom) udom->addr_changes++; return 0; } -EXPORT_SYMBOL(auth_unix_forget_old); +EXPORT_SYMBOL_GPL(auth_unix_forget_old); struct auth_domain *auth_unix_lookup(struct in6_addr *addr) { @@ -395,13 +395,13 @@ struct auth_domain *auth_unix_lookup(struct in6_addr *addr) cache_put(&ipm->h, &ip_map_cache); return rv; } -EXPORT_SYMBOL(auth_unix_lookup); +EXPORT_SYMBOL_GPL(auth_unix_lookup); void svcauth_unix_purge(void) { cache_purge(&ip_map_cache); } -EXPORT_SYMBOL(svcauth_unix_purge); +EXPORT_SYMBOL_GPL(svcauth_unix_purge); static inline struct ip_map * ip_map_cached_get(struct svc_rqst *rqstp) @@ -714,7 +714,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) return SVC_OK; } -EXPORT_SYMBOL(svcauth_unix_set_client); +EXPORT_SYMBOL_GPL(svcauth_unix_set_client); static int svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ef3238d665ee..5763e6460fea 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -59,6 +59,7 @@ static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); static void svc_sock_detach(struct svc_xprt *); +static void svc_tcp_sock_detach(struct svc_xprt *); static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, @@ -102,7 +103,6 @@ static void svc_reclassify_socket(struct socket *sock) static void svc_release_skb(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; - struct svc_deferred_req *dr = rqstp->rq_deferred; if (skb) { struct svc_sock *svsk = @@ -112,10 +112,6 @@ static void svc_release_skb(struct svc_rqst *rqstp) dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); skb_free_datagram(svsk->sk_sk, skb); } - if (dr) { - rqstp->rq_deferred = NULL; - kfree(dr); - } } union svc_pktinfo_u { @@ -289,7 +285,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) return -ENOENT; return len; } -EXPORT_SYMBOL(svc_sock_names); +EXPORT_SYMBOL_GPL(svc_sock_names); /* * Check input queue length @@ -1017,7 +1013,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, .xpo_release_rqst = svc_release_skb, - .xpo_detach = svc_sock_detach, + .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, .xpo_has_wspace = svc_tcp_has_wspace, @@ -1101,7 +1097,7 @@ void svc_sock_update_bufs(struct svc_serv *serv) } spin_unlock_bh(&serv->sv_lock); } -EXPORT_SYMBOL(svc_sock_update_bufs); +EXPORT_SYMBOL_GPL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct @@ -1287,6 +1283,24 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_state_change = svsk->sk_ostate; sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible(sk->sk_sleep); +} + +/* + * Disconnect the socket, and reset the callbacks + */ +static void svc_tcp_sock_detach(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); + + svc_sock_detach(xprt); + + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); } /* diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 5aa024b99c55..2f2d731bc1c2 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -124,7 +124,7 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, struct tipc_node_map *nm_diff) { - int stop = sizeof(nm_a->map) / sizeof(u32); + int stop = ARRAY_SIZE(nm_a->map); int w; int b; u32 map; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c6250d0055d2..d1b89820ab4f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -836,7 +836,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; + err = security_path_mknod(&nd.path, dentry, mode, 0); + if (err) + goto out_mknod_drop_write; err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); +out_mknod_drop_write: mnt_drop_write(nd.path.mnt); if (err) goto out_mknod_dput; diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig new file mode 100644 index 000000000000..18495cdcd10d --- /dev/null +++ b/net/wimax/Kconfig @@ -0,0 +1,52 @@ +# +# WiMAX LAN device configuration +# +# Note the ugly 'depends on' on WIMAX: that disallows RFKILL to be a +# module if WIMAX is to be linked in. The WiMAX code is done in such a +# way that it doesn't require and explicit dependency on RFKILL in +# case an embedded system wants to rip it out. +# +# As well, enablement of the RFKILL code means we need the INPUT layer +# support to inject events coming from hw rfkill switches. That +# dependency could be killed if input.h provided appropiate means to +# work when input is disabled. + +comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" + depends on INPUT = n && RFKILL != n + +menuconfig WIMAX + tristate "WiMAX Wireless Broadband support" + depends on (y && RFKILL != m) || m + depends on (INPUT && RFKILL != n) || RFKILL = n + help + + Select to configure support for devices that provide + wireless broadband connectivity using the WiMAX protocol + (IEEE 802.16). + + Please note that most of these devices require signing up + for a service plan with a provider. + + The different WiMAX drivers can be enabled in the menu entry + + Device Drivers > Network device support > WiMAX Wireless + Broadband devices + + If unsure, it is safe to select M (module). + +config WIMAX_DEBUG_LEVEL + int "WiMAX debug level" + depends on WIMAX + default 8 + help + + Select the maximum debug verbosity level to be compiled into + the WiMAX stack code. + + By default, debug messages are disabled at runtime and can + be selectively enabled for different parts of the code using + the sysfs debug-levels file. + + If set at zero, this will compile out all the debug code. + + It is recommended that it is left at 8. diff --git a/net/wimax/Makefile b/net/wimax/Makefile new file mode 100644 index 000000000000..5b80b941c2c9 --- /dev/null +++ b/net/wimax/Makefile @@ -0,0 +1,13 @@ + +obj-$(CONFIG_WIMAX) += wimax.o + +wimax-y := \ + id-table.o \ + op-msg.o \ + op-reset.o \ + op-rfkill.o \ + stack.o + +wimax-$(CONFIG_DEBUG_FS) += debugfs.o + + diff --git a/net/wimax/debug-levels.h b/net/wimax/debug-levels.h new file mode 100644 index 000000000000..1c29123a3aa9 --- /dev/null +++ b/net/wimax/debug-levels.h @@ -0,0 +1,42 @@ +/* + * Linux WiMAX Stack + * Debug levels control file for the wimax module + * + * + * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +#ifndef __debug_levels__h__ +#define __debug_levels__h__ + +/* Maximum compile and run time debug level for all submodules */ +#define D_MODULENAME wimax +#define D_MASTER CONFIG_WIMAX_DEBUG_LEVEL + +#include <linux/wimax/debug.h> + +/* List of all the enabled modules */ +enum d_module { + D_SUBMODULE_DECLARE(debugfs), + D_SUBMODULE_DECLARE(id_table), + D_SUBMODULE_DECLARE(op_msg), + D_SUBMODULE_DECLARE(op_reset), + D_SUBMODULE_DECLARE(op_rfkill), + D_SUBMODULE_DECLARE(stack), +}; + +#endif /* #ifndef __debug_levels__h__ */ diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c new file mode 100644 index 000000000000..94d216a46407 --- /dev/null +++ b/net/wimax/debugfs.c @@ -0,0 +1,79 @@ +/* + * Linux WiMAX + * Debugfs support + * + * + * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +#include <linux/debugfs.h> +#include <linux/wimax.h> +#include "wimax-internal.h" + +#define D_SUBMODULE debugfs +#include "debug-levels.h" + + +#define __debugfs_register(prefix, name, parent) \ +do { \ + result = d_level_register_debugfs(prefix, name, parent); \ + if (result < 0) \ + goto error; \ +} while (0) + + +int wimax_debugfs_add(struct wimax_dev *wimax_dev) +{ + int result; + struct net_device *net_dev = wimax_dev->net_dev; + struct device *dev = net_dev->dev.parent; + struct dentry *dentry; + char buf[128]; + + snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name); + dentry = debugfs_create_dir(buf, NULL); + result = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + if (result == -ENODEV) + result = 0; /* No debugfs support */ + else + dev_err(dev, "Can't create debugfs dentry: %d\n", + result); + goto out; + } + wimax_dev->debugfs_dentry = dentry; + __debugfs_register("wimax_dl_", debugfs, dentry); + __debugfs_register("wimax_dl_", id_table, dentry); + __debugfs_register("wimax_dl_", op_msg, dentry); + __debugfs_register("wimax_dl_", op_reset, dentry); + __debugfs_register("wimax_dl_", op_rfkill, dentry); + __debugfs_register("wimax_dl_", stack, dentry); + result = 0; +out: + return result; + +error: + debugfs_remove_recursive(wimax_dev->debugfs_dentry); + return result; +} + +void wimax_debugfs_rm(struct wimax_dev *wimax_dev) +{ + debugfs_remove_recursive(wimax_dev->debugfs_dentry); +} + + diff --git a/net/wimax/id-table.c b/net/wimax/id-table.c new file mode 100644 index 000000000000..5e685f7eda90 --- /dev/null +++ b/net/wimax/id-table.c @@ -0,0 +1,144 @@ +/* + * Linux WiMAX + * Mappping of generic netlink family IDs to net devices + * + * + * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * We assign a single generic netlink family ID to each device (to + * simplify lookup). + * + * We need a way to map family ID to a wimax_dev pointer. + * + * The idea is to use a very simple lookup. Using a netlink attribute + * with (for example) the interface name implies a heavier search over + * all the network devices; seemed kind of a waste given that we know + * we are looking for a WiMAX device and that most systems will have + * just a single WiMAX adapter. + * + * We put all the WiMAX devices in the system in a linked list and + * match the generic link family ID against the list. + * + * By using a linked list, the case of a single adapter in the system + * becomes (almost) no overhead, while still working for many more. If + * it ever goes beyond two, I'll be surprised. + */ +#include <linux/device.h> +#include <net/genetlink.h> +#include <linux/netdevice.h> +#include <linux/list.h> +#include <linux/wimax.h> +#include "wimax-internal.h" + + +#define D_SUBMODULE id_table +#include "debug-levels.h" + + +static DEFINE_SPINLOCK(wimax_id_table_lock); +static struct list_head wimax_id_table = LIST_HEAD_INIT(wimax_id_table); + + +/* + * wimax_id_table_add - add a gennetlink familiy ID / wimax_dev mapping + * + * @wimax_dev: WiMAX device descriptor to associate to the Generic + * Netlink family ID. + * + * Look for an empty spot in the ID table; if none found, double the + * table's size and get the first spot. + */ +void wimax_id_table_add(struct wimax_dev *wimax_dev) +{ + d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev); + spin_lock(&wimax_id_table_lock); + list_add(&wimax_dev->id_table_node, &wimax_id_table); + spin_unlock(&wimax_id_table_lock); + d_fnend(3, NULL, "(wimax_dev %p)\n", wimax_dev); +} + + +/* + * wimax_get_netdev_by_info - lookup a wimax_dev from the gennetlink info + * + * The generic netlink family ID has been filled out in the + * nlmsghdr->nlmsg_type field, so we pull it from there, look it up in + * the mapping table and reference the wimax_dev. + * + * When done, the reference should be dropped with + * 'dev_put(wimax_dev->net_dev)'. + */ +struct wimax_dev *wimax_dev_get_by_genl_info( + struct genl_info *info, int ifindex) +{ + struct wimax_dev *wimax_dev = NULL; + + d_fnstart(3, NULL, "(info %p ifindex %d)\n", info, ifindex); + spin_lock(&wimax_id_table_lock); + list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) { + if (wimax_dev->net_dev->ifindex == ifindex) { + dev_hold(wimax_dev->net_dev); + break; + } + } + if (wimax_dev == NULL) + d_printf(1, NULL, "wimax: no devices found with ifindex %d\n", + ifindex); + spin_unlock(&wimax_id_table_lock); + d_fnend(3, NULL, "(info %p ifindex %d) = %p\n", + info, ifindex, wimax_dev); + return wimax_dev; +} + + +/* + * wimax_id_table_rm - Remove a gennetlink familiy ID / wimax_dev mapping + * + * @id: family ID to remove from the table + */ +void wimax_id_table_rm(struct wimax_dev *wimax_dev) +{ + spin_lock(&wimax_id_table_lock); + list_del_init(&wimax_dev->id_table_node); + spin_unlock(&wimax_id_table_lock); +} + + +/* + * Release the gennetlink family id / mapping table + * + * On debug, verify that the table is empty upon removal. We want the + * code always compiled, to ensure it doesn't bit rot. It will be + * compiled out if CONFIG_BUG is disabled. + */ +void wimax_id_table_release(void) +{ + struct wimax_dev *wimax_dev; + +#ifndef CONFIG_BUG + return; +#endif + spin_lock(&wimax_id_table_lock); + list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) { + printk(KERN_ERR "BUG: %s wimax_dev %p ifindex %d not cleared\n", + __func__, wimax_dev, wimax_dev->net_dev->ifindex); + WARN_ON(1); + } + spin_unlock(&wimax_id_table_lock); +} diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c new file mode 100644 index 000000000000..cb3b4ad53683 --- /dev/null +++ b/net/wimax/op-msg.c @@ -0,0 +1,421 @@ +/* + * Linux WiMAX + * Generic messaging interface between userspace and driver/device + * + * + * Copyright (C) 2007-2008 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * This implements a direct communication channel between user space and + * the driver/device, by which free form messages can be sent back and + * forth. + * + * This is intended for device-specific features, vendor quirks, etc. + * + * See include/net/wimax.h + * + * GENERIC NETLINK ENCODING AND CAPACITY + * + * A destination "pipe name" is added to each message; it is up to the + * drivers to assign or use those names (if using them at all). + * + * Messages are encoded as a binary netlink attribute using nla_put() + * using type NLA_UNSPEC (as some versions of libnl still in + * deployment don't yet understand NLA_BINARY). + * + * The maximum capacity of this transport is PAGESIZE per message (so + * the actual payload will be bit smaller depending on the + * netlink/generic netlink attributes and headers). + * + * RECEPTION OF MESSAGES + * + * When a message is received from user space, it is passed verbatim + * to the driver calling wimax_dev->op_msg_from_user(). The return + * value from this function is passed back to user space as an ack + * over the generic netlink protocol. + * + * The stack doesn't do any processing or interpretation of these + * messages. + * + * SENDING MESSAGES + * + * Messages can be sent with wimax_msg(). + * + * If the message delivery needs to happen on a different context to + * that of its creation, wimax_msg_alloc() can be used to get a + * pointer to the message that can be delivered later on with + * wimax_msg_send(). + * + * ROADMAP + * + * wimax_gnl_doit_msg_from_user() Process a message from user space + * wimax_dev_get_by_genl_info() + * wimax_dev->op_msg_from_user() Delivery of message to the driver + * + * wimax_msg() Send a message to user space + * wimax_msg_alloc() + * wimax_msg_send() + */ +#include <linux/device.h> +#include <net/genetlink.h> +#include <linux/netdevice.h> +#include <linux/wimax.h> +#include <linux/security.h> +#include "wimax-internal.h" + + +#define D_SUBMODULE op_msg +#include "debug-levels.h" + + +/** + * wimax_msg_alloc - Create a new skb for sending a message to userspace + * + * @wimax_dev: WiMAX device descriptor + * @pipe_name: "named pipe" the message will be sent to + * @msg: pointer to the message data to send + * @size: size of the message to send (in bytes), including the header. + * @gfp_flags: flags for memory allocation. + * + * Returns: %0 if ok, negative errno code on error + * + * Description: + * + * Allocates an skb that will contain the message to send to user + * space over the messaging pipe and initializes it, copying the + * payload. + * + * Once this call is done, you can deliver it with + * wimax_msg_send(). + * + * IMPORTANT: + * + * Don't use skb_push()/skb_pull()/skb_reserve() on the skb, as + * wimax_msg_send() depends on skb->data being placed at the + * beginning of the user message. + */ +struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev, + const char *pipe_name, + const void *msg, size_t size, + gfp_t gfp_flags) +{ + int result; + struct device *dev = wimax_dev->net_dev->dev.parent; + size_t msg_size; + void *genl_msg; + struct sk_buff *skb; + + msg_size = nla_total_size(size) + + nla_total_size(sizeof(u32)) + + (pipe_name ? nla_total_size(strlen(pipe_name)) : 0); + result = -ENOMEM; + skb = genlmsg_new(msg_size, gfp_flags); + if (skb == NULL) + goto error_new; + genl_msg = genlmsg_put(skb, 0, 0, &wimax_gnl_family, + 0, WIMAX_GNL_OP_MSG_TO_USER); + if (genl_msg == NULL) { + dev_err(dev, "no memory to create generic netlink message\n"); + goto error_genlmsg_put; + } + result = nla_put_u32(skb, WIMAX_GNL_MSG_IFIDX, + wimax_dev->net_dev->ifindex); + if (result < 0) { + dev_err(dev, "no memory to add ifindex attribute\n"); + goto error_nla_put; + } + if (pipe_name) { + result = nla_put_string(skb, WIMAX_GNL_MSG_PIPE_NAME, + pipe_name); + if (result < 0) { + dev_err(dev, "no memory to add pipe_name attribute\n"); + goto error_nla_put; + } + } + result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg); + if (result < 0) { + dev_err(dev, "no memory to add payload in attribute\n"); + goto error_nla_put; + } + genlmsg_end(skb, genl_msg); + return skb; + +error_nla_put: +error_genlmsg_put: +error_new: + nlmsg_free(skb); + return ERR_PTR(result); + +} +EXPORT_SYMBOL_GPL(wimax_msg_alloc); + + +/** + * wimax_msg_data_len - Return a pointer and size of a message's payload + * + * @msg: Pointer to a message created with wimax_msg_alloc() + * @size: Pointer to where to store the message's size + * + * Returns the pointer to the message data. + */ +const void *wimax_msg_data_len(struct sk_buff *msg, size_t *size) +{ + struct nlmsghdr *nlh = (void *) msg->head; + struct nlattr *nla; + + nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr), + WIMAX_GNL_MSG_DATA); + if (nla == NULL) { + printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n"); + return NULL; + } + *size = nla_len(nla); + return nla_data(nla); +} +EXPORT_SYMBOL_GPL(wimax_msg_data_len); + + +/** + * wimax_msg_data - Return a pointer to a message's payload + * + * @msg: Pointer to a message created with wimax_msg_alloc() + */ +const void *wimax_msg_data(struct sk_buff *msg) +{ + struct nlmsghdr *nlh = (void *) msg->head; + struct nlattr *nla; + + nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr), + WIMAX_GNL_MSG_DATA); + if (nla == NULL) { + printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n"); + return NULL; + } + return nla_data(nla); +} +EXPORT_SYMBOL_GPL(wimax_msg_data); + + +/** + * wimax_msg_len - Return a message's payload length + * + * @msg: Pointer to a message created with wimax_msg_alloc() + */ +ssize_t wimax_msg_len(struct sk_buff *msg) +{ + struct nlmsghdr *nlh = (void *) msg->head; + struct nlattr *nla; + + nla = nlmsg_find_attr(nlh, sizeof(struct genlmsghdr), + WIMAX_GNL_MSG_DATA); + if (nla == NULL) { + printk(KERN_ERR "Cannot find attribute WIMAX_GNL_MSG_DATA\n"); + return -EINVAL; + } + return nla_len(nla); +} +EXPORT_SYMBOL_GPL(wimax_msg_len); + + +/** + * wimax_msg_send - Send a pre-allocated message to user space + * + * @wimax_dev: WiMAX device descriptor + * + * @skb: &struct sk_buff returned by wimax_msg_alloc(). Note the + * ownership of @skb is transferred to this function. + * + * Returns: 0 if ok, < 0 errno code on error + * + * Description: + * + * Sends a free-form message that was preallocated with + * wimax_msg_alloc() and filled up. + * + * Assumes that once you pass an skb to this function for sending, it + * owns it and will release it when done (on success). + * + * IMPORTANT: + * + * Don't use skb_push()/skb_pull()/skb_reserve() on the skb, as + * wimax_msg_send() depends on skb->data being placed at the + * beginning of the user message. + */ +int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) +{ + int result; + struct device *dev = wimax_dev->net_dev->dev.parent; + void *msg = skb->data; + size_t size = skb->len; + might_sleep(); + + d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); + d_dump(2, dev, msg, size); + result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); + d_printf(1, dev, "CTX: genl multicast result %d\n", result); + if (result == -ESRCH) /* Nobody connected, ignore it */ + result = 0; /* btw, the skb is freed already */ + return result; +} +EXPORT_SYMBOL_GPL(wimax_msg_send); + + +/** + * wimax_msg - Send a message to user space + * + * @wimax_dev: WiMAX device descriptor (properly referenced) + * @pipe_name: "named pipe" the message will be sent to + * @buf: pointer to the message to send. + * @size: size of the buffer pointed to by @buf (in bytes). + * @gfp_flags: flags for memory allocation. + * + * Returns: %0 if ok, negative errno code on error. + * + * Description: + * + * Sends a free-form message to user space on the device @wimax_dev. + * + * NOTES: + * + * Once the @skb is given to this function, who will own it and will + * release it when done (unless it returns error). + */ +int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, + const void *buf, size_t size, gfp_t gfp_flags) +{ + int result = -ENOMEM; + struct sk_buff *skb; + + skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags); + if (skb == NULL) + goto error_msg_new; + result = wimax_msg_send(wimax_dev, skb); +error_msg_new: + return result; +} +EXPORT_SYMBOL_GPL(wimax_msg); + + +static const +struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { + [WIMAX_GNL_MSG_IFIDX] = { + .type = NLA_U32, + }, + [WIMAX_GNL_MSG_DATA] = { + .type = NLA_UNSPEC, /* libnl doesn't grok BINARY yet */ + }, +}; + + +/* + * Relays a message from user space to the driver + * + * The skb is passed to the driver-specific function with the netlink + * and generic netlink headers already stripped. + * + * This call will block while handling/relaying the message. + */ +static +int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info) +{ + int result, ifindex; + struct wimax_dev *wimax_dev; + struct device *dev; + struct nlmsghdr *nlh = info->nlhdr; + char *pipe_name; + void *msg_buf; + size_t msg_len; + + might_sleep(); + d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); + result = -ENODEV; + if (info->attrs[WIMAX_GNL_MSG_IFIDX] == NULL) { + printk(KERN_ERR "WIMAX_GNL_MSG_FROM_USER: can't find IFIDX " + "attribute\n"); + goto error_no_wimax_dev; + } + ifindex = nla_get_u32(info->attrs[WIMAX_GNL_MSG_IFIDX]); + wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); + if (wimax_dev == NULL) + goto error_no_wimax_dev; + dev = wimax_dev_to_dev(wimax_dev); + + /* Unpack arguments */ + result = -EINVAL; + if (info->attrs[WIMAX_GNL_MSG_DATA] == NULL) { + dev_err(dev, "WIMAX_GNL_MSG_FROM_USER: can't find MSG_DATA " + "attribute\n"); + goto error_no_data; + } + msg_buf = nla_data(info->attrs[WIMAX_GNL_MSG_DATA]); + msg_len = nla_len(info->attrs[WIMAX_GNL_MSG_DATA]); + + if (info->attrs[WIMAX_GNL_MSG_PIPE_NAME] == NULL) + pipe_name = NULL; + else { + struct nlattr *attr = info->attrs[WIMAX_GNL_MSG_PIPE_NAME]; + size_t attr_len = nla_len(attr); + /* libnl-1.1 does not yet support NLA_NUL_STRING */ + result = -ENOMEM; + pipe_name = kstrndup(nla_data(attr), attr_len + 1, GFP_KERNEL); + if (pipe_name == NULL) + goto error_alloc; + pipe_name[attr_len] = 0; + } + mutex_lock(&wimax_dev->mutex); + result = wimax_dev_is_ready(wimax_dev); + if (result < 0) + goto error_not_ready; + result = -ENOSYS; + if (wimax_dev->op_msg_from_user == NULL) + goto error_noop; + + d_printf(1, dev, + "CRX: nlmsghdr len %u type %u flags 0x%04x seq 0x%x pid %u\n", + nlh->nlmsg_len, nlh->nlmsg_type, nlh->nlmsg_flags, + nlh->nlmsg_seq, nlh->nlmsg_pid); + d_printf(1, dev, "CRX: wimax message %zu bytes\n", msg_len); + d_dump(2, dev, msg_buf, msg_len); + + result = wimax_dev->op_msg_from_user(wimax_dev, pipe_name, + msg_buf, msg_len, info); +error_noop: +error_not_ready: + mutex_unlock(&wimax_dev->mutex); +error_alloc: + kfree(pipe_name); +error_no_data: + dev_put(wimax_dev->net_dev); +error_no_wimax_dev: + d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result); + return result; +} + + +/* + * Generic Netlink glue + */ + +struct genl_ops wimax_gnl_msg_from_user = { + .cmd = WIMAX_GNL_OP_MSG_FROM_USER, + .flags = GENL_ADMIN_PERM, + .policy = wimax_gnl_msg_policy, + .doit = wimax_gnl_doit_msg_from_user, + .dumpit = NULL, +}; + diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c new file mode 100644 index 000000000000..ca269178c4d4 --- /dev/null +++ b/net/wimax/op-reset.c @@ -0,0 +1,143 @@ +/* + * Linux WiMAX + * Implement and export a method for resetting a WiMAX device + * + * + * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * This implements a simple synchronous call to reset a WiMAX device. + * + * Resets aim at being warm, keeping the device handles active; + * however, when that fails, it falls back to a cold reset (that will + * disconnect and reconnect the device). + */ + +#include <net/wimax.h> +#include <net/genetlink.h> +#include <linux/wimax.h> +#include <linux/security.h> +#include "wimax-internal.h" + +#define D_SUBMODULE op_reset +#include "debug-levels.h" + + +/** + * wimax_reset - Reset a WiMAX device + * + * @wimax_dev: WiMAX device descriptor + * + * Returns: + * + * %0 if ok and a warm reset was done (the device still exists in + * the system). + * + * -%ENODEV if a cold/bus reset had to be done (device has + * disconnected and reconnected, so current handle is not valid + * any more). + * + * -%EINVAL if the device is not even registered. + * + * Any other negative error code shall be considered as + * non-recoverable. + * + * Description: + * + * Called when wanting to reset the device for any reason. Device is + * taken back to power on status. + * + * This call blocks; on succesful return, the device has completed the + * reset process and is ready to operate. + */ +int wimax_reset(struct wimax_dev *wimax_dev) +{ + int result = -EINVAL; + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_st state; + + might_sleep(); + d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev); + mutex_lock(&wimax_dev->mutex); + dev_hold(wimax_dev->net_dev); + state = wimax_dev->state; + mutex_unlock(&wimax_dev->mutex); + + if (state >= WIMAX_ST_DOWN) { + mutex_lock(&wimax_dev->mutex_reset); + result = wimax_dev->op_reset(wimax_dev); + mutex_unlock(&wimax_dev->mutex_reset); + } + dev_put(wimax_dev->net_dev); + + d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result); + return result; +} +EXPORT_SYMBOL(wimax_reset); + + +static const +struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { + [WIMAX_GNL_RESET_IFIDX] = { + .type = NLA_U32, + }, +}; + + +/* + * Exporting to user space over generic netlink + * + * Parse the reset command from user space, return error code. + * + * No attributes. + */ +static +int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info) +{ + int result, ifindex; + struct wimax_dev *wimax_dev; + struct device *dev; + + d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); + result = -ENODEV; + if (info->attrs[WIMAX_GNL_RESET_IFIDX] == NULL) { + printk(KERN_ERR "WIMAX_GNL_OP_RFKILL: can't find IFIDX " + "attribute\n"); + goto error_no_wimax_dev; + } + ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RESET_IFIDX]); + wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); + if (wimax_dev == NULL) + goto error_no_wimax_dev; + dev = wimax_dev_to_dev(wimax_dev); + /* Execute the operation and send the result back to user space */ + result = wimax_reset(wimax_dev); + dev_put(wimax_dev->net_dev); +error_no_wimax_dev: + d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result); + return result; +} + + +struct genl_ops wimax_gnl_reset = { + .cmd = WIMAX_GNL_OP_RESET, + .flags = GENL_ADMIN_PERM, + .policy = wimax_gnl_reset_policy, + .doit = wimax_gnl_doit_reset, + .dumpit = NULL, +}; diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c new file mode 100644 index 000000000000..2b75aee04217 --- /dev/null +++ b/net/wimax/op-rfkill.c @@ -0,0 +1,532 @@ +/* + * Linux WiMAX + * RF-kill framework integration + * + * + * Copyright (C) 2008 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * This integrates into the Linux Kernel rfkill susbystem so that the + * drivers just have to do the bare minimal work, which is providing a + * method to set the software RF-Kill switch and to report changes in + * the software and hardware switch status. + * + * A non-polled generic rfkill device is embedded into the WiMAX + * subsystem's representation of a device. + * + * FIXME: Need polled support? use a timer or add the implementation + * to the stack. + * + * All device drivers have to do is after wimax_dev_init(), call + * wimax_report_rfkill_hw() and wimax_report_rfkill_sw() to update + * initial state and then every time it changes. See wimax.h:struct + * wimax_dev for more information. + * + * ROADMAP + * + * wimax_gnl_doit_rfkill() User space calling wimax_rfkill() + * wimax_rfkill() Kernel calling wimax_rfkill() + * __wimax_rf_toggle_radio() + * + * wimax_rfkill_toggle_radio() RF-Kill subsytem calling + * __wimax_rf_toggle_radio() + * + * __wimax_rf_toggle_radio() + * wimax_dev->op_rfkill_sw_toggle() Driver backend + * __wimax_state_change() + * + * wimax_report_rfkill_sw() Driver reports state change + * __wimax_state_change() + * + * wimax_report_rfkill_hw() Driver reports state change + * __wimax_state_change() + * + * wimax_rfkill_add() Initialize/shutdown rfkill support + * wimax_rfkill_rm() [called by wimax_dev_add/rm()] + */ + +#include <net/wimax.h> +#include <net/genetlink.h> +#include <linux/wimax.h> +#include <linux/security.h> +#include <linux/rfkill.h> +#include <linux/input.h> +#include "wimax-internal.h" + +#define D_SUBMODULE op_rfkill +#include "debug-levels.h" + +#if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE) + + +/** + * wimax_report_rfkill_hw - Reports changes in the hardware RF switch + * + * @wimax_dev: WiMAX device descriptor + * + * @state: New state of the RF Kill switch. %WIMAX_RF_ON radio on, + * %WIMAX_RF_OFF radio off. + * + * When the device detects a change in the state of thehardware RF + * switch, it must call this function to let the WiMAX kernel stack + * know that the state has changed so it can be properly propagated. + * + * The WiMAX stack caches the state (the driver doesn't need to). As + * well, as the change is propagated it will come back as a request to + * change the software state to mirror the hardware state. + * + * If the device doesn't have a hardware kill switch, just report + * it on initialization as always on (%WIMAX_RF_ON, radio on). + */ +void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ + int result; + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_st wimax_state; + enum rfkill_state rfkill_state; + + d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); + BUG_ON(state == WIMAX_RF_QUERY); + BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF); + + mutex_lock(&wimax_dev->mutex); + result = wimax_dev_is_ready(wimax_dev); + if (result < 0) + goto error_not_ready; + + if (state != wimax_dev->rf_hw) { + wimax_dev->rf_hw = state; + rfkill_state = state == WIMAX_RF_ON ? + RFKILL_STATE_OFF : RFKILL_STATE_ON; + if (wimax_dev->rf_hw == WIMAX_RF_ON + && wimax_dev->rf_sw == WIMAX_RF_ON) + wimax_state = WIMAX_ST_READY; + else + wimax_state = WIMAX_ST_RADIO_OFF; + __wimax_state_change(wimax_dev, wimax_state); + input_report_key(wimax_dev->rfkill_input, KEY_WIMAX, + rfkill_state); + } +error_not_ready: + mutex_unlock(&wimax_dev->mutex); + d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n", + wimax_dev, state, result); +} +EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw); + + +/** + * wimax_report_rfkill_sw - Reports changes in the software RF switch + * + * @wimax_dev: WiMAX device descriptor + * + * @state: New state of the RF kill switch. %WIMAX_RF_ON radio on, + * %WIMAX_RF_OFF radio off. + * + * Reports changes in the software RF switch state to the the WiMAX + * stack. + * + * The main use is during initialization, so the driver can query the + * device for its current software radio kill switch state and feed it + * to the system. + * + * On the side, the device does not change the software state by + * itself. In practice, this can happen, as the device might decide to + * switch (in software) the radio off for different reasons. + */ +void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ + int result; + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_st wimax_state; + + d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); + BUG_ON(state == WIMAX_RF_QUERY); + BUG_ON(state != WIMAX_RF_ON && state != WIMAX_RF_OFF); + + mutex_lock(&wimax_dev->mutex); + result = wimax_dev_is_ready(wimax_dev); + if (result < 0) + goto error_not_ready; + + if (state != wimax_dev->rf_sw) { + wimax_dev->rf_sw = state; + if (wimax_dev->rf_hw == WIMAX_RF_ON + && wimax_dev->rf_sw == WIMAX_RF_ON) + wimax_state = WIMAX_ST_READY; + else + wimax_state = WIMAX_ST_RADIO_OFF; + __wimax_state_change(wimax_dev, wimax_state); + } +error_not_ready: + mutex_unlock(&wimax_dev->mutex); + d_fnend(3, dev, "(wimax_dev %p state %u) = void [%d]\n", + wimax_dev, state, result); +} +EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw); + + +/* + * Callback for the RF Kill toggle operation + * + * This function is called by: + * + * - The rfkill subsystem when the RF-Kill key is pressed in the + * hardware and the driver notifies through + * wimax_report_rfkill_hw(). The rfkill subsystem ends up calling back + * here so the software RF Kill switch state is changed to reflect + * the hardware switch state. + * + * - When the user sets the state through sysfs' rfkill/state file + * + * - When the user calls wimax_rfkill(). + * + * This call blocks! + * + * WARNING! When we call rfkill_unregister(), this will be called with + * state 0! + * + * WARNING: wimax_dev must be locked + */ +static +int __wimax_rf_toggle_radio(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ + int result = 0; + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_st wimax_state; + + might_sleep(); + d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); + if (wimax_dev->rf_sw == state) + goto out_no_change; + if (wimax_dev->op_rfkill_sw_toggle != NULL) + result = wimax_dev->op_rfkill_sw_toggle(wimax_dev, state); + else if (state == WIMAX_RF_OFF) /* No op? can't turn off */ + result = -ENXIO; + else /* No op? can turn on */ + result = 0; /* should never happen tho */ + if (result >= 0) { + result = 0; + wimax_dev->rf_sw = state; + wimax_state = state == WIMAX_RF_ON ? + WIMAX_ST_READY : WIMAX_ST_RADIO_OFF; + __wimax_state_change(wimax_dev, wimax_state); + } +out_no_change: + d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n", + wimax_dev, state, result); + return result; +} + + +/* + * Translate from rfkill state to wimax state + * + * NOTE: Special state handling rules here + * + * Just pretend the call didn't happen if we are in a state where + * we know for sure it cannot be handled (WIMAX_ST_DOWN or + * __WIMAX_ST_QUIESCING). rfkill() needs it to register and + * unregister, as it will run this path. + * + * NOTE: This call will block until the operation is completed. + */ +static +int wimax_rfkill_toggle_radio(void *data, enum rfkill_state state) +{ + int result; + struct wimax_dev *wimax_dev = data; + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_rf_state rf_state; + + d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); + switch (state) { + case RFKILL_STATE_ON: + rf_state = WIMAX_RF_OFF; + break; + case RFKILL_STATE_OFF: + rf_state = WIMAX_RF_ON; + break; + default: + BUG(); + } + mutex_lock(&wimax_dev->mutex); + if (wimax_dev->state <= __WIMAX_ST_QUIESCING) + result = 0; /* just pretend it didn't happen */ + else + result = __wimax_rf_toggle_radio(wimax_dev, rf_state); + mutex_unlock(&wimax_dev->mutex); + d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n", + wimax_dev, state, result); + return result; +} + + +/** + * wimax_rfkill - Set the software RF switch state for a WiMAX device + * + * @wimax_dev: WiMAX device descriptor + * + * @state: New RF state. + * + * Returns: + * + * >= 0 toggle state if ok, < 0 errno code on error. The toggle state + * is returned as a bitmap, bit 0 being the hardware RF state, bit 1 + * the software RF state. + * + * 0 means disabled (%WIMAX_RF_ON, radio on), 1 means enabled radio + * off (%WIMAX_RF_OFF). + * + * Description: + * + * Called by the user when he wants to request the WiMAX radio to be + * switched on (%WIMAX_RF_ON) or off (%WIMAX_RF_OFF). With + * %WIMAX_RF_QUERY, just the current state is returned. + * + * NOTE: + * + * This call will block until the operation is complete. + */ +int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state) +{ + int result; + struct device *dev = wimax_dev_to_dev(wimax_dev); + + d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); + mutex_lock(&wimax_dev->mutex); + result = wimax_dev_is_ready(wimax_dev); + if (result < 0) + goto error_not_ready; + switch (state) { + case WIMAX_RF_ON: + case WIMAX_RF_OFF: + result = __wimax_rf_toggle_radio(wimax_dev, state); + if (result < 0) + goto error; + break; + case WIMAX_RF_QUERY: + break; + default: + result = -EINVAL; + goto error; + } + result = wimax_dev->rf_sw << 1 | wimax_dev->rf_hw; +error: +error_not_ready: + mutex_unlock(&wimax_dev->mutex); + d_fnend(3, dev, "(wimax_dev %p state %u) = %d\n", + wimax_dev, state, result); + return result; +} +EXPORT_SYMBOL(wimax_rfkill); + + +/* + * Register a new WiMAX device's RF Kill support + * + * WARNING: wimax_dev->mutex must be unlocked + */ +int wimax_rfkill_add(struct wimax_dev *wimax_dev) +{ + int result; + struct rfkill *rfkill; + struct input_dev *input_dev; + struct device *dev = wimax_dev_to_dev(wimax_dev); + + d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev); + /* Initialize RF Kill */ + result = -ENOMEM; + rfkill = rfkill_allocate(dev, RFKILL_TYPE_WIMAX); + if (rfkill == NULL) + goto error_rfkill_allocate; + wimax_dev->rfkill = rfkill; + + rfkill->name = wimax_dev->name; + rfkill->state = RFKILL_STATE_OFF; + rfkill->data = wimax_dev; + rfkill->toggle_radio = wimax_rfkill_toggle_radio; + rfkill->user_claim_unsupported = 1; + + /* Initialize the input device for the hw key */ + input_dev = input_allocate_device(); + if (input_dev == NULL) + goto error_input_allocate; + wimax_dev->rfkill_input = input_dev; + d_printf(1, dev, "rfkill %p input %p\n", rfkill, input_dev); + + input_dev->name = wimax_dev->name; + /* FIXME: get a real device bus ID and stuff? do we care? */ + input_dev->id.bustype = BUS_HOST; + input_dev->id.vendor = 0xffff; + input_dev->evbit[0] = BIT(EV_KEY); + set_bit(KEY_WIMAX, input_dev->keybit); + + /* Register both */ + result = input_register_device(wimax_dev->rfkill_input); + if (result < 0) + goto error_input_register; + result = rfkill_register(wimax_dev->rfkill); + if (result < 0) + goto error_rfkill_register; + + /* If there is no SW toggle op, SW RFKill is always on */ + if (wimax_dev->op_rfkill_sw_toggle == NULL) + wimax_dev->rf_sw = WIMAX_RF_ON; + + d_fnend(3, dev, "(wimax_dev %p) = 0\n", wimax_dev); + return 0; + + /* if rfkill_register() suceeds, can't use rfkill_free() any + * more, only rfkill_unregister() [it owns the refcount]; with + * the input device we have the same issue--hence the if. */ +error_rfkill_register: + input_unregister_device(wimax_dev->rfkill_input); + wimax_dev->rfkill_input = NULL; +error_input_register: + if (wimax_dev->rfkill_input) + input_free_device(wimax_dev->rfkill_input); +error_input_allocate: + rfkill_free(wimax_dev->rfkill); +error_rfkill_allocate: + d_fnend(3, dev, "(wimax_dev %p) = %d\n", wimax_dev, result); + return result; +} + + +/* + * Deregister a WiMAX device's RF Kill support + * + * Ick, we can't call rfkill_free() after rfkill_unregister()...oh + * well. + * + * WARNING: wimax_dev->mutex must be unlocked + */ +void wimax_rfkill_rm(struct wimax_dev *wimax_dev) +{ + struct device *dev = wimax_dev_to_dev(wimax_dev); + d_fnstart(3, dev, "(wimax_dev %p)\n", wimax_dev); + rfkill_unregister(wimax_dev->rfkill); /* frees */ + input_unregister_device(wimax_dev->rfkill_input); + d_fnend(3, dev, "(wimax_dev %p)\n", wimax_dev); +} + + +#else /* #ifdef CONFIG_RFKILL */ + +void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ +} +EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw); + +void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ +} +EXPORT_SYMBOL_GPL(wimax_report_rfkill_sw); + +int wimax_rfkill(struct wimax_dev *wimax_dev, + enum wimax_rf_state state) +{ + return WIMAX_RF_ON << 1 | WIMAX_RF_ON; +} +EXPORT_SYMBOL_GPL(wimax_rfkill); + +int wimax_rfkill_add(struct wimax_dev *wimax_dev) +{ + return 0; +} + +void wimax_rfkill_rm(struct wimax_dev *wimax_dev) +{ +} + +#endif /* #ifdef CONFIG_RFKILL */ + + +/* + * Exporting to user space over generic netlink + * + * Parse the rfkill command from user space, return a combination + * value that describe the states of the different toggles. + * + * Only one attribute: the new state requested (on, off or no change, + * just query). + */ + +static const +struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { + [WIMAX_GNL_RFKILL_IFIDX] = { + .type = NLA_U32, + }, + [WIMAX_GNL_RFKILL_STATE] = { + .type = NLA_U32 /* enum wimax_rf_state */ + }, +}; + + +static +int wimax_gnl_doit_rfkill(struct sk_buff *skb, struct genl_info *info) +{ + int result, ifindex; + struct wimax_dev *wimax_dev; + struct device *dev; + enum wimax_rf_state new_state; + + d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info); + result = -ENODEV; + if (info->attrs[WIMAX_GNL_RFKILL_IFIDX] == NULL) { + printk(KERN_ERR "WIMAX_GNL_OP_RFKILL: can't find IFIDX " + "attribute\n"); + goto error_no_wimax_dev; + } + ifindex = nla_get_u32(info->attrs[WIMAX_GNL_RFKILL_IFIDX]); + wimax_dev = wimax_dev_get_by_genl_info(info, ifindex); + if (wimax_dev == NULL) + goto error_no_wimax_dev; + dev = wimax_dev_to_dev(wimax_dev); + result = -EINVAL; + if (info->attrs[WIMAX_GNL_RFKILL_STATE] == NULL) { + dev_err(dev, "WIMAX_GNL_RFKILL: can't find RFKILL_STATE " + "attribute\n"); + goto error_no_pid; + } + new_state = nla_get_u32(info->attrs[WIMAX_GNL_RFKILL_STATE]); + + /* Execute the operation and send the result back to user space */ + result = wimax_rfkill(wimax_dev, new_state); +error_no_pid: + dev_put(wimax_dev->net_dev); +error_no_wimax_dev: + d_fnend(3, NULL, "(skb %p info %p) = %d\n", skb, info, result); + return result; +} + + +struct genl_ops wimax_gnl_rfkill = { + .cmd = WIMAX_GNL_OP_RFKILL, + .flags = GENL_ADMIN_PERM, + .policy = wimax_gnl_rfkill_policy, + .doit = wimax_gnl_doit_rfkill, + .dumpit = NULL, +}; + diff --git a/net/wimax/stack.c b/net/wimax/stack.c new file mode 100644 index 000000000000..3869c0327882 --- /dev/null +++ b/net/wimax/stack.c @@ -0,0 +1,612 @@ +/* + * Linux WiMAX + * Initialization, addition and removal of wimax devices + * + * + * Copyright (C) 2005-2006 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * This implements: + * + * - basic life cycle of 'struct wimax_dev' [wimax_dev_*()]; on + * addition/registration initialize all subfields and allocate + * generic netlink resources for user space communication. On + * removal/unregistration, undo all that. + * + * - device state machine [wimax_state_change()] and support to send + * reports to user space when the state changes + * [wimax_gnl_re_state_change*()]. + * + * See include/net/wimax.h for rationales and design. + * + * ROADMAP + * + * [__]wimax_state_change() Called by drivers to update device's state + * wimax_gnl_re_state_change_alloc() + * wimax_gnl_re_state_change_send() + * + * wimax_dev_init() Init a device + * wimax_dev_add() Register + * wimax_rfkill_add() + * wimax_gnl_add() Register all the generic netlink resources. + * wimax_id_table_add() + * wimax_dev_rm() Unregister + * wimax_id_table_rm() + * wimax_gnl_rm() + * wimax_rfkill_rm() + */ +#include <linux/device.h> +#include <net/genetlink.h> +#include <linux/netdevice.h> +#include <linux/wimax.h> +#include "wimax-internal.h" + + +#define D_SUBMODULE stack +#include "debug-levels.h" + +/* + * Authoritative source for the RE_STATE_CHANGE attribute policy + * + * We don't really use it here, but /me likes to keep the definition + * close to where the data is generated. + */ +/* +static const +struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { + [WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 }, + [WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 }, +}; +*/ + + +/* + * Allocate a Report State Change message + * + * @header: save it, you need it for _send() + * + * Creates and fills a basic state change message; different code + * paths can then add more attributes to the message as needed. + * + * Use wimax_gnl_re_state_change_send() to send the returned skb. + * + * Returns: skb with the genl message if ok, IS_ERR() ptr on error + * with an errno code. + */ +static +struct sk_buff *wimax_gnl_re_state_change_alloc( + struct wimax_dev *wimax_dev, + enum wimax_st new_state, enum wimax_st old_state, + void **header) +{ + int result; + struct device *dev = wimax_dev_to_dev(wimax_dev); + void *data; + struct sk_buff *report_skb; + + d_fnstart(3, dev, "(wimax_dev %p new_state %u old_state %u)\n", + wimax_dev, new_state, old_state); + result = -ENOMEM; + report_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (report_skb == NULL) { + dev_err(dev, "RE_STCH: can't create message\n"); + goto error_new; + } + data = genlmsg_put(report_skb, 0, wimax_gnl_mcg.id, &wimax_gnl_family, + 0, WIMAX_GNL_RE_STATE_CHANGE); + if (data == NULL) { + dev_err(dev, "RE_STCH: can't put data into message\n"); + goto error_put; + } + *header = data; + + result = nla_put_u8(report_skb, WIMAX_GNL_STCH_STATE_OLD, old_state); + if (result < 0) { + dev_err(dev, "RE_STCH: Error adding OLD attr: %d\n", result); + goto error_put; + } + result = nla_put_u8(report_skb, WIMAX_GNL_STCH_STATE_NEW, new_state); + if (result < 0) { + dev_err(dev, "RE_STCH: Error adding NEW attr: %d\n", result); + goto error_put; + } + result = nla_put_u32(report_skb, WIMAX_GNL_STCH_IFIDX, + wimax_dev->net_dev->ifindex); + if (result < 0) { + dev_err(dev, "RE_STCH: Error adding IFINDEX attribute\n"); + goto error_put; + } + d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %p\n", + wimax_dev, new_state, old_state, report_skb); + return report_skb; + +error_put: + nlmsg_free(report_skb); +error_new: + d_fnend(3, dev, "(wimax_dev %p new_state %u old_state %u) = %d\n", + wimax_dev, new_state, old_state, result); + return ERR_PTR(result); +} + + +/* + * Send a Report State Change message (as created with _alloc). + * + * @report_skb: as returned by wimax_gnl_re_state_change_alloc() + * @header: as returned by wimax_gnl_re_state_change_alloc() + * + * Returns: 0 if ok, < 0 errno code on error. + * + * If the message is NULL, pretend it didn't happen. + */ +static +int wimax_gnl_re_state_change_send( + struct wimax_dev *wimax_dev, struct sk_buff *report_skb, + void *header) +{ + int result = 0; + struct device *dev = wimax_dev_to_dev(wimax_dev); + d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", + wimax_dev, report_skb); + if (report_skb == NULL) + goto out; + genlmsg_end(report_skb, header); + result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); + if (result == -ESRCH) /* Nobody connected, ignore it */ + result = 0; /* btw, the skb is freed already */ + if (result < 0) { + dev_err(dev, "RE_STCH: Error sending: %d\n", result); + nlmsg_free(report_skb); + } +out: + d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", + wimax_dev, report_skb, result); + return result; +} + + +static +void __check_new_state(enum wimax_st old_state, enum wimax_st new_state, + unsigned allowed_states_bm) +{ + if (WARN_ON(((1 << new_state) & allowed_states_bm) == 0)) { + printk(KERN_ERR "SW BUG! Forbidden state change %u -> %u\n", + old_state, new_state); + } +} + + +/* + * Set the current state of a WiMAX device [unlocking version of + * wimax_state_change(). + */ +void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) +{ + struct device *dev = wimax_dev_to_dev(wimax_dev); + enum wimax_st old_state = wimax_dev->state; + struct sk_buff *stch_skb; + void *header; + + d_fnstart(3, dev, "(wimax_dev %p new_state %u [old %u])\n", + wimax_dev, new_state, old_state); + + if (WARN_ON(new_state >= __WIMAX_ST_INVALID)) { + dev_err(dev, "SW BUG: requesting invalid state %u\n", + new_state); + goto out; + } + if (old_state == new_state) + goto out; + header = NULL; /* gcc complains? can't grok why */ + stch_skb = wimax_gnl_re_state_change_alloc( + wimax_dev, new_state, old_state, &header); + + /* Verify the state transition and do exit-from-state actions */ + switch (old_state) { + case __WIMAX_ST_NULL: + __check_new_state(old_state, new_state, + 1 << WIMAX_ST_DOWN); + break; + case WIMAX_ST_DOWN: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_UNINITIALIZED + | 1 << WIMAX_ST_RADIO_OFF); + break; + case __WIMAX_ST_QUIESCING: + __check_new_state(old_state, new_state, 1 << WIMAX_ST_DOWN); + break; + case WIMAX_ST_UNINITIALIZED: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_RADIO_OFF); + break; + case WIMAX_ST_RADIO_OFF: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_READY); + break; + case WIMAX_ST_READY: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_RADIO_OFF + | 1 << WIMAX_ST_SCANNING + | 1 << WIMAX_ST_CONNECTING + | 1 << WIMAX_ST_CONNECTED); + break; + case WIMAX_ST_SCANNING: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_RADIO_OFF + | 1 << WIMAX_ST_READY + | 1 << WIMAX_ST_CONNECTING + | 1 << WIMAX_ST_CONNECTED); + break; + case WIMAX_ST_CONNECTING: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_RADIO_OFF + | 1 << WIMAX_ST_READY + | 1 << WIMAX_ST_SCANNING + | 1 << WIMAX_ST_CONNECTED); + break; + case WIMAX_ST_CONNECTED: + __check_new_state(old_state, new_state, + 1 << __WIMAX_ST_QUIESCING + | 1 << WIMAX_ST_RADIO_OFF + | 1 << WIMAX_ST_READY); + netif_tx_disable(wimax_dev->net_dev); + netif_carrier_off(wimax_dev->net_dev); + break; + case __WIMAX_ST_INVALID: + default: + dev_err(dev, "SW BUG: wimax_dev %p is in unknown state %u\n", + wimax_dev, wimax_dev->state); + WARN_ON(1); + goto out; + } + + /* Execute the actions of entry to the new state */ + switch (new_state) { + case __WIMAX_ST_NULL: + dev_err(dev, "SW BUG: wimax_dev %p entering NULL state " + "from %u\n", wimax_dev, wimax_dev->state); + WARN_ON(1); /* Nobody can enter this state */ + break; + case WIMAX_ST_DOWN: + break; + case __WIMAX_ST_QUIESCING: + break; + case WIMAX_ST_UNINITIALIZED: + break; + case WIMAX_ST_RADIO_OFF: + break; + case WIMAX_ST_READY: + break; + case WIMAX_ST_SCANNING: + break; + case WIMAX_ST_CONNECTING: + break; + case WIMAX_ST_CONNECTED: + netif_carrier_on(wimax_dev->net_dev); + netif_wake_queue(wimax_dev->net_dev); + break; + case __WIMAX_ST_INVALID: + default: + BUG(); + } + __wimax_state_set(wimax_dev, new_state); + if (stch_skb) + wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header); +out: + d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n", + wimax_dev, new_state, old_state); + return; +} + + +/** + * wimax_state_change - Set the current state of a WiMAX device + * + * @wimax_dev: WiMAX device descriptor (properly referenced) + * @new_state: New state to switch to + * + * This implements the state changes for the wimax devices. It will + * + * - verify that the state transition is legal (for now it'll just + * print a warning if not) according to the table in + * linux/wimax.h's documentation for 'enum wimax_st'. + * + * - perform the actions needed for leaving the current state and + * whichever are needed for entering the new state. + * + * - issue a report to user space indicating the new state (and an + * optional payload with information about the new state). + * + * NOTE: @wimax_dev must be locked + */ +void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) +{ + mutex_lock(&wimax_dev->mutex); + __wimax_state_change(wimax_dev, new_state); + mutex_unlock(&wimax_dev->mutex); + return; +} +EXPORT_SYMBOL_GPL(wimax_state_change); + + +/** + * wimax_state_get() - Return the current state of a WiMAX device + * + * @wimax_dev: WiMAX device descriptor + * + * Returns: Current state of the device according to its driver. + */ +enum wimax_st wimax_state_get(struct wimax_dev *wimax_dev) +{ + enum wimax_st state; + mutex_lock(&wimax_dev->mutex); + state = wimax_dev->state; + mutex_unlock(&wimax_dev->mutex); + return state; +} +EXPORT_SYMBOL_GPL(wimax_state_get); + + +/** + * wimax_dev_init - initialize a newly allocated instance + * + * @wimax_dev: WiMAX device descriptor to initialize. + * + * Initializes fields of a freshly allocated @wimax_dev instance. This + * function assumes that after allocation, the memory occupied by + * @wimax_dev was zeroed. + */ +void wimax_dev_init(struct wimax_dev *wimax_dev) +{ + INIT_LIST_HEAD(&wimax_dev->id_table_node); + __wimax_state_set(wimax_dev, WIMAX_ST_UNINITIALIZED); + mutex_init(&wimax_dev->mutex); + mutex_init(&wimax_dev->mutex_reset); +} +EXPORT_SYMBOL_GPL(wimax_dev_init); + +/* + * This extern is declared here because it's easier to keep track -- + * both declarations are a list of the same + */ +extern struct genl_ops + wimax_gnl_msg_from_user, + wimax_gnl_reset, + wimax_gnl_rfkill; + +static +struct genl_ops *wimax_gnl_ops[] = { + &wimax_gnl_msg_from_user, + &wimax_gnl_reset, + &wimax_gnl_rfkill, +}; + + +static +size_t wimax_addr_scnprint(char *addr_str, size_t addr_str_size, + unsigned char *addr, size_t addr_len) +{ + unsigned cnt, total; + for (total = cnt = 0; cnt < addr_len; cnt++) + total += scnprintf(addr_str + total, addr_str_size - total, + "%02x%c", addr[cnt], + cnt == addr_len - 1 ? '\0' : ':'); + return total; +} + + +/** + * wimax_dev_add - Register a new WiMAX device + * + * @wimax_dev: WiMAX device descriptor (as embedded in your @net_dev's + * priv data). You must have called wimax_dev_init() on it before. + * + * @net_dev: net device the @wimax_dev is associated with. The + * function expects SET_NETDEV_DEV() and register_netdev() were + * already called on it. + * + * Registers the new WiMAX device, sets up the user-kernel control + * interface (generic netlink) and common WiMAX infrastructure. + * + * Note that the parts that will allow interaction with user space are + * setup at the very end, when the rest is in place, as once that + * happens, the driver might get user space control requests via + * netlink or from debugfs that might translate into calls into + * wimax_dev->op_*(). + */ +int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev) +{ + int result; + struct device *dev = net_dev->dev.parent; + char addr_str[32]; + + d_fnstart(3, dev, "(wimax_dev %p net_dev %p)\n", wimax_dev, net_dev); + + /* Do the RFKILL setup before locking, as RFKILL will call + * into our functions. */ + wimax_dev->net_dev = net_dev; + result = wimax_rfkill_add(wimax_dev); + if (result < 0) + goto error_rfkill_add; + + /* Set up user-space interaction */ + mutex_lock(&wimax_dev->mutex); + wimax_id_table_add(wimax_dev); + result = wimax_debugfs_add(wimax_dev); + if (result < 0) { + dev_err(dev, "cannot initialize debugfs: %d\n", + result); + goto error_debugfs_add; + } + + __wimax_state_set(wimax_dev, WIMAX_ST_DOWN); + mutex_unlock(&wimax_dev->mutex); + + wimax_addr_scnprint(addr_str, sizeof(addr_str), + net_dev->dev_addr, net_dev->addr_len); + dev_err(dev, "WiMAX interface %s (%s) ready\n", + net_dev->name, addr_str); + d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev); + return 0; + +error_debugfs_add: + wimax_id_table_rm(wimax_dev); + mutex_unlock(&wimax_dev->mutex); + wimax_rfkill_rm(wimax_dev); +error_rfkill_add: + d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n", + wimax_dev, net_dev, result); + return result; +} +EXPORT_SYMBOL_GPL(wimax_dev_add); + + +/** + * wimax_dev_rm - Unregister an existing WiMAX device + * + * @wimax_dev: WiMAX device descriptor + * + * Unregisters a WiMAX device previously registered for use with + * wimax_add_rm(). + * + * IMPORTANT! Must call before calling unregister_netdev(). + * + * After this function returns, you will not get any more user space + * control requests (via netlink or debugfs) and thus to wimax_dev->ops. + * + * Reentrancy control is ensured by setting the state to + * %__WIMAX_ST_QUIESCING. rfkill operations coming through + * wimax_*rfkill*() will be stopped by the quiescing state; ops coming + * from the rfkill subsystem will be stopped by the support being + * removed by wimax_rfkill_rm(). + */ +void wimax_dev_rm(struct wimax_dev *wimax_dev) +{ + d_fnstart(3, NULL, "(wimax_dev %p)\n", wimax_dev); + + mutex_lock(&wimax_dev->mutex); + __wimax_state_change(wimax_dev, __WIMAX_ST_QUIESCING); + wimax_debugfs_rm(wimax_dev); + wimax_id_table_rm(wimax_dev); + __wimax_state_change(wimax_dev, WIMAX_ST_DOWN); + mutex_unlock(&wimax_dev->mutex); + wimax_rfkill_rm(wimax_dev); + d_fnend(3, NULL, "(wimax_dev %p) = void\n", wimax_dev); +} +EXPORT_SYMBOL_GPL(wimax_dev_rm); + + +/* Debug framework control of debug levels */ +struct d_level D_LEVEL[] = { + D_SUBMODULE_DEFINE(debugfs), + D_SUBMODULE_DEFINE(id_table), + D_SUBMODULE_DEFINE(op_msg), + D_SUBMODULE_DEFINE(op_reset), + D_SUBMODULE_DEFINE(op_rfkill), + D_SUBMODULE_DEFINE(stack), +}; +size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); + + +struct genl_family wimax_gnl_family = { + .id = GENL_ID_GENERATE, + .name = "WiMAX", + .version = WIMAX_GNL_VERSION, + .hdrsize = 0, + .maxattr = WIMAX_GNL_ATTR_MAX, +}; + +struct genl_multicast_group wimax_gnl_mcg = { + .name = "msg", +}; + + + +/* Shutdown the wimax stack */ +static +int __init wimax_subsys_init(void) +{ + int result, cnt; + + d_fnstart(4, NULL, "()\n"); + snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), + "WiMAX"); + result = genl_register_family(&wimax_gnl_family); + if (unlikely(result < 0)) { + printk(KERN_ERR "cannot register generic netlink family: %d\n", + result); + goto error_register_family; + } + + for (cnt = 0; cnt < ARRAY_SIZE(wimax_gnl_ops); cnt++) { + result = genl_register_ops(&wimax_gnl_family, + wimax_gnl_ops[cnt]); + d_printf(4, NULL, "registering generic netlink op code " + "%u: %d\n", wimax_gnl_ops[cnt]->cmd, result); + if (unlikely(result < 0)) { + printk(KERN_ERR "cannot register generic netlink op " + "code %u: %d\n", + wimax_gnl_ops[cnt]->cmd, result); + goto error_register_ops; + } + } + + result = genl_register_mc_group(&wimax_gnl_family, &wimax_gnl_mcg); + if (result < 0) + goto error_mc_group; + d_fnend(4, NULL, "() = 0\n"); + return 0; + +error_mc_group: +error_register_ops: + for (cnt--; cnt >= 0; cnt--) + genl_unregister_ops(&wimax_gnl_family, + wimax_gnl_ops[cnt]); + genl_unregister_family(&wimax_gnl_family); +error_register_family: + d_fnend(4, NULL, "() = %d\n", result); + return result; + +} +module_init(wimax_subsys_init); + + +/* Shutdown the wimax stack */ +static +void __exit wimax_subsys_exit(void) +{ + int cnt; + wimax_id_table_release(); + genl_unregister_mc_group(&wimax_gnl_family, &wimax_gnl_mcg); + for (cnt = ARRAY_SIZE(wimax_gnl_ops) - 1; cnt >= 0; cnt--) + genl_unregister_ops(&wimax_gnl_family, + wimax_gnl_ops[cnt]); + genl_unregister_family(&wimax_gnl_family); +} +module_exit(wimax_subsys_exit); + +MODULE_AUTHOR("Intel Corporation <linux-wimax@intel.com>"); +MODULE_DESCRIPTION("Linux WiMAX stack"); +MODULE_LICENSE("GPL"); + diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h new file mode 100644 index 000000000000..1e743d214856 --- /dev/null +++ b/net/wimax/wimax-internal.h @@ -0,0 +1,91 @@ +/* + * Linux WiMAX + * Internal API for kernel space WiMAX stack + * + * + * Copyright (C) 2007 Intel Corporation <linux-wimax@intel.com> + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * This header file is for declarations and definitions internal to + * the WiMAX stack. For public APIs and documentation, see + * include/net/wimax.h and include/linux/wimax.h. + */ + +#ifndef __WIMAX_INTERNAL_H__ +#define __WIMAX_INTERNAL_H__ +#ifdef __KERNEL__ + +#include <linux/device.h> +#include <net/wimax.h> + + +/* + * Decide if a (locked) device is ready for use + * + * Before using the device structure, it must be locked + * (wimax_dev->mutex). As well, most operations need to call this + * function to check if the state is the right one. + * + * An error value will be returned if the state is not the right + * one. In that case, the caller should not attempt to use the device + * and just unlock it. + */ +static inline __must_check +int wimax_dev_is_ready(struct wimax_dev *wimax_dev) +{ + if (wimax_dev->state == __WIMAX_ST_NULL) + return -EINVAL; /* Device is not even registered! */ + if (wimax_dev->state == WIMAX_ST_DOWN) + return -ENOMEDIUM; + if (wimax_dev->state == __WIMAX_ST_QUIESCING) + return -ESHUTDOWN; + return 0; +} + + +static inline +void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state) +{ + wimax_dev->state = state; +} +extern void __wimax_state_change(struct wimax_dev *, enum wimax_st); + +#ifdef CONFIG_DEBUG_FS +extern int wimax_debugfs_add(struct wimax_dev *); +extern void wimax_debugfs_rm(struct wimax_dev *); +#else +static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) +{ + return 0; +} +static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {} +#endif + +extern void wimax_id_table_add(struct wimax_dev *); +extern struct wimax_dev *wimax_dev_get_by_genl_info(struct genl_info *, int); +extern void wimax_id_table_rm(struct wimax_dev *); +extern void wimax_id_table_release(void); + +extern int wimax_rfkill_add(struct wimax_dev *); +extern void wimax_rfkill_rm(struct wimax_dev *); + +extern struct genl_family wimax_gnl_family; +extern struct genl_multicast_group wimax_gnl_mcg; + +#endif /* #ifdef __KERNEL__ */ +#endif /* #ifndef __WIMAX_INTERNAL_H__ */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4f877535e666..85c9034c59b2 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -421,6 +421,31 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, return 0; } +/** + * freq_in_rule_band - tells us if a frequency is in a frequency band + * @freq_range: frequency rule we want to query + * @freq_khz: frequency we are inquiring about + * + * This lets us know if a specific frequency rule is or is not relevant to + * a specific frequency's band. Bands are device specific and artificial + * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is + * safe for now to assume that a frequency rule should not be part of a + * frequency's band if the start freq or end freq are off by more than 2 GHz. + * This resolution can be lowered and should be considered as we add + * regulatory rule support for other "bands". + **/ +static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, + u32 freq_khz) +{ +#define ONE_GHZ_IN_KHZ 1000000 + if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + return true; + if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ)) + return true; + return false; +#undef ONE_GHZ_IN_KHZ +} + /* Converts a country IE to a regulatory domain. A regulatory domain * structure has a lot of information which the IE doesn't yet have, * so for the other values we use upper max values as we will intersect @@ -473,6 +498,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( * calculate the number of reg rules we will need. We will need one * for each channel subband */ while (country_ie_len >= 3) { + int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) country_ie; int cur_sub_max_channel = 0, cur_channel = 0; @@ -484,9 +510,25 @@ static struct ieee80211_regdomain *country_ie_2_rd( continue; } + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels; + else + /* + * 5 GHz -- For example in country IEs if the first + * channel given is 36 and the number of channels is 4 + * then the individual channel numbers defined for the + * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 + * and not 36, 37, 38, 39. + * + * See: http://tinyurl.com/11d-clarification + */ + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + cur_channel = triplet->chans.first_channel; - cur_sub_max_channel = ieee80211_channel_to_frequency( - cur_channel + triplet->chans.num_channels); + cur_sub_max_channel = end_channel; /* Basic sanity check */ if (cur_sub_max_channel < cur_channel) @@ -538,6 +580,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( /* This time around we fill in the rd */ while (country_ie_len >= 3) { + int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = (struct ieee80211_country_ie_triplet *) country_ie; struct ieee80211_reg_rule *reg_rule = NULL; @@ -559,6 +602,14 @@ static struct ieee80211_regdomain *country_ie_2_rd( reg_rule->flags = flags; + /* 2 GHz */ + if (triplet->chans.first_channel <= 14) + end_channel = triplet->chans.first_channel + + triplet->chans.num_channels; + else + end_channel = triplet->chans.first_channel + + (4 * (triplet->chans.num_channels - 1)); + /* The +10 is since the regulatory domain expects * the actual band edge, not the center of freq for * its start and end freqs, assuming 20 MHz bandwidth on @@ -568,8 +619,7 @@ static struct ieee80211_regdomain *country_ie_2_rd( triplet->chans.first_channel) - 10); freq_range->end_freq_khz = MHZ_TO_KHZ(ieee80211_channel_to_frequency( - triplet->chans.first_channel + - triplet->chans.num_channels) + 10); + end_channel) + 10); /* Large arbitrary values, we intersect later */ /* Increment this if we ever support >= 40 MHz channels @@ -748,12 +798,23 @@ static u32 map_regdom_flags(u32 rd_flags) * this value to the maximum allowed bandwidth. * @reg_rule: the regulatory rule which we have for this frequency * - * Use this function to get the regulatory rule for a specific frequency. + * Use this function to get the regulatory rule for a specific frequency on + * a given wireless device. If the device has a specific regulatory domain + * it wants to follow we respect that unless a country IE has been received + * and processed already. + * + * Returns 0 if it was able to find a valid regulatory rule which does + * apply to the given center_freq otherwise it returns non-zero. It will + * also return -ERANGE if we determine the given center_freq does not even have + * a regulatory rule for a frequency range in the center_freq's band. See + * freq_in_rule_band() for our current definition of a band -- this is purely + * subjective and right now its 802.11 specific. */ static int freq_reg_info(u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { int i; + bool band_rule_found = false; u32 max_bandwidth = 0; if (!cfg80211_regdomain) @@ -767,7 +828,15 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth, rr = &cfg80211_regdomain->reg_rules[i]; fr = &rr->freq_range; pr = &rr->power_rule; + + /* We only need to know if one frequency rule was + * was in center_freq's band, that's enough, so lets + * not overwrite it once found */ + if (!band_rule_found) + band_rule_found = freq_in_rule_band(fr, center_freq); + max_bandwidth = freq_max_bandwidth(fr, center_freq); + if (max_bandwidth && *bandwidth <= max_bandwidth) { *reg_rule = rr; *bandwidth = max_bandwidth; @@ -775,23 +844,64 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth, } } + if (!band_rule_found) + return -ERANGE; + return !max_bandwidth; } -static void handle_channel(struct ieee80211_channel *chan) +static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, + unsigned int chan_idx) { int r; - u32 flags = chan->orig_flags; + u32 flags; u32 max_bandwidth = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + + sband = wiphy->bands[band]; + BUG_ON(chan_idx >= sband->n_channels); + chan = &sband->channels[chan_idx]; + + flags = chan->orig_flags; r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), &max_bandwidth, ®_rule); if (r) { - flags |= IEEE80211_CHAN_DISABLED; - chan->flags = flags; + /* This means no regulatory rule was found in the country IE + * with a frequency range on the center_freq's band, since + * IEEE-802.11 allows for a country IE to have a subset of the + * regulatory information provided in a country we ignore + * disabling the channel unless at least one reg rule was + * found on the center_freq's band. For details see this + * clarification: + * + * http://tinyurl.com/11d-clarification + */ + if (r == -ERANGE && + last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { +#ifdef CONFIG_CFG80211_REG_DEBUG + printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " + "intact on %s - no rule found in band on " + "Country IE\n", + chan->center_freq, wiphy_name(wiphy)); +#endif + } else { + /* In this case we know the country IE has at least one reg rule + * for the band so we respect its band definitions */ +#ifdef CONFIG_CFG80211_REG_DEBUG + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + printk(KERN_DEBUG "cfg80211: Disabling " + "channel %d MHz on %s due to " + "Country IE\n", + chan->center_freq, wiphy_name(wiphy)); +#endif + flags |= IEEE80211_CHAN_DISABLED; + chan->flags = flags; + } return; } @@ -808,12 +918,16 @@ static void handle_channel(struct ieee80211_channel *chan) chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct ieee80211_supported_band *sband) +static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) { - int i; + unsigned int i; + struct ieee80211_supported_band *sband; + + BUG_ON(!wiphy->bands[band]); + sband = wiphy->bands[band]; for (i = 0; i < sband->n_channels; i++) - handle_channel(&sband->channels[i]); + handle_channel(wiphy, band, i); } static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) @@ -840,7 +954,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) enum ieee80211_band band; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy->bands[band]); + handle_band(wiphy, band); if (wiphy->reg_notifier) wiphy->reg_notifier(wiphy, setby); } @@ -1170,7 +1284,7 @@ static void reg_country_ie_process_debug( if (intersected_rd) { printk(KERN_DEBUG "cfg80211: We intersect both of these " "and get:\n"); - print_regdomain_info(rd); + print_regdomain_info(intersected_rd); return; } printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); diff --git a/net/wireless/wext.c b/net/wireless/wext.c index e49a2d1ef1e4..cb6a5bb85d80 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -1055,8 +1055,8 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->do_ioctl) - return dev->do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index c609a4b98e15..42cd18391f46 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -63,7 +63,6 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (len > skb_tailroom(skb)) len = skb_tailroom(skb); - skb->truesize += len; __skb_put(skb, len); len += plen; |