diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2010-08-09 01:49:58 +0200 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2010-08-09 02:14:15 +0200 |
commit | d9a145fb6e5f37b9903dea8371ab5c3e34e8e2d1 (patch) | |
tree | e2b4bb46fa00f0ad20447e40dba6fb21a4ae0815 /net | |
parent | autofs/autofs4: Move compat_ioctl handling into fs (diff) | |
parent | Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile (diff) | |
download | linux-d9a145fb6e5f37b9903dea8371ab5c3e34e8e2d1.tar.xz linux-d9a145fb6e5f37b9903dea8371ab5c3e34e8e2d1.zip |
Merge commit 'linus/master' into bkl/core
Merge reason: The staging tree has introduced the easycap
driver lately. We need the latest updates to pushdown the
bkl in its ioctl helper.
Diffstat (limited to 'net')
328 files changed, 10753 insertions, 6314 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3c1c8c14e929..a2ad15250575 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) BUG_ON(!grp); /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing. + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). */ - if (real_dev->features & NETIF_F_HW_VLAN_FILTER) + if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER)) ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); grp->nr_vlans--; @@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (is_vlan_dev(dev)) __vlan_device_event(dev, event); + if ((event == NETDEV_UP) && + (dev->features & NETIF_F_HW_VLAN_FILTER) && + dev->netdev_ops->ndo_vlan_rx_add_vid) { + pr_info("8021q: adding VLAN 0 to HW filter on device %s\n", + dev->name); + dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); + } + grp = __vlan_find_group(dev); if (!grp) goto out; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 6abdcac1b2e8..8d9503ad01da 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -2,6 +2,7 @@ #define __BEN_VLAN_802_1Q_INC__ #include <linux/if_vlan.h> +#include <linux/u64_stats_sync.h> /** @@ -21,14 +22,16 @@ struct vlan_priority_tci_mapping { * struct vlan_rx_stats - VLAN percpu rx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes - * @multicast: number of received multicast packets + * @rx_multicast: number of received multicast packets + * @syncp: synchronization point for 64bit counters * @rx_errors: number of errors */ struct vlan_rx_stats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long multicast; - unsigned long rx_errors; + u64 rx_packets; + u64 rx_bytes; + u64 rx_multicast; + struct u64_stats_sync syncp; + unsigned long rx_errors; }; /** diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 50f58f5f1c34..01ddb0472f86 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -8,6 +8,9 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { + struct net_device *vlan_dev; + u16 vlan_id; + if (netpoll_rx(skb)) return NET_RX_DROP; @@ -16,9 +19,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + vlan_id = vlan_tci & VLAN_VID_MASK; + vlan_dev = vlan_group_get_device(grp, vlan_id); - if (!skb->dev) + if (vlan_dev) + skb->dev = vlan_dev; + else if (vlan_id) goto drop; return (polling ? netif_receive_skb(skb) : netif_rx(skb)); @@ -41,9 +47,9 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, - smp_processor_id()); + rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats); + u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; @@ -51,7 +57,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) case PACKET_BROADCAST: break; case PACKET_MULTICAST: - rx_stats->multicast++; + rx_stats->rx_multicast++; break; case PACKET_OTHERHOST: /* Our lower layer thinks this is not local, let's make sure. @@ -62,6 +68,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->pkt_type = PACKET_HOST; break; } + u64_stats_update_end(&rx_stats->syncp); return 0; } @@ -82,15 +89,20 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) { struct sk_buff *p; + struct net_device *vlan_dev; + u16 vlan_id; if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master))) skb->deliver_no_wcard = 1; skb->skb_iif = skb->dev->ifindex; __vlan_hwaccel_put_tag(skb, vlan_tci); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + vlan_id = vlan_tci & VLAN_VID_MASK; + vlan_dev = vlan_group_get_device(grp, vlan_id); - if (!skb->dev) + if (vlan_dev) + skb->dev = vlan_dev; + else if (vlan_id) goto drop; for (p = napi->gro_list; p; p = p->next) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 529842677817..3d59c9bf8feb 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, { struct vlan_hdr *vhdr; struct vlan_rx_stats *rx_stats; + struct net_device *vlan_dev; u16 vlan_id; u16 vlan_tci; @@ -157,53 +158,71 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, vlan_id = vlan_tci & VLAN_VID_MASK; rcu_read_lock(); - skb->dev = __find_vlan_dev(dev, vlan_id); - if (!skb->dev) { - pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", - __func__, vlan_id, dev->name); - goto err_unlock; - } - - rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats, - smp_processor_id()); - rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; - - skb_pull_rcsum(skb, VLAN_HLEN); - - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); - - pr_debug("%s: priority: %u for TCI: %hu\n", - __func__, skb->priority, vlan_tci); - - switch (skb->pkt_type) { - case PACKET_BROADCAST: /* Yeah, stats collect these together.. */ - /* stats->broadcast ++; // no such counter :-( */ - break; + vlan_dev = __find_vlan_dev(dev, vlan_id); - case PACKET_MULTICAST: - rx_stats->multicast++; - break; + /* If the VLAN device is defined, we use it. + * If not, and the VID is 0, it is a 802.1p packet (not + * really a VLAN), so we will just netif_rx it later to the + * original interface, but with the skb->proto set to the + * wrapped proto: we do nothing here. + */ - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. - */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - default: - break; + if (!vlan_dev) { + if (vlan_id) { + pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n", + __func__, vlan_id, dev->name); + goto err_unlock; + } + rx_stats = NULL; + } else { + skb->dev = vlan_dev; + + rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats, + smp_processor_id()); + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->rx_packets++; + rx_stats->rx_bytes += skb->len; + + skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); + + pr_debug("%s: priority: %u for TCI: %hu\n", + __func__, skb->priority, vlan_tci); + + switch (skb->pkt_type) { + case PACKET_BROADCAST: + /* Yeah, stats collect these together.. */ + /* stats->broadcast ++; // no such counter :-( */ + break; + + case PACKET_MULTICAST: + rx_stats->rx_multicast++; + break; + + case PACKET_OTHERHOST: + /* Our lower layer thinks this is not local, let's make + * sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. + */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + skb->dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + break; + default: + break; + } + u64_stats_update_end(&rx_stats->syncp); } + skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); - if (!skb) { - rx_stats->rx_errors++; - goto err_unlock; + if (vlan_dev) { + skb = vlan_check_reorder_header(skb); + if (!skb) { + rx_stats->rx_errors++; + goto err_unlock; + } } netif_rx(skb); @@ -801,37 +820,65 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) return dev_ethtool_get_flags(vlan->real_dev); } -static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct net_device_stats *stats = &dev->stats; - dev_txq_stats_fold(dev, stats); if (vlan_dev_info(dev)->vlan_rx_stats) { - struct vlan_rx_stats *p, rx = {0}; + struct vlan_rx_stats *p, accum = {0}; int i; for_each_possible_cpu(i) { + u64 rxpackets, rxbytes, rxmulticast; + unsigned int start; + p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); - rx.rx_packets += p->rx_packets; - rx.rx_bytes += p->rx_bytes; - rx.rx_errors += p->rx_errors; - rx.multicast += p->multicast; + do { + start = u64_stats_fetch_begin_bh(&p->syncp); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; + rxmulticast = p->rx_multicast; + } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + accum.rx_packets += rxpackets; + accum.rx_bytes += rxbytes; + accum.rx_multicast += rxmulticast; + /* rx_errors is an ulong, not protected by syncp */ + accum.rx_errors += p->rx_errors; } - stats->rx_packets = rx.rx_packets; - stats->rx_bytes = rx.rx_bytes; - stats->rx_errors = rx.rx_errors; - stats->multicast = rx.multicast; + stats->rx_packets = accum.rx_packets; + stats->rx_bytes = accum.rx_bytes; + stats->rx_errors = accum.rx_errors; + stats->multicast = accum.rx_multicast; } return stats; } +static int vlan_ethtool_set_tso(struct net_device *dev, u32 data) +{ + if (data) { + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + + /* Underlying device must support TSO for VLAN-tagged packets + * and must have TSO enabled now. + */ + if (!(real_dev->vlan_features & NETIF_F_TSO)) + return -EOPNOTSUPP; + if (!(real_dev->features & NETIF_F_TSO)) + return -EINVAL; + dev->features |= NETIF_F_TSO; + } else { + dev->features &= ~NETIF_F_TSO; + } + return 0; +} + static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, .get_rx_csum = vlan_ethtool_get_rx_csum, .get_flags = vlan_ethtool_get_flags, + .set_tso = vlan_ethtool_set_tso, }; static const struct net_device_ops vlan_netdev_ops = { @@ -848,7 +895,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -872,7 +919,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -897,7 +944,7 @@ static const struct net_device_ops vlan_netdev_ops_sq = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, @@ -922,7 +969,7 @@ static const struct net_device_ops vlan_netdev_accel_ops_sq = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, - .ndo_get_stats = vlan_dev_get_stats, + .ndo_get_stats64 = vlan_dev_get_stats64, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index afead353e215..80e280f56686 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -278,25 +278,27 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) { struct net_device *vlandev = (struct net_device *) seq->private; const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); - const struct net_device_stats *stats; + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats; static const char fmt[] = "%30s %12lu\n"; + static const char fmt64[] = "%30s %12llu\n"; int i; if (!is_vlan_dev(vlandev)) return 0; - stats = dev_get_stats(vlandev); + stats = dev_get_stats(vlandev, &temp); seq_printf(seq, "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n", vlandev->name, dev_info->vlan_id, (int)(dev_info->flags & 1), vlandev->priv_flags); - seq_printf(seq, fmt, "total frames received", stats->rx_packets); - seq_printf(seq, fmt, "total bytes received", stats->rx_bytes); - seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast); + seq_printf(seq, fmt64, "total frames received", stats->rx_packets); + seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); + seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast); seq_puts(seq, "\n"); - seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets); - seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes); + seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets); + seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes); seq_printf(seq, fmt, "total headroom inc", dev_info->cnt_inc_headroom_on_tx); seq_printf(seq, fmt, "total encap on xmit", diff --git a/net/9p/client.c b/net/9p/client.c index 37c8da07a80b..dc6f2f26d023 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -460,7 +460,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) return err; } - if (p9_is_proto_dotu(c)) + if (p9_is_proto_dotu(c) || + p9_is_proto_dotl(c)) err = -ecode; if (!err || !IS_ERR_VALUE(err)) @@ -1015,14 +1016,18 @@ int p9_client_open(struct p9_fid *fid, int mode) struct p9_qid qid; int iounit; - P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); - err = 0; clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", + p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); + err = 0; if (fid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (p9_is_proto_dotl(clnt)) + req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode); + else + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; @@ -1034,10 +1039,9 @@ int p9_client_open(struct p9_fid *fid, int mode) goto free_and_error; } - P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", - qid.type, - (unsigned long long)qid.path, - qid.version, iounit); + P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", + p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, + (unsigned long long)qid.path, qid.version, iounit); fid->mode = mode; fid->iounit = iounit; @@ -1049,6 +1053,50 @@ error: } EXPORT_SYMBOL(p9_client_open); +int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, + gid_t gid, struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + int iounit; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", + ofid->fid, name, flags, mode, gid); + clnt = ofid->clnt; + + if (ofid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + mode, gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", + qid->type, + (unsigned long long)qid->path, + qid->version, iounit); + + ofid->mode = mode; + ofid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_create_dotl); + int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { @@ -1094,6 +1142,59 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, + struct p9_qid *qid) +{ + int err = 0; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", + dfid->fid, name, symtgt); + clnt = dfid->clnt; + + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + gid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", + qid->type, (unsigned long long)qid->path, qid->version); + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_symlink); + +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +{ + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", + dfid->fid, oldfid->fid, newname); + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, + newname); + if (IS_ERR(req)) + return PTR_ERR(req); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); + p9_free_req(clnt, req); + return 0; +} +EXPORT_SYMBOL(p9_client_link); + int p9_client_clunk(struct p9_fid *fid) { int err; @@ -1139,9 +1240,8 @@ int p9_client_remove(struct p9_fid *fid) P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_free_req(clnt, req); - p9_fid_destroy(fid); - error: + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); @@ -1302,6 +1402,65 @@ error: } EXPORT_SYMBOL(p9_client_stat); +struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, + u64 request_mask) +{ + int err; + struct p9_client *clnt; + struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl), + GFP_KERNEL); + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", + fid->fid, request_mask); + + if (!ret) + return ERR_PTR(-ENOMEM); + + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, + "<<< RGETATTR st_result_mask=%lld\n" + "<<< qid=%x.%llx.%x\n" + "<<< st_mode=%8.8x st_nlink=%llu\n" + "<<< st_uid=%d st_gid=%d\n" + "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" + "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" + "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" + "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" + "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" + "<<< st_gen=%lld st_data_version=%lld", + ret->st_result_mask, ret->qid.type, ret->qid.path, + ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, + ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, + ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, + ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, + ret->st_gen, ret->st_data_version); + + p9_free_req(clnt, req); + return ret; + +error: + kfree(ret); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_getattr_dotl); + static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; @@ -1366,6 +1525,36 @@ error: } EXPORT_SYMBOL(p9_client_wstat); +int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " valid=%x mode=%x uid=%d gid=%d size=%lld\n" + " atime_sec=%lld atime_nsec=%lld\n" + " mtime_sec=%lld mtime_nsec=%lld\n", + p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, + p9attr->mtime_sec, p9attr->mtime_nsec); + + req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); + + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_setattr); + int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) { int err; @@ -1432,3 +1621,187 @@ error: } EXPORT_SYMBOL(p9_client_rename); +/* + * An xattrwalk without @attr_name gives the fid for the lisxattr namespace + */ +struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, + const char *attr_name, u64 *attr_size) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + struct p9_fid *attr_fid; + + err = 0; + clnt = file_fid->clnt; + attr_fid = p9_fid_create(clnt); + if (IS_ERR(attr_fid)) { + err = PTR_ERR(attr_fid); + attr_fid = NULL; + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n", + file_fid->fid, attr_fid->fid, attr_name); + + req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", + file_fid->fid, attr_fid->fid, attr_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; + } + p9_free_req(clnt, req); + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", + attr_fid->fid, *attr_size); + return attr_fid; +clunk_fid: + p9_client_clunk(attr_fid); + attr_fid = NULL; +error: + if (attr_fid && (attr_fid != file_fid)) + p9_fid_destroy(attr_fid); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(p9_client_xattrwalk); + +int p9_client_xattrcreate(struct p9_fid *fid, const char *name, + u64 attr_size, int flags) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, + ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n", + fid->fid, name, (long long)attr_size, flags); + err = 0; + clnt = fid->clnt; + req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", + fid->fid, name, attr_size, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL_GPL(p9_client_xattrcreate); + +int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) +{ + int err, rsize, total; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); + + err = 0; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) + rsize = clnt->msize - P9_READDIRHDRSZ; + + if (count < rsize) + rsize = count; + + req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); + + if (data) + memmove(data, dataptr, count); + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_readdir); + +int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, + dev_t rdev, gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " + "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + MAJOR(rdev), MINOR(rdev), gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mknod_dotl); + +int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, + gid_t gid, struct p9_qid *qid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + err = 0; + clnt = fid->clnt; + P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", + fid->fid, name, mode, gid); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + gid); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); + if (err) { + p9pdu_dump(1, req->rc); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, + (unsigned long long)qid->path, qid->version); + +error: + p9_free_req(clnt, req); + return err; + +} +EXPORT_SYMBOL(p9_client_mkdir_dotl); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 149f82160130..3acd3afb20c8 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -141,6 +141,7 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) D - data blob (int32_t size followed by void *, results are not freed) T - array of strings (int16_t count, followed by strings) R - array of qids (int16_t count, followed by qids) + A - stat for 9p2000.L (p9_stat_dotl) ? - if optional = 1, continue parsing */ @@ -340,6 +341,33 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'A': { + struct p9_stat_dotl *stbuf = + va_arg(ap, struct p9_stat_dotl *); + + memset(stbuf, 0, sizeof(struct p9_stat_dotl)); + errcode = + p9pdu_readf(pdu, proto_version, + "qQdddqqqqqqqqqqqqqqq", + &stbuf->st_result_mask, + &stbuf->qid, + &stbuf->st_mode, + &stbuf->st_uid, &stbuf->st_gid, + &stbuf->st_nlink, + &stbuf->st_rdev, &stbuf->st_size, + &stbuf->st_blksize, &stbuf->st_blocks, + &stbuf->st_atime_sec, + &stbuf->st_atime_nsec, + &stbuf->st_mtime_sec, + &stbuf->st_mtime_nsec, + &stbuf->st_ctime_sec, + &stbuf->st_ctime_nsec, + &stbuf->st_btime_sec, + &stbuf->st_btime_nsec, + &stbuf->st_gen, + &stbuf->st_data_version); + } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) @@ -488,6 +516,23 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, } } break; + case 'I':{ + struct p9_iattr_dotl *p9attr = va_arg(ap, + struct p9_iattr_dotl *); + + errcode = p9pdu_writef(pdu, proto_version, + "ddddqqqqq", + p9attr->valid, + p9attr->mode, + p9attr->uid, + p9attr->gid, + p9attr->size, + p9attr->atime_sec, + p9attr->atime_nsec, + p9attr->mtime_sec, + p9attr->mtime_nsec); + } + break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) @@ -580,3 +625,30 @@ void p9pdu_reset(struct p9_fcall *pdu) pdu->offset = 0; pdu->size = 0; } + +int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, + int proto_version) +{ + struct p9_fcall fake_pdu; + int ret; + char *nameptr; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, + &dirent->d_off, &dirent->d_type, &nameptr); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + goto out; + } + + strcpy(dirent->d_name, nameptr); + +out: + return fake_pdu.offset; +} +EXPORT_SYMBOL(p9dirent_read); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 98ce9bcb0e15..c85109d809ca 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -948,7 +948,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (strlen(addr) > UNIX_PATH_MAX) { + if (strlen(addr) >= UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", addr); return -ENAMETOOLONG; diff --git a/net/Kconfig b/net/Kconfig index 0d68b40fc0e6..e330594d3709 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,7 +32,7 @@ config WANT_COMPAT_NETLINK_MESSAGES config COMPAT_NETLINK_MESSAGES def_bool y depends on COMPAT - depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES + depends on WEXT_CORE || WANT_COMPAT_NETLINK_MESSAGES help This option makes it possible to send different netlink messages to tasks depending on whether the task is a compat task or not. To @@ -86,6 +86,16 @@ config NETWORK_SECMARK to nfmark, but designated for security purposes. If you are unsure how to answer this question, answer N. +config NETWORK_PHY_TIMESTAMPING + bool "Timestamping in PHY devices" + depends on EXPERIMENTAL + help + This allows timestamping of network packets by PHYs with + hardware timestamping capabilities. This option adds some + overhead in the transmit and receive paths. + + If you are unsure how to answer this question, answer N. + menuconfig NETFILTER bool "Network packet filtering framework (Netfilter)" ---help--- @@ -203,6 +213,7 @@ source "net/phonet/Kconfig" source "net/ieee802154/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" +source "net/dns_resolver/Kconfig" config RPS boolean diff --git a/net/Makefile b/net/Makefile index cb7bdc1210cb..ea60fbce9b1b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -50,7 +50,7 @@ endif obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_RDS) += rds/ -obj-y += wireless/ +obj-$(CONFIG_WIRELESS) += wireless/ obj-$(CONFIG_MAC80211) += mac80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ @@ -61,9 +61,10 @@ obj-$(CONFIG_CAIF) += caif/ ifneq ($(CONFIG_DCB),) obj-y += dcb/ endif -obj-y += ieee802154/ +obj-$(CONFIG_IEEE802154) += ieee802154/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o endif obj-$(CONFIG_WIMAX) += wimax/ +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 6719af6a59fa..651babdfab38 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -139,6 +139,43 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s) return NULL; } +static int atm_dev_event(struct notifier_block *this, unsigned long event, + void *arg) +{ + struct atm_dev *atm_dev = arg; + struct list_head *lh; + struct net_device *net_dev; + struct br2684_vcc *brvcc; + struct atm_vcc *atm_vcc; + unsigned long flags; + + pr_debug("event=%ld dev=%p\n", event, atm_dev); + + read_lock_irqsave(&devs_lock, flags); + list_for_each(lh, &br2684_devs) { + net_dev = list_entry_brdev(lh); + + list_for_each_entry(brvcc, &BRPRIV(net_dev)->brvccs, brvccs) { + atm_vcc = brvcc->atmvcc; + if (atm_vcc && brvcc->atmvcc->dev == atm_dev) { + + if (atm_vcc->dev->signal == ATM_PHY_SIG_LOST) + netif_carrier_off(net_dev); + else + netif_carrier_on(net_dev); + + } + } + } + read_unlock_irqrestore(&devs_lock, flags); + + return NOTIFY_DONE; +} + +static struct notifier_block atm_dev_notifier = { + .notifier_call = atm_dev_event, +}; + /* chained vcc->pop function. Check if we should wake the netif_queue */ static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb) { @@ -362,6 +399,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) unregister_netdev(net_dev); free_netdev(net_dev); } + read_lock_irq(&devs_lock); + if (list_empty(&br2684_devs)) { + /* last br2684 device */ + unregister_atmdevice_notifier(&atm_dev_notifier); + } + read_unlock_irq(&devs_lock); return; } @@ -530,6 +573,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) br2684_push(atmvcc, skb); } + + /* initialize netdev carrier state */ + if (atmvcc->dev->signal == ATM_PHY_SIG_LOST) + netif_carrier_off(net_dev); + else + netif_carrier_on(net_dev); + __module_get(THIS_MODULE); return 0; @@ -620,9 +670,16 @@ static int br2684_create(void __user *arg) } write_lock_irq(&devs_lock); + brdev->payload = payload; - brdev->number = list_empty(&br2684_devs) ? 1 : - BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; + + if (list_empty(&br2684_devs)) { + /* 1st br2684 device */ + register_atmdevice_notifier(&atm_dev_notifier); + brdev->number = 1; + } else + brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; + list_add_tail(&brdev->br2684_devs, &br2684_devs); write_unlock_irq(&devs_lock); return 0; @@ -772,6 +829,11 @@ static void __exit br2684_exit(void) remove_proc_entry("br2684", atm_proc_root); #endif + + /* if not already empty */ + if (!list_empty(&br2684_devs)) + unregister_atmdevice_notifier(&atm_dev_notifier); + while (!list_empty(&br2684_devs)) { net_dev = list_entry_brdev(br2684_devs.next); brdev = BRPRIV(net_dev); diff --git a/net/atm/clip.c b/net/atm/clip.c index 313aba11316b..95fdd1185067 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) error = ip_route_output_key(&init_net, &rt, &fl); if (error) return error; - neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1); + neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) return -ENOMEM; diff --git a/net/atm/common.c b/net/atm/common.c index b43feb1a3995..940404a73b3d 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash); DEFINE_RWLOCK(vcc_sklist_lock); EXPORT_SYMBOL(vcc_sklist_lock); +static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain); + static void __vcc_insert_socket(struct sock *sk) { struct atm_vcc *vcc = atm_sk(sk); @@ -212,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply) } EXPORT_SYMBOL(vcc_release_async); +void atm_dev_signal_change(struct atm_dev *dev, char signal) +{ + pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n", + __func__, signal, dev, dev->number, dev->signal); + + /* atm driver sending invalid signal */ + WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND); + + if (dev->signal == signal) + return; /* no change */ + + dev->signal = signal; + + atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev); +} +EXPORT_SYMBOL(atm_dev_signal_change); void atm_dev_release_vccs(struct atm_dev *dev) { @@ -781,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len); } +int register_atmdevice_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&atm_dev_notify_chain, nb); +} +EXPORT_SYMBOL_GPL(register_atmdevice_notifier); + +void unregister_atmdevice_notifier(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier); + static int __init atm_init(void) { int error; diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index ee3b3049d385..ed371684c133 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -43,19 +43,6 @@ config BT_L2CAP Say Y here to compile L2CAP support into the kernel or say M to compile it as module (l2cap). -config BT_L2CAP_EXT_FEATURES - bool "L2CAP Extended Features support (EXPERIMENTAL)" - depends on BT_L2CAP && EXPERIMENTAL - help - This option enables the L2CAP Extended Features support. These - new features include the Enhanced Retransmission and Streaming - Modes, the Frame Check Sequence (FCS), and Segmentation and - Reassembly (SAR) for L2CAP packets. They are a required for the - new Alternate MAC/PHY and the Bluetooth Medical Profile. - - You should say N unless you know what you are doing. Note that - this is in an experimental state yet. - config BT_SCO tristate "SCO links support" depends on BT diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 0d9e506f5d5a..70672544db86 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -86,26 +86,26 @@ struct bnep_setup_conn_req { __u8 ctrl; __u8 uuid_size; __u8 service[0]; -} __attribute__((packed)); +} __packed; struct bnep_set_filter_req { __u8 type; __u8 ctrl; __be16 len; __u8 list[0]; -} __attribute__((packed)); +} __packed; struct bnep_control_rsp { __u8 type; __u8 ctrl; __be16 resp; -} __attribute__((packed)); +} __packed; struct bnep_ext_hdr { __u8 type; __u8 len; __u8 data[0]; -} __attribute__((packed)); +} __packed; /* BNEP ioctl defines */ #define BNEPCONNADD _IOW('B', 200, int) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b10e3cdb08f8..0b1e460fe440 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1,6 +1,6 @@ /* BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +/* Device _must_ be locked */ +void hci_sco_setup(struct hci_conn *conn, __u8 status) +{ + struct hci_conn *sco = conn->link; + + BT_DBG("%p", conn); + + if (!sco) + return; + + if (!status) { + if (lmp_esco_capable(conn->hdev)) + hci_setup_sync(sco, conn->handle); + else + hci_add_sco(sco, conn->handle); + } else { + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } +} + static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; @@ -358,6 +379,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); + } else { + if (acl->sec_level < sec_level) + acl->sec_level = sec_level; + if (acl->auth_type < auth_type) + acl->auth_type = auth_type; } if (type == ACL_LINK) @@ -380,10 +406,13 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 acl->power_save = 1; hci_conn_enter_active_mode(acl); - if (lmp_esco_capable(hdev)) - hci_setup_sync(sco, acl->handle); - else - hci_add_sco(sco, acl->handle); + if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->pend)) { + /* defer SCO setup until mode change completed */ + set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->pend); + return sco; + } + + hci_sco_setup(acl, 0x00); } return sco; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2f768de87011..8303f1c9ef54 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_dev_lock_bh(hdev); inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); + hci_blacklist_clear(hdev); hci_dev_unlock_bh(hdev); hci_notify(hdev, HCI_DEV_DOWN); @@ -913,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_REASSEMBLY; i++) hdev->reassembly[i] = NULL; init_waitqueue_head(&hdev->req_wait_q); @@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev) hci_conn_hash_init(hdev); + INIT_LIST_HEAD(&hdev->blacklist.list); + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); atomic_set(&hdev->promisc, 0); @@ -970,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); - for (i = 0; i < 3; i++) + for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); hci_notify(hdev, HCI_DEV_UNREG); @@ -1030,89 +1033,170 @@ int hci_recv_frame(struct sk_buff *skb) } EXPORT_SYMBOL(hci_recv_frame); -/* Receive packet type fragment */ -#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) - -int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +static int hci_reassembly(struct hci_dev *hdev, int type, void *data, + int count, __u8 index, gfp_t gfp_mask) { - if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + int len = 0; + int hlen = 0; + int remain = count; + struct sk_buff *skb; + struct bt_skb_cb *scb; + + if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) || + index >= NUM_REASSEMBLY) return -EILSEQ; + skb = hdev->reassembly[index]; + + if (!skb) { + switch (type) { + case HCI_ACLDATA_PKT: + len = HCI_MAX_FRAME_SIZE; + hlen = HCI_ACL_HDR_SIZE; + break; + case HCI_EVENT_PKT: + len = HCI_MAX_EVENT_SIZE; + hlen = HCI_EVENT_HDR_SIZE; + break; + case HCI_SCODATA_PKT: + len = HCI_MAX_SCO_SIZE; + hlen = HCI_SCO_HDR_SIZE; + break; + } + + skb = bt_skb_alloc(len, gfp_mask); + if (!skb) + return -ENOMEM; + + scb = (void *) skb->cb; + scb->expect = hlen; + scb->pkt_type = type; + + skb->dev = (void *) hdev; + hdev->reassembly[index] = skb; + } + while (count) { - struct sk_buff *skb = __reassembly(hdev, type); - struct { int expect; } *scb; - int len = 0; + scb = (void *) skb->cb; + len = min(scb->expect, (__u16)count); - if (!skb) { - /* Start of the frame */ + memcpy(skb_put(skb, len), data, len); - switch (type) { - case HCI_EVENT_PKT: - if (count >= HCI_EVENT_HDR_SIZE) { - struct hci_event_hdr *h = data; - len = HCI_EVENT_HDR_SIZE + h->plen; - } else - return -EILSEQ; - break; + count -= len; + data += len; + scb->expect -= len; + remain = count; - case HCI_ACLDATA_PKT: - if (count >= HCI_ACL_HDR_SIZE) { - struct hci_acl_hdr *h = data; - len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); - } else - return -EILSEQ; - break; + switch (type) { + case HCI_EVENT_PKT: + if (skb->len == HCI_EVENT_HDR_SIZE) { + struct hci_event_hdr *h = hci_event_hdr(skb); + scb->expect = h->plen; + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } + } + break; - case HCI_SCODATA_PKT: - if (count >= HCI_SCO_HDR_SIZE) { - struct hci_sco_hdr *h = data; - len = HCI_SCO_HDR_SIZE + h->dlen; - } else - return -EILSEQ; - break; + case HCI_ACLDATA_PKT: + if (skb->len == HCI_ACL_HDR_SIZE) { + struct hci_acl_hdr *h = hci_acl_hdr(skb); + scb->expect = __le16_to_cpu(h->dlen); + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } } + break; - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) { - BT_ERR("%s no memory for packet", hdev->name); - return -ENOMEM; + case HCI_SCODATA_PKT: + if (skb->len == HCI_SCO_HDR_SIZE) { + struct hci_sco_hdr *h = hci_sco_hdr(skb); + scb->expect = h->dlen; + + if (skb_tailroom(skb) < scb->expect) { + kfree_skb(skb); + hdev->reassembly[index] = NULL; + return -ENOMEM; + } } + break; + } + + if (scb->expect == 0) { + /* Complete frame */ - skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = type; + hci_recv_frame(skb); - __reassembly(hdev, type) = skb; + hdev->reassembly[index] = NULL; + return remain; + } + } - scb = (void *) skb->cb; - scb->expect = len; - } else { - /* Continuation */ + return remain; +} - scb = (void *) skb->cb; - len = scb->expect; - } +int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) +{ + int rem = 0; - len = min(len, count); + if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) + return -EILSEQ; - memcpy(skb_put(skb, len), data, len); + while (count) { + rem = hci_reassembly(hdev, type, data, count, + type - 1, GFP_ATOMIC); + if (rem < 0) + return rem; - scb->expect -= len; + data += (count - rem); + count = rem; + }; - if (scb->expect == 0) { - /* Complete frame */ + return rem; +} +EXPORT_SYMBOL(hci_recv_fragment); - __reassembly(hdev, type) = NULL; +#define STREAM_REASSEMBLY 0 - bt_cb(skb)->pkt_type = type; - hci_recv_frame(skb); - } +int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) +{ + int type; + int rem = 0; - count -= len; data += len; - } + while (count) { + struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY]; - return 0; + if (!skb) { + struct { char type; } *pkt; + + /* Start of the frame */ + pkt = data; + type = pkt->type; + + data++; + count--; + } else + type = bt_cb(skb)->pkt_type; + + rem = hci_reassembly(hdev, type, data, + count, STREAM_REASSEMBLY, GFP_ATOMIC); + if (rem < 0) + return rem; + + data += (count - rem); + count = rem; + }; + + return rem; } -EXPORT_SYMBOL(hci_recv_fragment); +EXPORT_SYMBOL(hci_recv_stream_fragment); /* ---- Interface to upper protocols ---- */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c57fc71c7e2..bfef5bae0b3a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1,6 +1,6 @@ /* BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -584,7 +584,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn->out = 1; conn->link_mode |= HCI_LM_MASTER; } else - BT_ERR("No memmory for new connection"); + BT_ERR("No memory for new connection"); } } @@ -785,9 +785,13 @@ static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, status); + } + hci_dev_unlock(hdev); } @@ -808,9 +812,13 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); - if (conn) + if (conn) { clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, status); + } + hci_dev_unlock(hdev); } @@ -915,20 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } else conn->state = BT_CLOSED; - if (conn->type == ACL_LINK) { - struct hci_conn *sco = conn->link; - if (sco) { - if (!ev->status) { - if (lmp_esco_capable(hdev)) - hci_setup_sync(sco, conn->handle); - else - hci_add_sco(sco, conn->handle); - } else { - hci_proto_connect_cfm(sco, ev->status); - hci_conn_del(sco); - } - } - } + if (conn->type == ACL_LINK) + hci_sco_setup(conn, ev->status); if (ev->status) { hci_proto_connect_cfm(conn, ev->status); @@ -952,7 +948,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - if (mask & HCI_LM_ACCEPT) { + if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { /* Connection accepted */ struct inquiry_entry *ie; struct hci_conn *conn; @@ -965,7 +961,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); if (!conn) { if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { - BT_ERR("No memmory for new connection"); + BT_ERR("No memory for new connection"); hci_dev_unlock(hdev); return; } @@ -1049,6 +1045,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn) { if (!ev->status) conn->link_mode |= HCI_LM_AUTH; + else + conn->sec_level = BT_SECURITY_LOW; clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -1479,6 +1477,9 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb else conn->power_save = 0; } + + if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend)) + hci_sco_setup(conn, ev->status); } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 38f08f6b86f6..4f170a595934 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock) return 0; } +struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct list_head *p; + struct bdaddr_list *blacklist = &hdev->blacklist; + + list_for_each(p, &blacklist->list) { + struct bdaddr_list *b; + + b = list_entry(p, struct bdaddr_list, list); + + if (bacmp(bdaddr, &b->bdaddr) == 0) + return b; + } + + return NULL; +} + +static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg) +{ + bdaddr_t bdaddr; + struct bdaddr_list *entry; + + if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) + return -EFAULT; + + if (bacmp(&bdaddr, BDADDR_ANY) == 0) + return -EBADF; + + if (hci_blacklist_lookup(hdev, &bdaddr)) + return -EEXIST; + + entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + bacpy(&entry->bdaddr, &bdaddr); + + list_add(&entry->list, &hdev->blacklist.list); + + return 0; +} + +int hci_blacklist_clear(struct hci_dev *hdev) +{ + struct list_head *p, *n; + struct bdaddr_list *blacklist = &hdev->blacklist; + + list_for_each_safe(p, n, &blacklist->list) { + struct bdaddr_list *b; + + b = list_entry(p, struct bdaddr_list, list); + + list_del(p); + kfree(b); + } + + return 0; +} + +static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg) +{ + bdaddr_t bdaddr; + struct bdaddr_list *entry; + + if (copy_from_user(&bdaddr, arg, sizeof(bdaddr))) + return -EFAULT; + + if (bacmp(&bdaddr, BDADDR_ANY) == 0) + return hci_blacklist_clear(hdev); + + entry = hci_blacklist_lookup(hdev, &bdaddr); + if (!entry) + return -ENOENT; + + list_del(&entry->list); + kfree(entry); + + return 0; +} + /* Ioctls that require bound socket */ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { @@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign case HCIGETAUTHINFO: return hci_get_auth_info(hdev, (void __user *) arg); + case HCIBLOCKADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + return hci_blacklist_add(hdev, (void __user *) arg); + + case HCIUNBLOCKADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + return hci_blacklist_del(hdev, (void __user *) arg); + default: if (hdev->ioctl) return hdev->ioctl(hdev, cmd, arg); diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 463ffa4fe042..ce44c47eeac1 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -436,6 +436,41 @@ static const struct file_operations inquiry_cache_fops = { .release = single_release, }; +static int blacklist_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct bdaddr_list *blacklist = &hdev->blacklist; + struct list_head *l; + + hci_dev_lock_bh(hdev); + + list_for_each(l, &blacklist->list) { + struct bdaddr_list *b; + bdaddr_t bdaddr; + + b = list_entry(l, struct bdaddr_list, list); + + baswap(&bdaddr, &b->bdaddr); + + seq_printf(f, "%s\n", batostr(&bdaddr)); + } + + hci_dev_unlock_bh(hdev); + + return 0; +} + +static int blacklist_open(struct inode *inode, struct file *file) +{ + return single_open(file, blacklist_show, inode->i_private); +} + +static const struct file_operations blacklist_fops = { + .open = blacklist_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -465,6 +500,9 @@ int hci_register_sysfs(struct hci_dev *hdev) debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, hdev, &inquiry_cache_fops); + debugfs_create_file("blacklist", 0444, hdev->debugfs, + hdev, &blacklist_fops); + return 0; } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 1b682a5aa061..9ba1e8eee37c 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1,6 +1,8 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org> + Copyright (C) 2010 Google Inc. Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> @@ -53,15 +55,9 @@ #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> -#define VERSION "2.14" +#define VERSION "2.15" -#ifdef CONFIG_BT_L2CAP_EXT_FEATURES -static int enable_ertm = 1; -#else -static int enable_ertm = 0; -#endif -static int max_transmit = L2CAP_DEFAULT_MAX_TX; -static int tx_window = L2CAP_DEFAULT_TX_WINDOW; +static int disable_ertm = 0; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -80,9 +76,12 @@ static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); static void l2cap_sock_kill(struct sock *sk); +static int l2cap_build_conf_req(struct sock *sk, void *data); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); + /* ---- L2CAP timers ---- */ static void l2cap_sock_timeout(unsigned long arg) { @@ -278,6 +277,24 @@ static void l2cap_chan_del(struct sock *sk, int err) parent->sk_data_ready(parent, 0); } else sk->sk_state_change(sk); + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + struct srej_list *l, *tmp; + + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + + skb_queue_purge(SREJ_QUEUE(sk)); + skb_queue_purge(BUSY_QUEUE(sk)); + + list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { + list_del(&l->list); + kfree(l); + } + } } /* Service level security */ @@ -351,8 +368,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) struct sk_buff *skb; struct l2cap_hdr *lh; struct l2cap_conn *conn = pi->conn; + struct sock *sk = (struct sock *)pi; int count, hlen = L2CAP_HDR_SIZE + 2; + if (sk->sk_state != BT_CONNECTED) + return; + if (pi->fcs == L2CAP_FCS_CRC16) hlen += 2; @@ -401,6 +422,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) l2cap_send_sframe(pi, control); } +static inline int __l2cap_no_conn_pending(struct sock *sk) +{ + return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND); +} + static void l2cap_do_start(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; @@ -409,12 +435,13 @@ static void l2cap_do_start(struct sock *sk) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_check_security(sk)) { + if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -434,24 +461,57 @@ static void l2cap_do_start(struct sock *sk) } } -static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk) +static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) +{ + u32 local_feat_mask = l2cap_feat_mask; + if (!disable_ertm) + local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; + + switch (mode) { + case L2CAP_MODE_ERTM: + return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; + case L2CAP_MODE_STREAMING: + return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; + default: + return 0x00; + } +} + +static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) { struct l2cap_disconn_req req; + if (!conn) + return; + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + } + req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); req.scid = cpu_to_le16(l2cap_pi(sk)->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_DISCONN_REQ, sizeof(req), &req); + + sk->sk_state = BT_DISCONN; + sk->sk_err = err; } /* ---- L2CAP connections ---- */ static void l2cap_conn_start(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; + struct sock_del_list del, *tmp1, *tmp2; struct sock *sk; BT_DBG("conn %p", conn); + INIT_LIST_HEAD(&del.list); + read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { @@ -464,18 +524,38 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } if (sk->sk_state == BT_CONNECT) { - if (l2cap_check_security(sk)) { - struct l2cap_conn_req req; - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; + struct l2cap_conn_req req; - l2cap_pi(sk)->ident = l2cap_get_ident(conn); + if (!l2cap_check_security(sk) || + !__l2cap_no_conn_pending(sk)) { + bh_unlock_sock(sk); + continue; + } - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); + if (!l2cap_mode_supported(l2cap_pi(sk)->mode, + conn->feat_mask) + && l2cap_pi(sk)->conf_state & + L2CAP_CONF_STATE2_DEVICE) { + tmp1 = kzalloc(sizeof(struct sock_del_list), + GFP_ATOMIC); + tmp1->sk = sk; + list_add_tail(&tmp1->list, &del.list); + bh_unlock_sock(sk); + continue; } + + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else if (sk->sk_state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; + char buf[128]; rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); @@ -498,12 +578,31 @@ static void l2cap_conn_start(struct l2cap_conn *conn) l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT || + rsp.result != L2CAP_CR_SUCCESS) { + bh_unlock_sock(sk); + continue; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; } bh_unlock_sock(sk); } read_unlock(&l->lock); + + list_for_each_entry_safe(tmp1, tmp2, &del.list, list) { + bh_lock_sock(tmp1->sk); + __l2cap_sock_close(tmp1->sk, ECONNRESET); + bh_unlock_sock(tmp1->sk); + list_del(&tmp1->list); + kfree(tmp1); + } } static void l2cap_conn_ready(struct l2cap_conn *conn) @@ -732,9 +831,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason) sk->sk_type == SOCK_STREAM) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - sk->sk_state = BT_DISCONN; l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, reason); } else l2cap_chan_del(sk, reason); break; @@ -794,6 +892,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->imtu = l2cap_pi(parent)->imtu; pi->omtu = l2cap_pi(parent)->omtu; + pi->conf_state = l2cap_pi(parent)->conf_state; pi->mode = l2cap_pi(parent)->mode; pi->fcs = l2cap_pi(parent)->fcs; pi->max_tx = l2cap_pi(parent)->max_tx; @@ -804,13 +903,15 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; - if (enable_ertm && sk->sk_type == SOCK_STREAM) + if (!disable_ertm && sk->sk_type == SOCK_STREAM) { pi->mode = L2CAP_MODE_ERTM; - else + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + } else { pi->mode = L2CAP_MODE_BASIC; - pi->max_tx = max_transmit; + } + pi->max_tx = L2CAP_DEFAULT_MAX_TX; pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = tx_window; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; @@ -1059,7 +1160,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -1076,6 +1177,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al case BT_CONNECTED: /* Already connected */ + err = -EISCONN; goto done; case BT_OPEN: @@ -1124,7 +1226,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -1277,9 +1379,11 @@ static void l2cap_monitor_timeout(unsigned long arg) { struct sock *sk = (void *) arg; + BT_DBG("sk %p", sk); + bh_lock_sock(sk); if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { - l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk); + l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED); bh_unlock_sock(sk); return; } @@ -1295,6 +1399,8 @@ static void l2cap_retrans_timeout(unsigned long arg) { struct sock *sk = (void *) arg; + BT_DBG("sk %p", sk); + bh_lock_sock(sk); l2cap_pi(sk)->retry_count = 1; __mod_monitor_timer(); @@ -1333,7 +1439,7 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) hci_send_acl(pi->conn->hcon, skb, 0); } -static int l2cap_streaming_send(struct sock *sk) +static void l2cap_streaming_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1363,7 +1469,6 @@ static int l2cap_streaming_send(struct sock *sk) skb = skb_dequeue(TX_QUEUE(sk)); kfree_skb(skb); } - return 0; } static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) @@ -1387,15 +1492,22 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); return; } tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); if (pi->fcs == L2CAP_FCS_CRC16) { @@ -1413,15 +1525,14 @@ static int l2cap_ertm_send(struct sock *sk) u16 control, fcs; int nsent = 0; - if (pi->conn_state & L2CAP_CONN_WAIT_F) - return 0; + if (sk->sk_state != BT_CONNECTED) + return -ENOTCONN; - while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && - !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { if (pi->remote_max_tx && bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); break; } @@ -1430,6 +1541,8 @@ static int l2cap_ertm_send(struct sock *sk) bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control &= L2CAP_CTRL_SAR; + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { control |= L2CAP_CTRL_FINAL; pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; @@ -1470,16 +1583,11 @@ static int l2cap_retransmit_frames(struct sock *sk) struct l2cap_pinfo *pi = l2cap_pi(sk); int ret; - spin_lock_bh(&pi->send_lock); - if (!skb_queue_empty(TX_QUEUE(sk))) sk->sk_send_head = TX_QUEUE(sk)->next; pi->next_tx_seq = pi->expected_ack_seq; ret = l2cap_ertm_send(sk); - - spin_unlock_bh(&pi->send_lock); - return ret; } @@ -1487,7 +1595,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) { struct sock *sk = (struct sock *)pi; u16 control = 0; - int nframes; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; @@ -1498,11 +1605,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi) return; } - spin_lock_bh(&pi->send_lock); - nframes = l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); - - if (nframes > 0) + if (l2cap_ertm_send(sk) > 0) return; control |= L2CAP_SUPER_RCV_READY; @@ -1697,10 +1800,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz size += buflen; } skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); - spin_lock_bh(&pi->send_lock); if (sk->sk_send_head == NULL) sk->sk_send_head = sar_queue.next; - spin_unlock_bh(&pi->send_lock); return size; } @@ -1745,7 +1846,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms case L2CAP_MODE_BASIC: /* Check outgoing MTU */ if (len > pi->omtu) { - err = -EINVAL; + err = -EMSGSIZE; goto done; } @@ -1772,14 +1873,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms } __skb_queue_tail(TX_QUEUE(sk), skb); - if (pi->mode == L2CAP_MODE_ERTM) - spin_lock_bh(&pi->send_lock); - if (sk->sk_send_head == NULL) sk->sk_send_head = skb; - if (pi->mode == L2CAP_MODE_ERTM) - spin_unlock_bh(&pi->send_lock); } else { /* Segment SDU into multiples PDUs */ err = l2cap_sar_segment_sdu(sk, msg, len); @@ -1788,11 +1884,14 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms } if (pi->mode == L2CAP_MODE_STREAMING) { - err = l2cap_streaming_send(sk); + l2cap_streaming_send(sk); } else { - spin_lock_bh(&pi->send_lock); + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && + pi->conn_state && L2CAP_CONN_WAIT_F) { + err = len; + break; + } err = l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); } if (err >= 0) @@ -1801,7 +1900,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms default: BT_DBG("bad state %1.1x", pi->mode); - err = -EINVAL; + err = -EBADFD; } done: @@ -1817,6 +1916,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + u8 buf[128]; sk->sk_state = BT_CONFIG; @@ -1827,6 +1928,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { + release_sock(sk); + return 0; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + release_sock(sk); return 0; } @@ -1863,13 +1974,19 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } + if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + err = -EINVAL; + break; + } + l2cap_pi(sk)->mode = opts.mode; switch (l2cap_pi(sk)->mode) { case L2CAP_MODE_BASIC: + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; break; case L2CAP_MODE_ERTM: case L2CAP_MODE_STREAMING: - if (enable_ertm) + if (!disable_ertm) break; /* fall through */ default: @@ -2137,6 +2254,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } + + if (!err && sk->sk_err) + err = -sk->sk_err; + release_sock(sk); return err; } @@ -2357,25 +2478,10 @@ static inline void l2cap_ertm_init(struct sock *sk) __skb_queue_head_init(SREJ_QUEUE(sk)); __skb_queue_head_init(BUSY_QUEUE(sk)); - spin_lock_init(&l2cap_pi(sk)->send_lock); INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); -} -static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) -{ - u32 local_feat_mask = l2cap_feat_mask; - if (enable_ertm) - local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; - - switch (mode) { - case L2CAP_MODE_ERTM: - return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; - case L2CAP_MODE_STREAMING: - return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; - default: - return 0x00; - } + sk->sk_backlog_rcv = l2cap_ertm_data_rcv; } static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) @@ -2406,10 +2512,10 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) switch (pi->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) - l2cap_send_disconn_req(pi->conn, sk); - break; + if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE) + break; + + /* fall through */ default: pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); break; @@ -2420,6 +2526,14 @@ done: case L2CAP_MODE_BASIC: if (pi->imtu != L2CAP_DEFAULT_MTU) l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + + rfc.mode = L2CAP_MODE_BASIC; + rfc.txwin_size = 0; + rfc.max_transmit = 0; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = 0; + break; case L2CAP_MODE_ERTM: @@ -2432,9 +2546,6 @@ done: if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -2455,9 +2566,6 @@ done: if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) break; @@ -2469,6 +2577,9 @@ done: break; } + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc); + /* FIXME: Need actual value of the flush timeout */ //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); @@ -2533,18 +2644,21 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) } } - if (pi->num_conf_rsp || pi->num_conf_req) + if (pi->num_conf_rsp || pi->num_conf_req > 1) goto done; switch (pi->mode) { case L2CAP_MODE_STREAMING: case L2CAP_MODE_ERTM: - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) + if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) { + pi->mode = l2cap_select_mode(rfc.mode, + pi->conn->feat_mask); + break; + } + + if (pi->mode != rfc.mode) return -ECONNREFUSED; - break; - default: - pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); + break; } @@ -2667,7 +2781,6 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, rfc.mode != pi->mode) return -ECONNREFUSED; - pi->mode = rfc.mode; pi->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, @@ -2676,6 +2789,11 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, } } + if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode) + return -ECONNREFUSED; + + pi->mode = rfc.mode; + if (*result == L2CAP_CONF_SUCCESS) { switch (rfc.mode) { case L2CAP_MODE_ERTM: @@ -2770,7 +2888,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_chan_list *list = &conn->chan_list; struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; - struct sock *sk, *parent; + struct sock *parent, *uninitialized_var(sk); int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); @@ -2879,6 +2997,15 @@ sendresp: L2CAP_INFO_REQ, sizeof(info), &info); } + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) && + result == L2CAP_CR_SUCCESS) { + u8 buf[128]; + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + } + return 0; } @@ -2899,11 +3026,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd if (scid) { sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); if (!sk) - return 0; + return -EFAULT; } else { sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident); if (!sk) - return 0; + return -EFAULT; } switch (result) { @@ -2911,10 +3038,13 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd sk->sk_state = BT_CONFIG; l2cap_pi(sk)->ident = 0; l2cap_pi(sk)->dcid = dcid; - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) + break; + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, req), req); l2cap_pi(sk)->num_conf_req++; @@ -2950,8 +3080,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (!sk) return -ENOENT; - if (sk->sk_state == BT_DISCONN) + if (sk->sk_state != BT_CONFIG) { + struct l2cap_cmd_rej rej; + + rej.reason = cpu_to_le16(0x0002); + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); goto unlock; + } /* Reject if config buffer is too small. */ len = cmd_len - sizeof(*req); @@ -2977,7 +3113,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr /* Complete config. */ len = l2cap_parse_conf_req(sk, rsp); if (len < 0) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto unlock; } @@ -3047,7 +3183,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr char req[64]; if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3056,7 +3192,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr len = l2cap_parse_conf_rsp(sk, rsp->data, len, req, &result); if (len < 0) { - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3069,10 +3205,9 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr } default: - sk->sk_state = BT_DISCONN; sk->sk_err = ECONNRESET; l2cap_sock_set_timer(sk, HZ * 5); - l2cap_send_disconn_req(conn, sk); + l2cap_send_disconn_req(conn, sk, ECONNRESET); goto done; } @@ -3123,16 +3258,6 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - skb_queue_purge(SREJ_QUEUE(sk)); - skb_queue_purge(BUSY_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - del_timer(&l2cap_pi(sk)->ack_timer); - } - l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -3155,16 +3280,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd if (!sk) return 0; - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - skb_queue_purge(SREJ_QUEUE(sk)); - skb_queue_purge(BUSY_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - del_timer(&l2cap_pi(sk)->ack_timer); - } - l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -3187,7 +3302,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); - if (enable_ertm) + if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; put_unaligned_le32(feat_mask, rsp->data); @@ -3352,7 +3467,7 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); if (our_fcs != rcv_fcs) - return -EINVAL; + return -EBADMSG; } return 0; } @@ -3363,25 +3478,19 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) u16 control = 0; pi->frames_sent = 0; - pi->conn_state |= L2CAP_CONN_SEND_FBIT; control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL; + control |= L2CAP_SUPER_RCV_NOT_READY; l2cap_send_sframe(pi, control); pi->conn_state |= L2CAP_CONN_RNR_SENT; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; } - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0) - __mod_retrans_timer(); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY) + l2cap_retransmit_frames(sk); - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && pi->frames_sent == 0) { @@ -3393,6 +3502,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) { struct sk_buff *next_skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + int tx_seq_offset, next_tx_seq_offset; bt_cb(skb)->tx_seq = tx_seq; bt_cb(skb)->sar = sar; @@ -3403,11 +3514,20 @@ static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_s return 0; } + tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; + if (tx_seq_offset < 0) + tx_seq_offset += 64; + do { if (bt_cb(next_skb)->tx_seq == tx_seq) return -EINVAL; - if (bt_cb(next_skb)->tx_seq > tx_seq) { + next_tx_seq_offset = (bt_cb(next_skb)->tx_seq - + pi->buffer_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + if (next_tx_seq_offset > tx_seq_offset) { __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); return 0; } @@ -3525,11 +3645,51 @@ drop: pi->sdu = NULL; disconnect: - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); kfree_skb(skb); return 0; } +static int l2cap_try_push_rx_skb(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err < 0) { + skb_queue_head(BUSY_QUEUE(sk), skb); + return -EBUSY; + } + + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + } + + if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) + goto done; + + control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; + l2cap_send_sframe(pi, control); + l2cap_pi(sk)->retry_count = 1; + + del_timer(&pi->retrans_timer); + __mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; + +done: + pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; + pi->conn_state &= ~L2CAP_CONN_RNR_SENT; + + BT_DBG("sk %p, Exit local busy", sk); + + return 0; +} + static void l2cap_busy_work(struct work_struct *work) { DECLARE_WAITQUEUE(wait, current); @@ -3538,7 +3698,6 @@ static void l2cap_busy_work(struct work_struct *work) struct sock *sk = (struct sock *)pi; int n_tries = 0, timeo = HZ/5, err; struct sk_buff *skb; - u16 control; lock_sock(sk); @@ -3548,8 +3707,8 @@ static void l2cap_busy_work(struct work_struct *work) if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { err = -EBUSY; - l2cap_send_disconn_req(pi->conn, sk); - goto done; + l2cap_send_disconn_req(pi->conn, sk, EBUSY); + break; } if (!timeo) @@ -3557,7 +3716,7 @@ static void l2cap_busy_work(struct work_struct *work) if (signal_pending(current)) { err = sock_intr_errno(timeo); - goto done; + break; } release_sock(sk); @@ -3566,40 +3725,12 @@ static void l2cap_busy_work(struct work_struct *work) err = sock_error(sk); if (err) - goto done; - - while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { - control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - err = l2cap_ertm_reassembly_sdu(sk, skb, control); - if (err < 0) { - skb_queue_head(BUSY_QUEUE(sk), skb); - break; - } - - pi->buffer_seq = (pi->buffer_seq + 1) % 64; - } + break; - if (!skb) + if (l2cap_try_push_rx_skb(sk) == 0) break; } - if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) - goto done; - - control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; - l2cap_send_sframe(pi, control); - l2cap_pi(sk)->retry_count = 1; - - del_timer(&pi->retrans_timer); - __mod_monitor_timer(); - - l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; - -done: - pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; - pi->conn_state &= ~L2CAP_CONN_RNR_SENT; - set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); @@ -3614,7 +3745,9 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; __skb_queue_tail(BUSY_QUEUE(sk), skb); - return -EBUSY; + return l2cap_try_push_rx_skb(sk); + + } err = l2cap_ertm_reassembly_sdu(sk, skb, control); @@ -3624,6 +3757,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) } /* Busy Condition */ + BT_DBG("sk %p, Enter local busy", sk); + pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; __skb_queue_tail(BUSY_QUEUE(sk), skb); @@ -3634,6 +3769,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) pi->conn_state |= L2CAP_CONN_RNR_SENT; + del_timer(&pi->ack_timer); + queue_work(_busy_wq, &pi->busy_work); return err; @@ -3747,7 +3884,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) l2cap_ertm_reassembly_sdu(sk, skb, control); l2cap_pi(sk)->buffer_seq_srej = (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; - tx_seq++; + tx_seq = (tx_seq + 1) % 64; } } @@ -3783,10 +3920,11 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) l2cap_send_sframe(pi, control); new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); - new->tx_seq = pi->expected_tx_seq++; + new->tx_seq = pi->expected_tx_seq; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; list_add_tail(&new->list, SREJ_LIST(sk)); } - pi->expected_tx_seq++; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; } static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) @@ -3795,11 +3933,12 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str u8 tx_seq = __get_txseq(rx_control); u8 req_seq = __get_reqseq(rx_control); u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; - u8 tx_seq_offset, expected_tx_seq_offset; + int tx_seq_offset, expected_tx_seq_offset; int num_to_ack = (pi->tx_win/6) + 1; int err = 0; - BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq, + rx_control); if (L2CAP_CTRL_FINAL & rx_control && l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { @@ -3821,7 +3960,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str /* invalid tx_seq */ if (tx_seq_offset >= pi->tx_win) { - l2cap_send_disconn_req(pi->conn, sk); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); goto drop; } @@ -3844,6 +3983,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->buffer_seq = pi->buffer_seq_srej; pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; l2cap_send_ack(pi); + BT_DBG("sk %p, Exit SREJ_SENT", sk); } } else { struct srej_list *l; @@ -3872,6 +4012,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->conn_state |= L2CAP_CONN_SREJ_SENT; + BT_DBG("sk %p, Enter SREJ", sk); + INIT_LIST_HEAD(SREJ_LIST(sk)); pi->buffer_seq_srej = pi->buffer_seq; @@ -3882,6 +4024,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str pi->conn_state |= L2CAP_CONN_SEND_PBIT; l2cap_send_srejframe(sk, tx_seq); + + del_timer(&pi->ack_timer); } return 0; @@ -3895,6 +4039,10 @@ expected: return 0; } + err = l2cap_push_rx_skb(sk, skb, rx_control); + if (err < 0) + return 0; + if (rx_control & L2CAP_CTRL_FINAL) { if (pi->conn_state & L2CAP_CONN_REJ_ACT) pi->conn_state &= ~L2CAP_CONN_REJ_ACT; @@ -3902,10 +4050,6 @@ expected: l2cap_retransmit_frames(sk); } - err = l2cap_push_rx_skb(sk, skb, rx_control); - if (err < 0) - return 0; - __mod_ack_timer(); pi->num_acked = (pi->num_acked + 1) % num_to_ack; @@ -3923,10 +4067,14 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) { struct l2cap_pinfo *pi = l2cap_pi(sk); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control), + rx_control); + pi->expected_ack_seq = __get_reqseq(rx_control); l2cap_drop_acked_frames(sk); if (rx_control & L2CAP_CTRL_POLL) { + pi->conn_state |= L2CAP_CONN_SEND_FBIT; if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && (pi->unacked_frames > 0)) @@ -3955,9 +4103,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { l2cap_send_ack(pi); } else { - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); } } } @@ -3967,6 +4113,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; pi->expected_ack_seq = tx_seq; @@ -3989,16 +4137,18 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; if (rx_control & L2CAP_CTRL_POLL) { pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + + pi->conn_state |= L2CAP_CONN_SEND_FBIT; l2cap_retransmit_one_frame(sk, tx_seq); - spin_lock_bh(&pi->send_lock); l2cap_ertm_send(sk); - spin_unlock_bh(&pi->send_lock); if (pi->conn_state & L2CAP_CONN_WAIT_F) { pi->srej_save_reqseq = tx_seq; @@ -4024,10 +4174,15 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_reqseq(rx_control); + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + if (rx_control & L2CAP_CTRL_POLL) + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { del_timer(&pi->retrans_timer); if (rx_control & L2CAP_CTRL_POLL) @@ -4075,12 +4230,83 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str return 0; } +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control; + u8 req_seq; + int len, next_tx_seq_offset, req_seq_offset; + + control = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + len = skb->len; + + /* + * We can just drop the corrupted I-frame here. + * Receiver will miss it and start proper recovery + * procedures and ask retransmission. + */ + if (l2cap_check_fcs(pi, skb)) + goto drop; + + if (__is_sar_start(control) && __is_iframe(control)) + len -= 2; + + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + + if (len > pi->mps) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + req_seq = __get_reqseq(control); + req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; + if (req_seq_offset < 0) + req_seq_offset += 64; + + next_tx_seq_offset = + (pi->next_tx_seq - pi->expected_ack_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + /* check for invalid req-seq */ + if (req_seq_offset > next_tx_seq_offset) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + if (__is_iframe(control)) { + if (len < 0) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_iframe(sk, control, skb); + } else { + if (len != 0) { + BT_ERR("%d", len); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_sframe(sk, control, skb); + } + + return 0; + +drop: + kfree_skb(skb); + return 0; +} + static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { struct sock *sk; struct l2cap_pinfo *pi; - u16 control, len; - u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset; + u16 control; + u8 tx_seq; + int len; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -4110,59 +4336,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk break; case L2CAP_MODE_ERTM: - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - len = skb->len; - - if (__is_sar_start(control)) - len -= 2; - - if (pi->fcs == L2CAP_FCS_CRC16) - len -= 2; - - /* - * We can just drop the corrupted I-frame here. - * Receiver will miss it and start proper recovery - * procedures and ask retransmission. - */ - if (len > pi->mps) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - if (l2cap_check_fcs(pi, skb)) - goto drop; - - req_seq = __get_reqseq(control); - req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; - if (req_seq_offset < 0) - req_seq_offset += 64; - - next_tx_seq_offset = - (pi->next_tx_seq - pi->expected_ack_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; - - /* check for invalid req-seq */ - if (req_seq_offset > next_tx_seq_offset) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - if (__is_iframe(control)) { - if (len < 4) { - l2cap_send_disconn_req(pi->conn, sk); - goto drop; - } - - l2cap_data_channel_iframe(sk, control, skb); + if (!sock_owned_by_user(sk)) { + l2cap_ertm_data_rcv(sk, skb); } else { - if (len != 0) { - l2cap_send_disconn_req(pi->conn, sk); + if (sk_add_backlog(sk, skb)) goto drop; - } - - l2cap_data_channel_sframe(sk, control, skb); } goto done; @@ -4172,16 +4350,16 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk skb_pull(skb, 2); len = skb->len; + if (l2cap_check_fcs(pi, skb)) + goto drop; + if (__is_sar_start(control)) len -= 2; if (pi->fcs == L2CAP_FCS_CRC16) len -= 2; - if (len > pi->mps || len < 4 || __is_sframe(control)) - goto drop; - - if (l2cap_check_fcs(pi, skb)) + if (len > pi->mps || len < 0 || __is_sframe(control)) goto drop; tx_seq = __get_txseq(control); @@ -4281,7 +4459,7 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) struct hlist_node *node; if (type != ACL_LINK) - return 0; + return -EINVAL; BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); @@ -4314,7 +4492,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); if (hcon->type != ACL_LINK) - return 0; + return -EINVAL; if (!status) { conn = l2cap_conn_add(hcon, status); @@ -4343,7 +4521,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) BT_DBG("hcon %p reason %d", hcon, reason); if (hcon->type != ACL_LINK) - return 0; + return -EINVAL; l2cap_conn_del(hcon, bt_err(reason)); @@ -4404,6 +4582,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) req.psm = l2cap_pi(sk)->psm; l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req); @@ -4671,14 +4850,8 @@ EXPORT_SYMBOL(l2cap_load); module_init(l2cap_init); module_exit(l2cap_exit); -module_param(enable_ertm, bool, 0644); -MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); - -module_param(max_transmit, uint, 0644); -MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)"); - -module_param(tx_window, uint, 0644); -MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)"); +module_param(disable_ertm, bool, 0644); +MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 43fbf6b4b4bf..44a623275951 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1152,7 +1152,7 @@ error: return err; } -void rfcomm_cleanup_sockets(void) +void __exit rfcomm_cleanup_sockets(void) { debugfs_remove(rfcomm_sock_debugfs); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 309b6c261b25..026205c18b78 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1153,7 +1153,7 @@ static const struct tty_operations rfcomm_ops = { .tiocmset = rfcomm_tty_tiocmset, }; -int rfcomm_init_ttys(void) +int __init rfcomm_init_ttys(void) { rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS); if (!rfcomm_tty_driver) @@ -1183,7 +1183,7 @@ int rfcomm_init_ttys(void) return 0; } -void rfcomm_cleanup_ttys(void) +void __exit rfcomm_cleanup_ttys(void) { tty_unregister_driver(rfcomm_tty_driver); put_tty_driver(rfcomm_tty_driver); diff --git a/net/bridge/br.c b/net/bridge/br.c index 76357b547752..c8436fa31344 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -63,7 +63,6 @@ static int __init br_init(void) goto err_out4; brioctl_set(br_ioctl_deviceless_stub); - br_handle_frame_hook = br_handle_frame; #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) br_fdb_test_addr_hook = br_fdb_test_addr; @@ -100,7 +99,6 @@ static void __exit br_deinit(void) br_fdb_test_addr_hook = NULL; #endif - br_handle_frame_hook = NULL; br_fdb_fini(); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index eedf2c94820e..cf09fe591fc2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,7 +22,7 @@ #include <asm/uaccess.h> #include "br_private.h" -/* net device transmit always called with no BH (preempt_disabled) */ +/* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); @@ -38,17 +38,26 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } #endif + u64_stats_update_begin(&brstats->syncp); brstats->tx_packets++; brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + rcu_read_lock(); if (is_multicast_ether_addr(dest)) { - if (br_multicast_rcv(br, NULL, skb)) + if (unlikely(netpoll_tx_running(dev))) { + br_flood_deliver(br, skb); + goto out; + } + if (br_multicast_rcv(br, NULL, skb)) { + kfree_skb(skb); goto out; + } mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) @@ -61,6 +70,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb); out: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -92,21 +102,25 @@ static int br_dev_stop(struct net_device *dev) return 0; } -static struct net_device_stats *br_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct net_bridge *br = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - struct br_cpu_netstats sum = { 0 }; + struct br_cpu_netstats tmp, sum = { 0 }; unsigned int cpu; for_each_possible_cpu(cpu) { + unsigned int start; const struct br_cpu_netstats *bstats = per_cpu_ptr(br->stats, cpu); - - sum.tx_bytes += bstats->tx_bytes; - sum.tx_packets += bstats->tx_packets; - sum.rx_bytes += bstats->rx_bytes; - sum.rx_packets += bstats->rx_packets; + do { + start = u64_stats_fetch_begin(&bstats->syncp); + memcpy(&tmp, bstats, sizeof(tmp)); + } while (u64_stats_fetch_retry(&bstats->syncp, start)); + sum.tx_bytes += tmp.tx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.rx_packets += tmp.rx_packets; } stats->tx_bytes = sum.tx_bytes; @@ -127,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; + br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; #endif return 0; @@ -199,73 +213,81 @@ static int br_set_tx_csum(struct net_device *dev, u32 data) } #ifdef CONFIG_NET_POLL_CONTROLLER -static bool br_devices_support_netpoll(struct net_bridge *br) +static void br_poll_controller(struct net_device *br_dev) { - struct net_bridge_port *p; - bool ret = true; - int count = 0; - unsigned long flags; - - spin_lock_irqsave(&br->lock, flags); - list_for_each_entry(p, &br->port_list, list) { - count++; - if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) || - !p->dev->netdev_ops->ndo_poll_controller) - ret = false; - } - spin_unlock_irqrestore(&br->lock, flags); - return count != 0 && ret; } -static void br_poll_controller(struct net_device *br_dev) +static void br_netpoll_cleanup(struct net_device *dev) { - struct netpoll *np = br_dev->npinfo->netpoll; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p, *n; - if (np->real_dev != br_dev) - netpoll_poll_dev(np->real_dev); + list_for_each_entry_safe(p, n, &br->port_list, list) { + br_netpoll_disable(p); + } } -void br_netpoll_cleanup(struct net_device *dev) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; - const struct net_device_ops *ops; + int err = 0; - br->dev->npinfo = NULL; list_for_each_entry_safe(p, n, &br->port_list, list) { - if (p->dev) { - ops = p->dev->netdev_ops; - if (ops->ndo_netpoll_cleanup) - ops->ndo_netpoll_cleanup(p->dev); - else - p->dev->npinfo = NULL; - } + if (!p->dev) + continue; + + err = br_netpoll_enable(p); + if (err) + goto fail; } + +out: + return err; + +fail: + br_netpoll_cleanup(dev); + goto out; } -void br_netpoll_disable(struct net_bridge *br, - struct net_device *dev) +int br_netpoll_enable(struct net_bridge_port *p) { - if (br_devices_support_netpoll(br)) - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (dev->netdev_ops->ndo_netpoll_cleanup) - dev->netdev_ops->ndo_netpoll_cleanup(dev); - else - dev->npinfo = NULL; + struct netpoll *np; + int err = 0; + + np = kzalloc(sizeof(*p->np), GFP_KERNEL); + err = -ENOMEM; + if (!np) + goto out; + + np->dev = p->dev; + + err = __netpoll_setup(np); + if (err) { + kfree(np); + goto out; + } + + p->np = np; + +out: + return err; } -void br_netpoll_enable(struct net_bridge *br, - struct net_device *dev) +void br_netpoll_disable(struct net_bridge_port *p) { - if (br_devices_support_netpoll(br)) { - br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL; - if (br->dev->npinfo) - dev->npinfo = br->dev->npinfo; - } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) { - br->dev->priv_flags |= IFF_DISABLE_NETPOLL; - br_info(br,"new device %s does not support netpoll (disabling)", - dev->name); - } + struct netpoll *np = p->np; + + if (!np) + return; + + p->np = NULL; + + /* Wait for transmitting packets to finish before freeing. */ + synchronize_rcu_bh(); + + __netpoll_cleanup(np); + kfree(np); } #endif @@ -288,12 +310,13 @@ static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, .ndo_start_xmit = br_dev_xmit, - .ndo_get_stats = br_get_stats, + .ndo_get_stats64 = br_get_stats64, .ndo_set_mac_address = br_set_mac_address, .ndo_set_multicast_list = br_dev_set_multicast_list, .ndo_change_mtu = br_change_mtu, .ndo_do_ioctl = br_dev_ioctl, #ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_setup = br_netpoll_setup, .ndo_netpoll_cleanup = br_netpoll_cleanup, .ndo_poll_controller = br_poll_controller, #endif diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b01dde35a69e..90512ccfd3e9 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -214,7 +214,7 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ +/* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, const unsigned char *addr) { @@ -240,11 +240,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) struct net_bridge_fdb_entry *fdb; int ret; - if (!dev->br_port) + if (!br_port_exists(dev)) return 0; rcu_read_lock(); - fdb = __br_fdb_get(dev->br_port->br, addr); + fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr); ret = fdb && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; rcu_read_unlock(); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index a4e72a89e4ff..cbfe87f0f34a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) kfree_skb(skb); else { skb_push(skb, ETH_HLEN); - -#ifdef CONFIG_NET_POLL_CONTROLLER - if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) { - netpoll_send_skb(skb->dev->npinfo->netpoll, skb); - skb->dev->priv_flags &= ~IFF_IN_NETPOLL; - } else -#endif - dev_queue_xmit(skb); + dev_queue_xmit(skb); } } @@ -73,23 +66,20 @@ int br_forward_finish(struct sk_buff *skb) static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { -#ifdef CONFIG_NET_POLL_CONTROLLER - struct net_bridge *br = to->br; - if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) { - struct netpoll *np; - to->dev->npinfo = skb->dev->npinfo; - np = skb->dev->npinfo->netpoll; - np->real_dev = np->dev = to->dev; - to->dev->priv_flags |= IFF_IN_NETPOLL; - } -#endif skb->dev = to->dev; + + if (unlikely(netpoll_tx_running(to->dev))) { + if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) + kfree_skb(skb); + else { + skb_push(skb, ETH_HLEN); + br_netpoll_send_skb(to, skb); + } + return; + } + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, br_forward_finish); -#ifdef CONFIG_NET_POLL_CONTROLLER - if (skb->dev->npinfo) - skb->dev->npinfo->netpoll->dev = br->dev; -#endif } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 18b245e2c00e..c03d2c3ff03e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -147,14 +147,17 @@ static void del_nbp(struct net_bridge_port *p) list_del_rcu(&p->list); - rcu_assign_pointer(dev->br_port, NULL); + dev->priv_flags &= ~IFF_BRIDGE_PORT; + + netdev_rx_handler_unregister(dev); br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); kobject_del(&p->kobj); - br_netpoll_disable(br, dev); + br_netpoll_disable(p); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -167,8 +170,6 @@ static void del_br(struct net_bridge *br, struct list_head *head) del_nbp(p); } - br_netpoll_cleanup(br->dev); - del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); @@ -400,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return -ELOOP; /* Device is already being bridged */ - if (dev->br_port != NULL) + if (br_port_exists(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g. wireless) */ @@ -428,7 +429,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err2; - rcu_assign_pointer(dev->br_port, p); + if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) + goto err3; + + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err3; + + dev->priv_flags |= IFF_BRIDGE_PORT; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -448,9 +457,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); - br_netpoll_enable(br, dev); - return 0; +err3: + sysfs_remove_link(br->ifobj, p->dev->name); err2: br_fdb_delete_by_port(br, p, 1); err1: @@ -467,9 +476,13 @@ put_back: /* called with RTNL */ int br_del_if(struct net_bridge *br, struct net_device *dev) { - struct net_bridge_port *p = dev->br_port; + struct net_bridge_port *p; + + if (!br_port_exists(dev)) + return -EINVAL; - if (!p || p->br != br) + p = br_port_get(dev); + if (p->br != br) return -EINVAL; del_nbp(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index d36e700f7a26..826cd5221536 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -27,8 +27,10 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_bridge *br = netdev_priv(brdev); struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); + u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); indev = skb->dev; skb->dev = brdev; @@ -37,11 +39,11 @@ static int br_pass_frame_up(struct sk_buff *skb) netif_receive_skb); } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge_port *p = br_port_get_rcu(skb->dev); struct net_bridge *br; struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; @@ -108,13 +110,12 @@ drop: goto out; } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct sk_buff *skb) { - struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); + struct net_bridge_port *p = br_port_get_rcu(skb->dev); - if (p) - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(p->br, p, eth_hdr(skb)->h_source); return 0; /* process further */ } @@ -131,15 +132,18 @@ static inline int is_link_local(const unsigned char *dest) } /* - * Called via br_handle_frame_hook. * Return NULL if skb is handled - * note: already called with rcu_read_lock (preempt_disabled) + * note: already called with rcu_read_lock */ -struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) +struct sk_buff *br_handle_frame(struct sk_buff *skb) { + struct net_bridge_port *p; const unsigned char *dest = eth_hdr(skb)->h_dest; int (*rhook)(struct sk_buff *skb); + if (skb->pkt_type == PACKET_LOOPBACK) + return skb; + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; @@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!skb) return NULL; + p = br_port_get_rcu(skb->dev); + if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9d21d98ae5fa..eb5b256ffc88 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -99,6 +99,15 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( return NULL; } +static struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +{ + if (!mdb) + return NULL; + + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); +} + static struct net_bridge_mdb_entry *br_mdb_ip4_get( struct net_bridge_mdb_htable *mdb, __be32 dst) { @@ -107,7 +116,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get( br_dst.u.ip4 = dst; br_dst.proto = htons(ETH_P_IP); - return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst)); + return br_mdb_ip_get(mdb, &br_dst); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -119,23 +128,17 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get( ipv6_addr_copy(&br_dst.u.ip6, dst); br_dst.proto = htons(ETH_P_IPV6); - return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst)); + return br_mdb_ip_get(mdb, &br_dst); } #endif -static struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, struct br_ip *dst) -{ - return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); -} - struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb) { struct net_bridge_mdb_htable *mdb = br->mdb; struct br_ip ip; - if (!mdb || br->multicast_disabled) + if (br->multicast_disabled) return NULL; if (BR_INPUT_SKB_CB(skb)->igmp) @@ -1432,7 +1435,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, struct icmp6hdr *icmp6h; u8 nexthdr; unsigned len; - unsigned offset; + int offset; int err; if (!pskb_may_pull(skb, sizeof(*ip6h))) @@ -1725,13 +1728,9 @@ unlock: int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; - int err = -ENOENT; + int err = 0; spin_lock(&br->multicast_lock); - if (!netif_running(br->dev)) - goto unlock; - - err = 0; if (br->multicast_disabled == !val) goto unlock; @@ -1739,6 +1738,9 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) if (br->multicast_disabled) goto unlock; + if (!netif_running(br->dev)) + goto unlock; + if (br->mdb) { if (br->mdb->old) { err = -EEXIST; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 44420992f72f..2c911c0759c2 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -55,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1; static int brnf_filter_vlan_tagged __read_mostly = 0; static int brnf_filter_pppoe_tagged __read_mostly = 0; #else +#define brnf_call_iptables 1 +#define brnf_call_ip6tables 1 +#define brnf_call_arptables 1 #define brnf_filter_vlan_tagged 0 #define brnf_filter_pppoe_tagged 0 #endif @@ -117,26 +120,27 @@ void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.dev = br->dev; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.metrics[RTAX_MTU - 1] = 1500; - rt->u.dst.flags = DST_NOXFRM; - rt->u.dst.ops = &fake_dst_ops; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + rt->dst.metrics[RTAX_MTU - 1] = 1500; + rt->dst.flags = DST_NOXFRM; + rt->dst.ops = &fake_dst_ops; } static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->br_port); - - return port ? &port->br->fake_rtable : NULL; + if (!br_port_exists(dev)) + return NULL; + return &br_port_get_rcu(dev)->br->fake_rtable; } static inline struct net_device *bridge_parent(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->br_port); + if (!br_port_exists(dev)) + return NULL; - return port ? port->br->dev : NULL; + return br_port_get_rcu(dev)->br->dev; } static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) @@ -244,8 +248,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); @@ -396,8 +399,7 @@ bridged_dnat: kfree_skb(skb); return 0; } - dst_hold(&rt->u.dst); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); } skb->dev = nf_bridge->physindev; @@ -545,25 +547,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct iphdr *iph; __u32 len = nf_bridge_encap_header_len(skb); if (unlikely(!pskb_may_pull(skb, len))) goto out; + p = br_port_get_rcu(in); + if (p == NULL) + goto out; + br = p->br; + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { -#ifdef CONFIG_SYSCTL - if (!brnf_call_ip6tables) + if (!brnf_call_ip6tables && !br->nf_call_ip6tables) return NF_ACCEPT; -#endif + nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } -#ifdef CONFIG_SYSCTL - if (!brnf_call_iptables) + + if (!brnf_call_iptables && !br->nf_call_iptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) @@ -591,6 +598,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, pskb_trim_rcsum(skb, len); + /* BUG: Should really parse the IP options here. */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + nf_bridge_put(skb->nf_bridge); if (!nf_bridge_alloc(skb)) return NF_DROP; @@ -716,12 +726,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct net_bridge_port *p; + struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); -#ifdef CONFIG_SYSCTL - if (!brnf_call_arptables) + p = br_port_get_rcu(out); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + if (!brnf_call_arptables && !br->nf_call_arptables) return NF_ACCEPT; -#endif if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe0a79018ab2..4a6a378c84e3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -120,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; for_each_netdev(net, dev) { /* not a bridge port */ - if (dev->br_port == NULL || idx < cb->args[0]) + if (!br_port_exists(dev) || idx < cb->args[0]) goto skip; - if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid, + if (br_fill_ifinfo(skb, br_port_get(dev), + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; @@ -168,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!dev) return -ENODEV; - p = dev->br_port; - if (!p) + if (!br_port_exists(dev)) return -EINVAL; + p = br_port_get(dev); /* if kernel STP is running, don't allow changes */ if (p->br->stp_enabled == BR_KERNEL_STP) diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 717e1fd6133c..404d4e14c6a7 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -32,14 +32,15 @@ struct notifier_block br_device_notifier = { static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct net_bridge_port *p = dev->br_port; + struct net_bridge_port *p = br_port_get(dev); struct net_bridge *br; int err; /* not a port of a bridge */ - if (p == NULL) + if (!br_port_exists(dev)) return NOTIFY_DONE; + p = br_port_get(dev); br = p->br; switch (event) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0f4a74bc6a9b..75c90edaf7db 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -15,6 +15,8 @@ #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/netpoll.h> +#include <linux/u64_stats_sync.h> #include <net/route.h> #define BR_HASH_BITS 8 @@ -143,13 +145,23 @@ struct net_bridge_port #ifdef CONFIG_SYSFS char sysfs_name[IFNAMSIZ]; #endif + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; +#endif }; +#define br_port_get_rcu(dev) \ + ((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data)) +#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data) +#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) + struct br_cpu_netstats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; }; struct net_bridge @@ -164,6 +176,9 @@ struct net_bridge unsigned long feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; + bool nf_call_iptables; + bool nf_call_ip6tables; + bool nf_call_arptables; #endif unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 @@ -273,16 +288,41 @@ extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER -extern void br_netpoll_cleanup(struct net_device *dev); -extern void br_netpoll_enable(struct net_bridge *br, - struct net_device *dev); -extern void br_netpoll_disable(struct net_bridge *br, - struct net_device *dev); +static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) +{ + return br->dev->npinfo; +} + +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct netpoll *np = p->np; + + if (np) + netpoll_send_skb(np, skb); +} + +extern int br_netpoll_enable(struct net_bridge_port *p); +extern void br_netpoll_disable(struct net_bridge_port *p); #else -#define br_netpoll_cleanup(br) -#define br_netpoll_enable(br, dev) -#define br_netpoll_disable(br, dev) +static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) +{ + return NULL; +} + +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} +static inline int br_netpoll_enable(struct net_bridge_port *p) +{ + return 0; +} + +static inline void br_netpoll_disable(struct net_bridge_port *p) +{ +} #endif /* br_fdb.c */ @@ -331,8 +371,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct net_bridge_port *p, - struct sk_buff *skb); +extern struct sk_buff *br_handle_frame(struct sk_buff *skb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 217bd225a42f..35cf27087b56 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -131,18 +131,19 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) /* * Called from llc. * - * NO locks, but rcu_read_lock (preempt_disabled) + * NO locks, but rcu_read_lock */ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = rcu_dereference(dev->br_port); + struct net_bridge_port *p; struct net_bridge *br; const unsigned char *buf; - if (!p) + if (!br_port_exists(dev)) goto err; + p = br_port_get_rcu(dev); if (!pskb_may_pull(skb, 4)) goto err; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 486b8f3861d2..5c1e5559ebba 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR, show_multicast_startup_query_interval, store_multicast_startup_query_interval); #endif +#ifdef CONFIG_BRIDGE_NETFILTER +static ssize_t show_nf_call_iptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_iptables); +} + +static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_iptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_iptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_iptables); +} +static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR, + show_nf_call_iptables, store_nf_call_iptables); + +static ssize_t show_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_ip6tables); +} + +static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_ip6tables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_ip6tables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); +} +static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR, + show_nf_call_ip6tables, store_nf_call_ip6tables); + +static ssize_t show_nf_call_arptables( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_arptables); +} + +static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_arptables = val ? true : false; + return 0; +} + +static ssize_t store_nf_call_arptables( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_arptables); +} +static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR, + show_nf_call_arptables, store_nf_call_arptables); +#endif static struct attribute *bridge_attrs[] = { &dev_attr_forward_delay.attr, @@ -645,6 +712,11 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, #endif +#ifdef CONFIG_BRIDGE_NETFILTER + &dev_attr_nf_call_iptables.attr, + &dev_attr_nf_call_ip6tables.attr, + &dev_attr_nf_call_arptables.attr, +#endif NULL }; diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9e19166ba453..46624bb6d9be 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,8 +24,9 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (par->hooknum != NF_BR_BROUTING) + /* rcu_read_lock()ed by nf_hook_slow */ memcpy(eth_hdr(skb)->h_dest, - par->in->br_port->br->dev->dev_addr, ETH_ALEN); + br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN); else memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ae3c7cef1484..26377e96fa1c 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -177,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (in) { strcpy(pm->physindev, in->name); /* If in isn't a bridge, then physindev==indev */ - if (in->br_port) - strcpy(pm->indev, in->br_port->br->dev->name); + if (br_port_exists(in)) + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); } else @@ -187,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); - strcpy(pm->outdev, out->br_port->br->dev->name); + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); } else pm->outdev[0] = pm->physoutdev[0] = '\0'; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 59ca00e40dec..bcc102e3be4d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, return 1; if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; - if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) + /* rcu_read_lock()ed by nf_hook_slow */ + if (in && br_port_exists(in) && + FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev), + EBT_ILOGICALIN)) return 1; - if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) + if (out && br_port_exists(out) && + FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev), + EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { diff --git a/net/caif/Kconfig b/net/caif/Kconfig index ed651786f16b..529750da9624 100644 --- a/net/caif/Kconfig +++ b/net/caif/Kconfig @@ -21,19 +21,18 @@ menuconfig CAIF See Documentation/networking/caif for a further explanation on how to use and configure CAIF. -if CAIF - config CAIF_DEBUG bool "Enable Debug" + depends on CAIF default n --- help --- Enable the inclusion of debug code in the CAIF stack. Be aware that doing this will impact performance. If unsure say N. - config CAIF_NETDEV tristate "CAIF GPRS Network device" + depends on CAIF default CAIF ---help--- Say Y if you will be using a CAIF based GPRS network device. @@ -41,5 +40,3 @@ config CAIF_NETDEV If you select to build it as a built-in then the main CAIF device must also be a built-in. If unsure say Y. - -endif diff --git a/net/caif/Makefile b/net/caif/Makefile index 34852af2595e..f87481fb0e65 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -1,23 +1,13 @@ -ifeq ($(CONFIG_CAIF_DEBUG),1) -CAIF_DBG_FLAGS := -DDEBUG +ifeq ($(CONFIG_CAIF_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG endif -ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS) - caif-objs := caif_dev.o \ cfcnfg.o cfmuxl.o cfctrl.o \ cffrml.o cfveil.o cfdbgl.o\ cfserl.o cfdgml.o \ cfrfml.o cfvidl.o cfutill.o \ cfsrvl.o cfpkt_skbuff.o caif_config_util.o -clean-dirs:= .tmp_versions - -clean-files:= \ - Module.symvers \ - modules.order \ - *.cmd \ - *.o \ - *~ obj-$(CONFIG_CAIF) += caif.o obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c index 6f36580366f0..76ae68303d3a 100644 --- a/net/caif/caif_config_util.c +++ b/net/caif/caif_config_util.c @@ -80,6 +80,11 @@ int connect_req_to_link_param(struct cfcnfg *cnfg, l->u.utility.paramlen); break; + case CAIFPROTO_DEBUG: + l->linktype = CFCTRL_SRV_DBG; + l->endpoint = s->sockaddr.u.dbg.service; + l->chtype = s->sockaddr.u.dbg.type; + break; default: return -EINVAL; } diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index e2b86f1f5a47..0b586e9d1378 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, pref = CFPHYPREF_HIGH_BW; break; } - + dev_hold(dev); cfcnfg_add_phy_layer(get_caif_conf(), phy_type, dev, @@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, caifd->layer.up->ctrlcmd(caifd->layer.up, _CAIF_CTRLCMD_PHYIF_DOWN_IND, caifd->layer.id); + might_sleep(); res = wait_event_interruptible_timeout(caifd->event, atomic_read(&caifd->in_use) == 0, TIMEOUT); @@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what, "Unregistering an active CAIF device: %s\n", __func__, dev->name); cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); + dev_put(dev); atomic_set(&caifd->state, what); break; @@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void) EXPORT_SYMBOL(get_caif_conf); int caif_connect_client(struct caif_connect_request *conn_req, - struct cflayer *client_layer) + struct cflayer *client_layer, int *ifindex, + int *headroom, int *tailroom) { struct cfctrl_link_param param; int ret; @@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req, if (ret) return ret; /* Hook up the adaptation layer. */ - return cfcnfg_add_adaptation_layer(get_caif_conf(), - ¶m, client_layer); + return cfcnfg_add_adaptation_layer(get_caif_conf(), ¶m, + client_layer, ifindex, + headroom, tailroom); } EXPORT_SYMBOL(caif_connect_client); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 3d0e09584fae..8ce904786116 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -28,8 +28,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(AF_CAIF); -#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10) -#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100) +#define CAIF_DEF_SNDBUF (4096*10) +#define CAIF_DEF_RCVBUF (4096*100) /* * CAIF state is re-using the TCP socket states. @@ -76,6 +76,7 @@ struct caifsock { struct caif_connect_request conn_req; struct mutex readlock; struct dentry *debugfs_socket_dir; + int headroom, tailroom, maxframe; }; static int rx_flow_is_on(struct caifsock *cf_sk) @@ -594,27 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, goto err; noblock = msg->msg_flags & MSG_DONTWAIT; - buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM; - - ret = -EMSGSIZE; - if (buffer_size > CAIF_MAX_PAYLOAD_SIZE) - goto err; - timeo = sock_sndtimeo(sk, noblock); timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), 1, timeo, &ret); + if (ret) + goto err; ret = -EPIPE; if (cf_sk->sk.sk_state != CAIF_CONNECTED || sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto err; + /* Error if trying to write more than maximum frame size. */ + ret = -EMSGSIZE; + if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM) + goto err; + + buffer_size = len + cf_sk->headroom + cf_sk->tailroom; + ret = -ENOMEM; skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); - if (!skb) + + if (!skb || skb_tailroom(skb) < buffer_size) goto err; - skb_reserve(skb, CAIF_NEEDED_HEADROOM); + + skb_reserve(skb, cf_sk->headroom); ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); @@ -645,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, long timeo; err = -EOPNOTSUPP; - if (unlikely(msg->msg_flags&MSG_OOB)) goto out_err; @@ -662,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = len-sent; - if (size > CAIF_MAX_PAYLOAD_SIZE) - size = CAIF_MAX_PAYLOAD_SIZE; + if (size > cf_sk->maxframe) + size = cf_sk->maxframe; /* If size is more than half of sndbuf, chop up message */ if (size > ((sk->sk_sndbuf >> 1) - 64)) @@ -673,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = SKB_MAX_ALLOC; skb = sock_alloc_send_skb(sk, - size + CAIF_NEEDED_HEADROOM - + CAIF_NEEDED_TAILROOM, + size + cf_sk->headroom + + cf_sk->tailroom, msg->msg_flags&MSG_DONTWAIT, &err); if (skb == NULL) goto out_err; - skb_reserve(skb, CAIF_NEEDED_HEADROOM); + skb_reserve(skb, cf_sk->headroom); /* * If you pass two values to the sock_alloc_send_skb * it tries to grab the large buffer with GFP_NOFS @@ -821,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); long timeo; int err; + int ifindex, headroom, tailroom; + struct net_device *dev; + lock_sock(sk); err = -EAFNOSUPPORT; if (uaddr->sa_family != AF_CAIF) goto out; - err = -ESOCKTNOSUPPORT; - if (unlikely(!(sk->sk_type == SOCK_STREAM && - cf_sk->sk.sk_protocol == CAIFPROTO_AT) && - sk->sk_type != SOCK_SEQPACKET)) - goto out; switch (sock->state) { case SS_UNCONNECTED: /* Normal case, a fresh connect */ @@ -874,8 +877,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, sk_stream_kill_queues(&cf_sk->sk); err = -EINVAL; - if (addr_len != sizeof(struct sockaddr_caif) || - !uaddr) + if (addr_len != sizeof(struct sockaddr_caif)) goto out; memcpy(&cf_sk->conn_req.sockaddr, uaddr, @@ -888,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr, dbfs_atomic_inc(&cnt.num_connect_req); cf_sk->layer.receive = caif_sktrecv_cb; err = caif_connect_client(&cf_sk->conn_req, - &cf_sk->layer); + &cf_sk->layer, &ifindex, &headroom, &tailroom); if (err < 0) { cf_sk->sk.sk_socket->state = SS_UNCONNECTED; cf_sk->sk.sk_state = CAIF_DISCONNECTED; goto out; } + dev = dev_get_by_index(sock_net(sk), ifindex); + cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom); + cf_sk->tailroom = tailroom; + cf_sk->maxframe = dev->mtu - (headroom + tailroom); + dev_put(dev); + if (cf_sk->maxframe < 1) { + pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n", + __func__, dev->mtu); + err = -ENODEV; + goto out; + } err = -EINPROGRESS; wait_connect: diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index df43f264d9fb..1c29189b344d 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/slab.h> +#include <linux/netdevice.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/cfcnfg.h> @@ -22,6 +23,7 @@ #define PHY_NAME_LEN 20 #define container_obj(layr) container_of(layr, struct cfcnfg, layer) +#define RFM_FRAGMENT_SIZE 4030 /* Information about CAIF physical interfaces held by Config Module in order * to manage physical interfaces @@ -41,6 +43,15 @@ struct cfcnfg_phyinfo { /* Information about the physical device */ struct dev_info dev_info; + + /* Interface index */ + int ifindex; + + /* Use Start of frame extension */ + bool use_stx; + + /* Use Start of frame checksum */ + bool use_fcs; }; struct cfcnfg { @@ -248,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id) { } +int protohead[CFCTRL_SRV_MASK] = { + [CFCTRL_SRV_VEI] = 4, + [CFCTRL_SRV_DATAGRAM] = 7, + [CFCTRL_SRV_UTIL] = 4, + [CFCTRL_SRV_RFM] = 3, + [CFCTRL_SRV_DBG] = 3, +}; + int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, struct cfctrl_link_param *param, - struct cflayer *adap_layer) + struct cflayer *adap_layer, + int *ifindex, + int *proto_head, + int *proto_tail) { struct cflayer *frml; if (adap_layer == NULL) { @@ -276,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg, param->phyid); caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id == param->phyid); + + *ifindex = cnfg->phy_layers[param->phyid].ifindex; + *proto_head = + protohead[param->linktype]+ + (cnfg->phy_layers[param->phyid].use_stx ? 1 : 0); + + *proto_tail = 2; + /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */ cfctrl_enum_req(cnfg->ctrl, param->phyid); return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer); @@ -297,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, struct cfcnfg *cnfg = container_obj(layer); struct cflayer *servicel = NULL; struct cfcnfg_phyinfo *phyinfo; + struct net_device *netdev; + if (adapt_layer == NULL) { pr_debug("CAIF: %s(): link setup response " "but no client exist, send linkdown back\n", @@ -308,19 +340,15 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, caif_assert(cnfg != NULL); caif_assert(phyid != 0); phyinfo = &cnfg->phy_layers[phyid]; - caif_assert(phyinfo != NULL); caif_assert(phyinfo->id == phyid); caif_assert(phyinfo->phy_layer != NULL); caif_assert(phyinfo->phy_layer->id == phyid); - if (phyinfo != NULL && - phyinfo->phy_ref_count++ == 0 && - phyinfo->phy_layer != NULL && + phyinfo->phy_ref_count++; + if (phyinfo->phy_ref_count == 1 && phyinfo->phy_layer->modemcmd != NULL) { - caif_assert(phyinfo->phy_layer->id == phyid); phyinfo->phy_layer->modemcmd(phyinfo->phy_layer, _CAIF_MODEMCMD_PHYIF_USEFULL); - } adapt_layer->id = channel_id; @@ -332,7 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, servicel = cfdgml_create(channel_id, &phyinfo->dev_info); break; case CFCTRL_SRV_RFM: - servicel = cfrfml_create(channel_id, &phyinfo->dev_info); + netdev = phyinfo->dev_info.dev; + servicel = cfrfml_create(channel_id, &phyinfo->dev_info, + netdev->mtu); break; case CFCTRL_SRV_UTIL: servicel = cfutill_create(channel_id, &phyinfo->dev_info); @@ -363,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv, void cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, - void *dev, struct cflayer *phy_layer, u16 *phyid, - enum cfcnfg_phy_preference pref, + struct net_device *dev, struct cflayer *phy_layer, + u16 *phyid, enum cfcnfg_phy_preference pref, bool fcs, bool stx) { struct cflayer *frml; @@ -418,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type, cnfg->phy_layers[*phyid].dev_info.dev = dev; cnfg->phy_layers[*phyid].phy_layer = phy_layer; cnfg->phy_layers[*phyid].phy_ref_count = 0; + cnfg->phy_layers[*phyid].ifindex = dev->ifindex; + cnfg->phy_layers[*phyid].use_stx = stx; + cnfg->phy_layers[*phyid].use_fcs = fcs; + phy_layer->type = phy_type; frml = cffrml_create(*phyid, fcs); if (!frml) { diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index fcfda98a5e6d..563145fdc4c3 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -19,7 +19,7 @@ #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt){ - return CAIF_FAILURE; + return -1; } #else static int handle_loop(struct cfctrl *ctrl, @@ -43,7 +43,7 @@ struct cflayer *cfctrl_create(void) memset(&dev_info, 0, sizeof(dev_info)); dev_info.id = 0xff; memset(this, 0, sizeof(*this)); - cfsrvl_init(&this->serv, 0, &dev_info); + cfsrvl_init(&this->serv, 0, &dev_info, false); atomic_set(&this->req_seq_no, 1); atomic_set(&this->rsp_seq_no, 1); this->serv.layer.receive = cfctrl_recv; @@ -395,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) { - if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) + if (handle_loop(cfctrl, cmd, pkt) != 0) cmdrsp |= CFCTRL_ERR_BIT; } @@ -647,6 +647,6 @@ found: default: break; } - return CAIF_SUCCESS; + return 0; } #endif diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index ab6b6dc34cf8..676648cac8dd 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(dbg, 0, sizeof(struct cfsrvl)); - cfsrvl_init(dbg, channel_id, dev_info); + cfsrvl_init(dbg, channel_id, dev_info, false); dbg->layer.receive = cfdbgl_receive; dbg->layer.transmit = cfdbgl_transmit; snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id); diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index 53194840ecb6..ed9d53aff280 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -17,6 +17,7 @@ #define DGM_FLOW_OFF 0x81 #define DGM_FLOW_ON 0x80 #define DGM_CTRL_PKT_SIZE 1 +#define DGM_MTU 1500 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt); @@ -30,7 +31,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(dgm, 0, sizeof(struct cfsrvl)); - cfsrvl_init(dgm, channel_id, dev_info); + cfsrvl_init(dgm, channel_id, dev_info, true); dgm->layer.receive = cfdgml_receive; dgm->layer.transmit = cfdgml_transmit; snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id); @@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; + /* STE Modem cannot handle more than 1500 bytes datagrams */ + if (cfpkt_getlen(pkt) > DGM_MTU) + return -EMSGSIZE; + cfpkt_add_head(pkt, &zero, 4); /* Add info for MUX-layer to route the packet out. */ diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index a6fdf899741a..01f238ff2346 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -9,8 +9,8 @@ #include <linux/hardirq.h> #include <net/caif/cfpkt.h> -#define PKT_PREFIX CAIF_NEEDED_HEADROOM -#define PKT_POSTFIX CAIF_NEEDED_TAILROOM +#define PKT_PREFIX 16 +#define PKT_POSTFIX 2 #define PKT_LEN_WHEN_EXTENDING 128 #define PKT_ERROR(pkt, errmsg) do { \ cfpkt_priv(pkt)->erronous = true; \ @@ -338,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, u16 dstlen; u16 createlen; if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) { - cfpkt_destroy(addpkt); return dstpkt; } if (expectlen > addlen) diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index fd27b172fb5d..eb1602022ac0 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -7,102 +7,304 @@ #include <linux/stddef.h> #include <linux/spinlock.h> #include <linux/slab.h> +#include <linux/unaligned/le_byteshift.h> #include <net/caif/caif_layer.h> #include <net/caif/cfsrvl.h> #include <net/caif/cfpkt.h> -#define container_obj(layr) container_of(layr, struct cfsrvl, layer) - +#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer) #define RFM_SEGMENTATION_BIT 0x01 -#define RFM_PAYLOAD 0x00 -#define RFM_CMD_BIT 0x80 -#define RFM_FLOW_OFF 0x81 -#define RFM_FLOW_ON 0x80 -#define RFM_SET_PIN 0x82 -#define RFM_CTRL_PKT_SIZE 1 +#define RFM_HEAD_SIZE 7 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt); -static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl); -struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info) +struct cfrfml { + struct cfsrvl serv; + struct cfpkt *incomplete_frm; + int fragment_size; + u8 seghead[6]; + u16 pdu_size; + /* Protects serialized processing of packets */ + spinlock_t sync; +}; + +static void cfrfml_release(struct kref *kref) +{ + struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref); + struct cfrfml *rfml = container_obj(&srvl->layer); + + if (rfml->incomplete_frm) + cfpkt_destroy(rfml->incomplete_frm); + + kfree(srvl); +} + +struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, + int mtu_size) { - struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); - if (!rfm) { + int tmp; + struct cfrfml *this = + kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); + + if (!this) { pr_warning("CAIF: %s(): Out of memory\n", __func__); return NULL; } - caif_assert(offsetof(struct cfsrvl, layer) == 0); - memset(rfm, 0, sizeof(struct cfsrvl)); - cfsrvl_init(rfm, channel_id, dev_info); - rfm->layer.modemcmd = cfservl_modemcmd; - rfm->layer.receive = cfrfml_receive; - rfm->layer.transmit = cfrfml_transmit; - snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id); - return &rfm->layer; + + cfsrvl_init(&this->serv, channel_id, dev_info, false); + this->serv.release = cfrfml_release; + this->serv.layer.receive = cfrfml_receive; + this->serv.layer.transmit = cfrfml_transmit; + + /* Round down to closest multiple of 16 */ + tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16; + tmp *= 16; + + this->fragment_size = tmp; + spin_lock_init(&this->sync); + snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ, + "rfm%d", channel_id); + + return &this->serv.layer; } -static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) +static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, + struct cfpkt *pkt, int *err) { - return -EPROTO; + struct cfpkt *tmppkt; + *err = -EPROTO; + /* n-th but not last segment */ + + if (cfpkt_extr_head(pkt, seghead, 6) < 0) + return NULL; + + /* Verify correct header */ + if (memcmp(seghead, rfml->seghead, 6) != 0) + return NULL; + + tmppkt = cfpkt_append(rfml->incomplete_frm, pkt, + rfml->pdu_size + RFM_HEAD_SIZE); + + /* If cfpkt_append failes input pkts are not freed */ + *err = -ENOMEM; + if (tmppkt == NULL) + return NULL; + + *err = 0; + return tmppkt; } static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt) { u8 tmp; bool segmented; - int ret; + int err; + u8 seghead[6]; + struct cfrfml *rfml; + struct cfpkt *tmppkt = NULL; + caif_assert(layr->up != NULL); caif_assert(layr->receive != NULL); + rfml = container_obj(layr); + spin_lock(&rfml->sync); + + err = -EPROTO; + if (cfpkt_extr_head(pkt, &tmp, 1) < 0) + goto out; + segmented = tmp & RFM_SEGMENTATION_BIT; + + if (segmented) { + if (rfml->incomplete_frm == NULL) { + /* Initial Segment */ + if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0) + goto out; + + rfml->pdu_size = get_unaligned_le16(rfml->seghead+4); + + if (cfpkt_erroneous(pkt)) + goto out; + rfml->incomplete_frm = pkt; + pkt = NULL; + } else { + + tmppkt = rfm_append(rfml, seghead, pkt, &err); + if (tmppkt == NULL) + goto out; + + if (cfpkt_erroneous(tmppkt)) + goto out; + + rfml->incomplete_frm = tmppkt; + + + if (cfpkt_erroneous(tmppkt)) + goto out; + } + err = 0; + goto out; + } + + if (rfml->incomplete_frm) { + + /* Last Segment */ + tmppkt = rfm_append(rfml, seghead, pkt, &err); + if (tmppkt == NULL) + goto out; + + if (cfpkt_erroneous(tmppkt)) + goto out; + + rfml->incomplete_frm = NULL; + pkt = tmppkt; + tmppkt = NULL; + + /* Verify that length is correct */ + err = EPROTO; + if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1) + goto out; + } + + err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt); + +out: + + if (err != 0) { + if (tmppkt) + cfpkt_destroy(tmppkt); + if (pkt) + cfpkt_destroy(pkt); + if (rfml->incomplete_frm) + cfpkt_destroy(rfml->incomplete_frm); + rfml->incomplete_frm = NULL; + + pr_info("CAIF: %s(): " + "Connection error %d triggered on RFM link\n", + __func__, err); + + /* Trigger connection error upon failure.*/ + layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, + rfml->serv.dev_info.id); + } + spin_unlock(&rfml->sync); + return err; +} + + +static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt) +{ + caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size); + + /* Add info for MUX-layer to route the packet out. */ + cfpkt_info(pkt)->channel_id = rfml->serv.layer.id; /* - * RFM is taking care of segmentation and stripping of - * segmentation bit. + * To optimize alignment, we add up the size of CAIF header before + * payload. */ - if (cfpkt_extr_head(pkt, &tmp, 1) < 0) { - pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); - cfpkt_destroy(pkt); - return -EPROTO; - } - segmented = tmp & RFM_SEGMENTATION_BIT; - caif_assert(!segmented); + cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE; + cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info; - ret = layr->up->receive(layr->up, pkt); - return ret; + return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt); } static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt) { - u8 tmp = 0; - int ret; - struct cfsrvl *service = container_obj(layr); + int err; + u8 seg; + u8 head[6]; + struct cfpkt *rearpkt = NULL; + struct cfpkt *frontpkt = pkt; + struct cfrfml *rfml = container_obj(layr); caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (!cfsrvl_ready(service, &ret)) - return ret; + if (!cfsrvl_ready(&rfml->serv, &err)) + return err; + + err = -EPROTO; + if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1) + goto out; + + err = 0; + if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE) + err = cfpkt_peek_head(pkt, head, 6); + + if (err < 0) + goto out; + + while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) { + + seg = 1; + err = -EPROTO; + + if (cfpkt_add_head(frontpkt, &seg, 1) < 0) + goto out; + /* + * On OOM error cfpkt_split returns NULL. + * + * NOTE: Segmented pdu is not correctly aligned. + * This has negative performance impact. + */ + + rearpkt = cfpkt_split(frontpkt, rfml->fragment_size); + if (rearpkt == NULL) + goto out; + + err = cfrfml_transmit_segment(rfml, frontpkt); + + if (err != 0) + goto out; + frontpkt = rearpkt; + rearpkt = NULL; + + err = -ENOMEM; + if (frontpkt == NULL) + goto out; + err = -EPROTO; + if (cfpkt_add_head(frontpkt, head, 6) < 0) + goto out; - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_err("CAIF: %s():Packet too large - size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; } - if (cfpkt_add_head(pkt, &tmp, 1) < 0) { - pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); - return -EPROTO; + + seg = 0; + err = -EPROTO; + + if (cfpkt_add_head(frontpkt, &seg, 1) < 0) + goto out; + + err = cfrfml_transmit_segment(rfml, frontpkt); + + frontpkt = NULL; +out: + + if (err != 0) { + pr_info("CAIF: %s(): " + "Connection error %d triggered on RFM link\n", + __func__, err); + /* Trigger connection error upon failure.*/ + + layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, + rfml->serv.dev_info.id); + + if (rearpkt) + cfpkt_destroy(rearpkt); + + if (frontpkt && frontpkt != pkt) { + + cfpkt_destroy(frontpkt); + /* + * Socket layer will free the original packet, + * but this packet may already be sent and + * freed. So we have to return 0 in this case + * to avoid socket layer to re-free this packet. + * The return of shutdown indication will + * cause connection to be invalidated anyhow. + */ + err = 0; + } } - /* Add info for MUX-layer to route the packet out. */ - cfpkt_info(pkt)->channel_id = service->layer.id; - /* - * To optimize alignment, we add up the size of CAIF header before - * payload. - */ - cfpkt_info(pkt)->hdr_len = 1; - cfpkt_info(pkt)->dev_info = &service->dev_info; - ret = layr->dn->transmit(layr->dn, pkt); - if (ret < 0) - cfpkt_extr_head(pkt, &tmp, 1); - return ret; + return err; } diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 965c5baace40..a11fbd68a13d 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -14,7 +14,8 @@ #define container_obj(layr) ((struct cfserl *) layr) #define CFSERL_STX 0x02 -#define CAIF_MINIUM_PACKET_SIZE 4 +#define SERIAL_MINIUM_PACKET_SIZE 4 +#define SERIAL_MAX_FRAMESIZE 4096 struct cfserl { struct cflayer layer; struct cfpkt *incomplete_frm; @@ -119,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt) /* * Frame error handling */ - if (expectlen < CAIF_MINIUM_PACKET_SIZE - || expectlen > CAIF_MAX_FRAMESIZE) { + if (expectlen < SERIAL_MINIUM_PACKET_SIZE + || expectlen > SERIAL_MAX_FRAMESIZE) { if (!layr->usestx) { if (pkt != NULL) cfpkt_destroy(pkt); diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 6e5b7079a684..f40939a91211 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, int phyid) { struct cfsrvl *service = container_obj(layr); + caif_assert(layr->up != NULL); caif_assert(layr->up->ctrlcmd != NULL); + switch (ctrl) { case CAIF_CTRLCMD_INIT_RSP: service->open = true; @@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl) { struct cfsrvl *service = container_obj(layr); + caif_assert(layr != NULL); caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); + + if (!service->supports_flowctrl) + return 0; + switch (ctrl) { case CAIF_MODEMCMD_FLOW_ON_REQ: { @@ -152,9 +159,17 @@ void cfservl_destroy(struct cflayer *layer) kfree(layer); } +void cfsrvl_release(struct kref *kref) +{ + struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); + kfree(service); +} + void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info) + u8 channel_id, + struct dev_info *dev_info, + bool supports_flowctrl + ) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; @@ -164,14 +179,11 @@ void cfsrvl_init(struct cfsrvl *service, service->layer.ctrlcmd = cfservl_ctrlcmd; service->layer.modemcmd = cfservl_modemcmd; service->dev_info = *dev_info; + service->supports_flowctrl = supports_flowctrl; + service->release = cfsrvl_release; kref_init(&service->ref); } -void cfsrvl_release(struct kref *kref) -{ - struct cfsrvl *service = container_of(kref, struct cfsrvl, ref); - kfree(service); -} bool cfsrvl_ready(struct cfsrvl *service, int *err) { diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 5fd2c9ea8b42..02795aff57a4 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(util, 0, sizeof(struct cfsrvl)); - cfsrvl_init(util, channel_id, dev_info); + cfsrvl_init(util, channel_id, dev_info, true); util->layer.receive = cfutill_receive; util->layer.transmit = cfutill_transmit; snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1"); @@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt) if (!cfsrvl_ready(service, &ret)) return ret; - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_err("CAIF: %s(): packet too large size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; - } - cfpkt_add_head(pkt, &zero, 1); /* Add info for MUX-layer to route the packet out. */ info = cfpkt_info(pkt); diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index e04f7d964e83..77cc09faac9a 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info) } caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(vei, 0, sizeof(struct cfsrvl)); - cfsrvl_init(vei, channel_id, dev_info); + cfsrvl_init(vei, channel_id, dev_info, true); vei->layer.receive = cfvei_receive; vei->layer.transmit = cfvei_transmit; snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id); @@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt) return ret; caif_assert(layr->dn != NULL); caif_assert(layr->dn->transmit != NULL); - if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) { - pr_warning("CAIF: %s(): Packet too large - size=%d\n", - __func__, cfpkt_getlen(pkt)); - return -EOVERFLOW; - } if (cfpkt_add_head(pkt, &tmp, 1) < 0) { pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index 89ad4ea239f1..ada6ee2d48f5 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info) caif_assert(offsetof(struct cfsrvl, layer) == 0); memset(vid, 0, sizeof(struct cfsrvl)); - cfsrvl_init(vid, channel_id, dev_info); + cfsrvl_init(vid, channel_id, dev_info, false); vid->layer.receive = cfvidl_receive; vid->layer.transmit = cfvidl_transmit; snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1"); diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 610966abe2dc..4293e190ec53 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -23,7 +23,7 @@ #include <net/caif/caif_dev.h> /* GPRS PDP connection has MTU to 1500 */ -#define SIZE_MTU 1500 +#define GPRS_PDP_MTU 1500 /* 5 sec. connect timeout */ #define CONNECT_TIMEOUT (5 * HZ) #define CAIF_NET_DEFAULT_QUEUE_LEN 500 @@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev) { struct chnl_net *priv = NULL; int result = -1; + int llifindex, headroom, tailroom, mtu; + struct net_device *lldev; ASSERT_RTNL(); priv = netdev_priv(dev); if (!priv) { @@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev) if (priv->state != CAIF_CONNECTING) { priv->state = CAIF_CONNECTING; - result = caif_connect_client(&priv->conn_req, &priv->chnl); + result = caif_connect_client(&priv->conn_req, &priv->chnl, + &llifindex, &headroom, &tailroom); if (result != 0) { - priv->state = CAIF_DISCONNECTED; pr_debug("CAIF: %s(): err: " "Unable to register and open device," " Err:%d\n", __func__, result); - return result; + goto error; + } + + lldev = dev_get_by_index(dev_net(dev), llifindex); + + if (lldev == NULL) { + pr_debug("CAIF: %s(): no interface?\n", __func__); + result = -ENODEV; + goto error; + } + + dev->needed_tailroom = tailroom + lldev->needed_tailroom; + dev->hard_header_len = headroom + lldev->hard_header_len + + lldev->needed_tailroom; + + /* + * MTU, head-room etc is not know before we have a + * CAIF link layer device available. MTU calculation may + * override initial RTNL configuration. + * MTU is minimum of current mtu, link layer mtu pluss + * CAIF head and tail, and PDP GPRS contexts max MTU. + */ + mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); + mtu = min_t(int, GPRS_PDP_MTU, mtu); + dev_set_mtu(dev, mtu); + dev_put(lldev); + + if (mtu < 100) { + pr_warning("CAIF: %s(): " + "CAIF Interface MTU too small (%d)\n", + __func__, mtu); + result = -ENODEV; + goto error; } } + rtnl_unlock(); /* Release RTNL lock during connect wait */ + result = wait_event_interruptible_timeout(priv->netmgmt_wq, priv->state != CAIF_CONNECTING, CONNECT_TIMEOUT); + rtnl_lock(); + if (result == -ERESTARTSYS) { pr_debug("CAIF: %s(): wait_event_interruptible" " woken by a signal\n", __func__); - return -ERESTARTSYS; + result = -ERESTARTSYS; + goto error; } + if (result == 0) { pr_debug("CAIF: %s(): connect timeout\n", __func__); caif_disconnect_client(&priv->chnl); priv->state = CAIF_DISCONNECTED; pr_debug("CAIF: %s(): state disconnected\n", __func__); - return -ETIMEDOUT; + result = -ETIMEDOUT; + goto error; } if (priv->state != CAIF_CONNECTED) { pr_debug("CAIF: %s(): connect failed\n", __func__); - return -ECONNREFUSED; + result = -ECONNREFUSED; + goto error; } pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); return 0; + +error: + caif_disconnect_client(&priv->chnl); + priv->state = CAIF_DISCONNECTED; + pr_debug("CAIF: %s(): state disconnected\n", __func__); + return result; + } static int chnl_net_stop(struct net_device *dev) @@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev) dev->destructor = free_netdev; dev->flags |= IFF_NOARP; dev->flags |= IFF_POINTOPOINT; - dev->needed_headroom = CAIF_NEEDED_HEADROOM; - dev->needed_tailroom = CAIF_NEEDED_TAILROOM; - dev->mtu = SIZE_MTU; + dev->mtu = GPRS_PDP_MTU; dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; priv = netdev_priv(dev); diff --git a/net/can/raw.c b/net/can/raw.c index da99cf153b33..a10e3338f084 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (count > 1) { /* filter does not fit into dfilter => alloc space */ - filter = kmalloc(optlen, GFP_KERNEL); - if (!filter) - return -ENOMEM; - - if (copy_from_user(filter, optval, optlen)) { - kfree(filter); - return -EFAULT; - } + filter = memdup_user(optval, optlen); + if (IS_ERR(filter)) + return PTR_ERR(filter); } else if (count == 1) { if (copy_from_user(&sfilter, optval, sizeof(sfilter))) return -EFAULT; @@ -655,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; + + /* to be able to check the received tx sock reference in raw_rcv() */ + skb_tx(skb)->prevent_sk_orphan = 1; + skb->dev = dev; skb->sk = sk; diff --git a/net/compat.c b/net/compat.c index ec24d9edb025..63d260e81472 100644 --- a/net/compat.c +++ b/net/compat.c @@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, int tot_len; if (kern_msg->msg_namelen) { - if (mode==VERIFY_READ) { + if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, kern_address); @@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, static int do_set_sock_timeout(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - struct compat_timeval __user *up = (struct compat_timeval __user *) optval; + struct compat_timeval __user *up = (struct compat_timeval __user *)optval; struct timeval ktime; mm_segment_t old_fs; int err; @@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level, return -EFAULT; old_fs = get_fs(); set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); + err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); set_fs(old_fs); return err; @@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, unsigned int optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_setsockopt(sock,level,optname); + if (sock) { + err = security_socket_setsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname, int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv = - (struct compat_timeval __user*) userstamp; + (struct compat_timeval __user *) userstamp; int err = -ENOENT; struct timeval tv; @@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp); int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct compat_timespec __user *ctv = - (struct compat_timespec __user*) userstamp; + (struct compat_timespec __user *) userstamp; int err = -ENOENT; struct timespec ts; @@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err; - struct socket *sock; + struct socket *sock = sockfd_lookup(fd, &err); - if ((sock = sockfd_lookup(fd, &err))!=NULL) - { - err = security_socket_getsockopt(sock, level, - optname); + if (sock) { + err = security_socket_getsockopt(sock, level, optname); if (err) { sockfd_put(sock); return err; @@ -531,7 +528,7 @@ struct compat_group_req { __u32 gr_interface; struct __kernel_sockaddr_storage gr_group __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_source_req { __u32 gsr_interface; @@ -539,7 +536,7 @@ struct compat_group_source_req { __attribute__ ((aligned(4))); struct __kernel_sockaddr_storage gsr_source __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; struct compat_group_filter { __u32 gf_interface; @@ -549,7 +546,7 @@ struct compat_group_filter { __u32 gf_numsrc; struct __kernel_sockaddr_storage gf_slist[1] __attribute__ ((aligned(4))); -} __attribute__ ((packed)); +} __packed; #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ sizeof(struct __kernel_sockaddr_storage)) @@ -557,7 +554,7 @@ struct compat_group_filter { int compat_mc_setsockopt(struct sock *sock, int level, int optname, char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) + int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) { char __user *koptval = optval; int koptlen = optlen; @@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname, } return setsockopt(sock, level, optname, koptval, koptlen); } - EXPORT_SYMBOL(compat_mc_setsockopt); int compat_mc_getsockopt(struct sock *sock, int level, int optname, char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) + int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) { struct compat_group_filter __user *gf32 = (void *)optval; struct group_filter __user *kgf; @@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, __put_user(interface, &kgf->gf_interface) || __put_user(fmode, &kgf->gf_fmode) || __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) + copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) return -EFAULT; err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); @@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname, copylen = numsrc * sizeof(gf32->gf_slist[0]); if (copylen > klen) copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) + if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) return -EFAULT; } return err; } - EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5)}; +static unsigned char nas[20] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) +}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: - ret = sys_shutdown(a0,a1); + ret = sys_shutdown(a0, a1); break; case SYS_SETSOCKOPT: ret = compat_sys_setsockopt(a0, a1, a[2], diff --git a/net/core/Makefile b/net/core/Makefile index 51c3eec850ef..8a04dd22cf77 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o - +obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f5b6f43a4c2e..251997a95483 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, err); } +EXPORT_SYMBOL(skb_recv_datagram); void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { @@ -288,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) return err; } - EXPORT_SYMBOL(skb_kill_datagram); /** @@ -373,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, fault: return -EFAULT; } +EXPORT_SYMBOL(skb_copy_datagram_iovec); /** * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. @@ -716,6 +717,7 @@ csum_error: fault: return -EFAULT; } +EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); /** * datagram_poll - generic datagram poll @@ -770,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, return mask; } - EXPORT_SYMBOL(datagram_poll); -EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec); -EXPORT_SYMBOL(skb_copy_datagram_iovec); -EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 2b3bf53bc687..e1c1cdcc2bb0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -101,8 +101,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> -#include <linux/if_bridge.h> -#include <linux/if_macvlan.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -803,35 +801,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags - find any device with given flags + * dev_get_by_flags_rcu - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device - * is not found or a pointer to the device. The device returned has - * had a reference added and the pointer is safe until the user calls - * dev_put to indicate they have finished with it. + * is not found or a pointer to the device. Must be called inside + * rcu_read_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; - rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { - dev_hold(dev); ret = dev; break; } } - rcu_read_unlock(); return ret; } -EXPORT_SYMBOL(dev_get_by_flags); +EXPORT_SYMBOL(dev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -1488,6 +1482,7 @@ static inline void net_timestamp_check(struct sk_buff *skb) int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { skb_orphan(skb); + nf_reset(skb); if (!(dev->flags & IFF_UP) || (skb->len > (dev->mtu + dev->hard_header_len))) { @@ -1541,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (net_ratelimit()) printk(KERN_CRIT "protocol %04x is " "buggy, dev %s\n", - skb2->protocol, dev->name); + ntohs(skb2->protocol), + dev->name); skb_reset_network_header(skb2); } @@ -1553,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* + * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues + * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. + */ +void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if (txq < real_num) { + dev->real_num_tx_queues = txq; + qdisc_reset_all_tx_gt(dev, txq); + } +} +EXPORT_SYMBOL(netif_set_real_num_tx_queues); static inline void __netif_reschedule(struct Qdisc *q) { @@ -1893,8 +1907,32 @@ static int dev_gso_segment(struct sk_buff *skb) */ static inline void skb_orphan_try(struct sk_buff *skb) { - if (!skb_tx(skb)->flags) + struct sock *sk = skb->sk; + + if (sk && !skb_tx(skb)->flags) { + /* skb_tx_hash() wont be able to get sk. + * We copy sk_hash into skb->rxhash + */ + if (!skb->rxhash) + skb->rxhash = sk->sk_hash; skb_orphan(skb); + } +} + +/* + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG, or if + * at least one of fragments is in highmem and device does not + * support DMA from it. + */ +static inline int skb_needs_linearize(struct sk_buff *skb, + struct net_device *dev) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || + illegal_highdma(dev, skb)))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -1921,6 +1959,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto out_kfree_skb; if (skb->next) goto gso; + } else { + if (skb_needs_linearize(skb, dev) && + __skb_linearize(skb)) + goto out_kfree_skb; + + /* If packet is not checksummed and device does not + * support checksumming for this protocol, complete + * checksumming here. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + skb_set_transport_header(skb, skb->csum_start - + skb_headroom(skb)); + if (!dev_can_checksum(dev, skb) && + skb_checksum_help(skb)) + goto out_kfree_skb; + } } rc = ops->ndo_start_xmit(skb, dev); @@ -1980,8 +2034,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol; - + hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); @@ -2004,12 +2057,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - u16 queue_index; + int queue_index; struct sock *sk = skb->sk; - if (sk_tx_queue_recorded(sk)) { - queue_index = sk_tx_queue_get(sk); - } else { + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) { @@ -2038,14 +2090,24 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); + bool contended = qdisc_is_running(q); int rc; + /* + * Heuristic to force contended enqueues to serialize on a + * separate lock before trying to get qdisc main lock. + * This permits __QDISC_STATE_RUNNING owner to get the lock more often + * and dequeue packets faster. + */ + if (unlikely(contended)) + spin_lock(&q->busylock); + spin_lock(root_lock); if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { kfree_skb(skb); rc = NET_XMIT_DROP; } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && - !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + qdisc_run_begin(q)) { /* * This is a work-conserving queue; there are no old skbs * waiting to be sent out; and the qdisc is not running - @@ -2054,37 +2116,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); __qdisc_update_bstats(q, skb->len); - if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } __qdisc_run(q); - else - clear_bit(__QDISC_STATE_RUNNING, &q->state); + } else + qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); + if (qdisc_run_begin(q)) { + if (unlikely(contended)) { + spin_unlock(&q->busylock); + contended = false; + } + __qdisc_run(q); + } } spin_unlock(root_lock); - + if (unlikely(contended)) + spin_unlock(&q->busylock); return rc; } -/* - * Returns true if either: - * 1. skb has frag_list and the device doesn't support FRAGLIST, or - * 2. skb is fragmented and the device does not support SG, or if - * at least one of fragments is in highmem and device does not - * support DMA from it. - */ -static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) -{ - return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || - illegal_highdma(dev, skb))); -} - /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -2117,25 +2175,6 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; - /* GSO will handle the following emulations directly. */ - if (netif_needs_gso(dev, skb)) - goto gso; - - /* Convert a paged skb to linear, if required */ - if (skb_needs_linearize(skb, dev) && __skb_linearize(skb)) - goto out_kfree_skb; - - /* If packet is not checksummed and device does not support - * checksumming for this protocol, complete checksumming here. - */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); - if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) - goto out_kfree_skb; - } - -gso: /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ @@ -2194,7 +2233,6 @@ gso: rc = -ENETDOWN; rcu_read_unlock_bh(); -out_kfree_skb: kfree_skb(skb); return rc; out: @@ -2579,70 +2617,14 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } -#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) - -#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) +#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ + (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); #endif -/* - * If bridge module is loaded call bridging hook. - * returns NULL if packet was consumed. - */ -struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(br_handle_frame_hook); - -static inline struct sk_buff *handle_bridge(struct sk_buff *skb, - struct packet_type **pt_prev, int *ret, - struct net_device *orig_dev) -{ - struct net_bridge_port *port; - - if (skb->pkt_type == PACKET_LOOPBACK || - (port = rcu_dereference(skb->dev->br_port)) == NULL) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - - return br_handle_frame_hook(port, skb); -} -#else -#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) -#endif - -#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) -struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, - struct sk_buff *skb) __read_mostly; -EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); - -static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, - struct net_device *orig_dev) -{ - struct macvlan_port *port; - - port = rcu_dereference(skb->dev->macvlan_port); - if (!port) - return skb; - - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } - return macvlan_handle_frame_hook(port, skb); -} -#else -#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) -#endif - #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -2660,10 +2642,10 @@ static int ing_filter(struct sk_buff *skb) int result = TC_ACT_OK; struct Qdisc *q; - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); + if (unlikely(MAX_RED_LOOP < ttl++)) { + if (net_ratelimit()) + pr_warning( "Redir loop detected Dropping packet (%d->%d)\n", + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2693,9 +2675,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); } switch (ing_filter(skb)) { @@ -2738,6 +2717,51 @@ void netif_nit_deliver(struct sk_buff *skb) rcu_read_unlock(); } +/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register + * @rx_handler_data: data pointer that is used by rx handler + * + * Register a receive hander for a device. This handler will then be + * called from __netif_receive_skb. A negative errno code is returned + * on a failure. + * + * The caller must hold the rtnl_mutex. + */ +int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler, + void *rx_handler_data) +{ + ASSERT_RTNL(); + + if (dev->rx_handler) + return -EBUSY; + + rcu_assign_pointer(dev->rx_handler_data, rx_handler_data); + rcu_assign_pointer(dev->rx_handler, rx_handler); + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_register); + +/** + * netdev_rx_handler_unregister - unregister receive handler + * @dev: device to unregister a handler from + * + * Unregister a receive hander from a device. + * + * The caller must hold the rtnl_mutex. + */ +void netdev_rx_handler_unregister(struct net_device *dev) +{ + + ASSERT_RTNL(); + rcu_assign_pointer(dev->rx_handler, NULL); + rcu_assign_pointer(dev->rx_handler_data, NULL); +} +EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); + static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, struct net_device *master) { @@ -2759,7 +2783,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) if (master->priv_flags & IFF_MASTER_ARPMON) dev->last_rx = jiffies; - if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { + if ((master->priv_flags & IFF_MASTER_ALB) && + (master->priv_flags & IFF_BRIDGE_PORT)) { /* Do address unmangle. The local destination address * will be always the one master has. Provides the right * functionality in a bridge. @@ -2790,6 +2815,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *master; struct net_device *null_or_orig; @@ -2831,8 +2857,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb->dev = master; } - __get_cpu_var(softnet_data).processed++; - + __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -2864,12 +2889,17 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; - skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto out; + /* Handle special case of bridge or macvlan */ + rx_handler = rcu_dereference(skb->dev->rx_handler); + if (rx_handler) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + skb = rx_handler(skb); + if (!skb) + goto out; + } /* * Make sure frames received on VLAN interfaces stacked on @@ -2930,6 +2960,9 @@ int netif_receive_skb(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + if (skb_defer_rx_timestamp(skb)) + return NET_RX_SUCCESS; + #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -3694,10 +3727,11 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - const struct net_device_stats *stats = dev_get_stats(dev); + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " - "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", + seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " + "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -5246,20 +5280,22 @@ void netdev_run_todo(void) /** * dev_txq_stats_fold - fold tx_queues stats * @dev: device to get statistics from - * @stats: struct net_device_stats to hold results + * @stats: struct rtnl_link_stats64 to hold results */ void dev_txq_stats_fold(const struct net_device *dev, - struct net_device_stats *stats) + struct rtnl_link_stats64 *stats) { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; unsigned int i; struct netdev_queue *txq; for (i = 0; i < dev->num_tx_queues; i++) { txq = netdev_get_tx_queue(dev, i); + spin_lock_bh(&txq->_xmit_lock); tx_bytes += txq->tx_bytes; tx_packets += txq->tx_packets; tx_dropped += txq->tx_dropped; + spin_unlock_bh(&txq->_xmit_lock); } if (tx_bytes || tx_packets || tx_dropped) { stats->tx_bytes = tx_bytes; @@ -5269,23 +5305,53 @@ void dev_txq_stats_fold(const struct net_device *dev, } EXPORT_SYMBOL(dev_txq_stats_fold); +/* Convert net_device_stats to rtnl_link_stats64. They have the same + * fields in the same order, with only the type differing. + */ +static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats) +{ +#if BITS_PER_LONG == 64 + BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + memcpy(stats64, netdev_stats, sizeof(*stats64)); +#else + size_t i, n = sizeof(*stats64) / sizeof(u64); + const unsigned long *src = (const unsigned long *)netdev_stats; + u64 *dst = (u64 *)stats64; + + BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != + sizeof(*stats64) / sizeof(u64)); + for (i = 0; i < n; i++) + dst[i] = src[i]; +#endif +} + /** * dev_get_stats - get network device statistics * @dev: device to get statistics from + * @storage: place to store stats * - * Get network statistics from device. The device driver may provide - * its own method by setting dev->netdev_ops->get_stats; otherwise - * the internal statistics structure is used. + * Get network statistics from device. Return @storage. + * The device driver may provide its own method by setting + * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats; + * otherwise the internal statistics structure is used. */ -const struct net_device_stats *dev_get_stats(struct net_device *dev) +struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, + struct rtnl_link_stats64 *storage) { const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_get_stats) - return ops->ndo_get_stats(dev); - - dev_txq_stats_fold(dev, &dev->stats); - return &dev->stats; + if (ops->ndo_get_stats64) { + memset(storage, 0, sizeof(*storage)); + return ops->ndo_get_stats64(dev, storage); + } + if (ops->ndo_get_stats) { + netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); + return storage; + } + netdev_stats_to_stats64(storage, &dev->stats); + dev_txq_stats_fold(dev, storage); + return storage; } EXPORT_SYMBOL(dev_get_stats); @@ -5790,6 +5856,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len) return buffer; } +static int __netdev_printk(const char *level, const struct net_device *dev, + struct va_format *vaf) +{ + int r; + + if (dev && dev->dev.parent) + r = dev_printk(level, dev->dev.parent, "%s: %pV", + netdev_name(dev), vaf); + else if (dev) + r = printk("%s%s: %pV", level, netdev_name(dev), vaf); + else + r = printk("%s(NULL net_device): %pV", level, vaf); + + return r; +} + +int netdev_printk(const char *level, const struct net_device *dev, + const char *format, ...) +{ + struct va_format vaf; + va_list args; + int r; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + r = __netdev_printk(level, dev, &vaf); + va_end(args); + + return r; +} +EXPORT_SYMBOL(netdev_printk); + +#define define_netdev_printk_level(func, level) \ +int func(const struct net_device *dev, const char *fmt, ...) \ +{ \ + int r; \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + r = __netdev_printk(level, dev, &vaf); \ + va_end(args); \ + \ + return r; \ +} \ +EXPORT_SYMBOL(func); + +define_netdev_printk_level(netdev_emerg, KERN_EMERG); +define_netdev_printk_level(netdev_alert, KERN_ALERT); +define_netdev_printk_level(netdev_crit, KERN_CRIT); +define_netdev_printk_level(netdev_err, KERN_ERR); +define_netdev_printk_level(netdev_warn, KERN_WARNING); +define_netdev_printk_level(netdev_notice, KERN_NOTICE); +define_netdev_printk_level(netdev_info, KERN_INFO); + static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ad41529fb60f..36e603c78ce9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -223,6 +223,11 @@ static int set_all_monitor_traces(int state) spin_lock(&trace_state_lock); + if (state == trace_state) { + rc = -EAGAIN; + goto out_unlock; + } + switch (state) { case TRACE_ON: rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); @@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state) if (!rc) trace_state = state; + else + rc = -EINPROGRESS; +out_unlock: spin_unlock(&trace_state_lock); - if (rc) - return -EINPROGRESS; return rc; } @@ -341,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = { static int __init init_net_drop_monitor(void) { - int cpu; - int rc, i, ret; struct per_cpu_dm_data *data; + int cpu, rc; + printk(KERN_INFO "Initalizing network drop monitor service\n"); if (sizeof(void *) > 8) { @@ -351,21 +357,12 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - if (genl_register_family(&net_drop_monitor_family) < 0) { + rc = genl_register_family_with_ops(&net_drop_monitor_family, + dropmon_ops, + ARRAY_SIZE(dropmon_ops)); + if (rc) { printk(KERN_ERR "Could not create drop monitor netlink family\n"); - return -EFAULT; - } - - rc = -EFAULT; - - for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { - ret = genl_register_ops(&net_drop_monitor_family, - &dropmon_ops[i]); - if (ret) { - printk(KERN_CRIT "Failed to register operation %d\n", - dropmon_ops[i].cmd); - goto out_unreg; - } + return rc; } rc = register_netdevice_notifier(&dropmon_net_notifier); diff --git a/net/core/dst.c b/net/core/dst.c index 9920722cc82b..6c41b1fac3db 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -197,7 +197,6 @@ static void ___dst_free(struct dst_entry *dst) dst->input = dst->output = dst_discard; dst->obsolete = 2; } -EXPORT_SYMBOL(__dst_free); void __dst_free(struct dst_entry *dst) { @@ -213,6 +212,7 @@ void __dst_free(struct dst_entry *dst) } spin_unlock_bh(&dst_garbage.lock); } +EXPORT_SYMBOL(__dst_free); struct dst_entry *dst_destroy(struct dst_entry * dst) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a0f4964033d2..7a85367b3c2f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -144,31 +144,13 @@ u32 ethtool_op_get_flags(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_flags); -int ethtool_op_set_flags(struct net_device *dev, u32 data) +int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) { - const struct ethtool_ops *ops = dev->ethtool_ops; - unsigned long features = dev->features; - - if (data & ETH_FLAG_LRO) - features |= NETIF_F_LRO; - else - features &= ~NETIF_F_LRO; - - if (data & ETH_FLAG_NTUPLE) { - if (!ops->set_rx_ntuple) - return -EOPNOTSUPP; - features |= NETIF_F_NTUPLE; - } else { - /* safe to clear regardless */ - features &= ~NETIF_F_NTUPLE; - } - - if (data & ETH_FLAG_RXHASH) - features |= NETIF_F_RXHASH; - else - features &= ~NETIF_F_RXHASH; + if (data & ~supported) + return -EINVAL; - dev->features = features; + dev->features = ((dev->features & ~flags_dup_features) | + (data & flags_dup_features)); return 0; } EXPORT_SYMBOL(ethtool_op_set_flags); @@ -318,23 +300,33 @@ out: } static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { - struct ethtool_rxnfc cmd; + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_SRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; - return dev->ethtool_ops->set_rxnfc(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &info); } static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, - void __user *useraddr) + u32 cmd, void __user *useraddr) { struct ethtool_rxnfc info; + size_t info_size = sizeof(info); const struct ethtool_ops *ops = dev->ethtool_ops; int ret; void *rule_buf = NULL; @@ -342,13 +334,22 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, if (!ops->get_rxnfc) return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) + /* struct ethtool_rxnfc was originally defined for + * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data + * members. User-space might still be using that + * definition. */ + if (cmd == ETHTOOL_GRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; if (info.cmd == ETHTOOL_GRXCLSRLALL) { if (info.rule_cnt > 0) { - rule_buf = kmalloc(info.rule_cnt * sizeof(u32), - GFP_USER); + if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); if (!rule_buf) return -ENOMEM; } @@ -359,7 +360,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, goto err_out; ret = -EFAULT; - if (copy_to_user(useraddr, &info, sizeof(info))) + if (copy_to_user(useraddr, &info, info_size)) goto err_out; if (rule_buf) { @@ -376,6 +377,80 @@ err_out: return ret; } +static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_rxfh_indir *indir; + u32 table_size; + size_t full_size; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir) + return -EOPNOTSUPP; + + if (copy_from_user(&table_size, + useraddr + offsetof(struct ethtool_rxfh_indir, size), + sizeof(table_size))) + return -EFAULT; + + if (table_size > + (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) + return -ENOMEM; + full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; + indir = kmalloc(full_size, GFP_USER); + if (!indir) + return -ENOMEM; + + indir->cmd = ETHTOOL_GRXFHINDIR; + indir->size = table_size; + ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); + if (ret) + goto out; + + if (copy_to_user(useraddr, indir, full_size)) + ret = -EFAULT; + +out: + kfree(indir); + return ret; +} + +static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_rxfh_indir *indir; + u32 table_size; + size_t full_size; + int ret; + + if (!dev->ethtool_ops->set_rxfh_indir) + return -EOPNOTSUPP; + + if (copy_from_user(&table_size, + useraddr + offsetof(struct ethtool_rxfh_indir, size), + sizeof(table_size))) + return -EFAULT; + + if (table_size > + (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index)) + return -ENOMEM; + full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size; + indir = kmalloc(full_size, GFP_USER); + if (!indir) + return -ENOMEM; + + if (copy_from_user(indir, useraddr, full_size)) { + ret = -EFAULT; + goto out; + } + + ret = dev->ethtool_ops->set_rxfh_indir(dev, indir); + +out: + kfree(indir); + return ret; +} + static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, struct ethtool_rx_ntuple_flow_spec *spec, struct ethtool_rx_ntuple_flow_spec_container *fsc) @@ -1516,12 +1591,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: - rc = ethtool_get_rxnfc(dev, useraddr); + rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_SRXFH: case ETHTOOL_SRXCLSRLDEL: case ETHTOOL_SRXCLSRLINS: - rc = ethtool_set_rxnfc(dev, useraddr); + rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); @@ -1544,6 +1619,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; + case ETHTOOL_GRXFHINDIR: + rc = ethtool_get_rxfh_indir(dev, useraddr); + break; + case ETHTOOL_SRXFHINDIR: + rc = ethtool_set_rxfh_indir(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/filter.c b/net/core/filter.c index da69fb728d32..52b051f82a01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int fentry = &filter[pc]; switch (fentry->code) { - case BPF_ALU|BPF_ADD|BPF_X: + case BPF_S_ALU_ADD_X: A += X; continue; - case BPF_ALU|BPF_ADD|BPF_K: + case BPF_S_ALU_ADD_K: A += fentry->k; continue; - case BPF_ALU|BPF_SUB|BPF_X: + case BPF_S_ALU_SUB_X: A -= X; continue; - case BPF_ALU|BPF_SUB|BPF_K: + case BPF_S_ALU_SUB_K: A -= fentry->k; continue; - case BPF_ALU|BPF_MUL|BPF_X: + case BPF_S_ALU_MUL_X: A *= X; continue; - case BPF_ALU|BPF_MUL|BPF_K: + case BPF_S_ALU_MUL_K: A *= fentry->k; continue; - case BPF_ALU|BPF_DIV|BPF_X: + case BPF_S_ALU_DIV_X: if (X == 0) return 0; A /= X; continue; - case BPF_ALU|BPF_DIV|BPF_K: + case BPF_S_ALU_DIV_K: A /= fentry->k; continue; - case BPF_ALU|BPF_AND|BPF_X: + case BPF_S_ALU_AND_X: A &= X; continue; - case BPF_ALU|BPF_AND|BPF_K: + case BPF_S_ALU_AND_K: A &= fentry->k; continue; - case BPF_ALU|BPF_OR|BPF_X: + case BPF_S_ALU_OR_X: A |= X; continue; - case BPF_ALU|BPF_OR|BPF_K: + case BPF_S_ALU_OR_K: A |= fentry->k; continue; - case BPF_ALU|BPF_LSH|BPF_X: + case BPF_S_ALU_LSH_X: A <<= X; continue; - case BPF_ALU|BPF_LSH|BPF_K: + case BPF_S_ALU_LSH_K: A <<= fentry->k; continue; - case BPF_ALU|BPF_RSH|BPF_X: + case BPF_S_ALU_RSH_X: A >>= X; continue; - case BPF_ALU|BPF_RSH|BPF_K: + case BPF_S_ALU_RSH_K: A >>= fentry->k; continue; - case BPF_ALU|BPF_NEG: + case BPF_S_ALU_NEG: A = -A; continue; - case BPF_JMP|BPF_JA: + case BPF_S_JMP_JA: pc += fentry->k; continue; - case BPF_JMP|BPF_JGT|BPF_K: + case BPF_S_JMP_JGT_K: pc += (A > fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_K: + case BPF_S_JMP_JGE_K: pc += (A >= fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_K: + case BPF_S_JMP_JEQ_K: pc += (A == fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_K: + case BPF_S_JMP_JSET_K: pc += (A & fentry->k) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGT|BPF_X: + case BPF_S_JMP_JGT_X: pc += (A > X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JGE|BPF_X: + case BPF_S_JMP_JGE_X: pc += (A >= X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JEQ|BPF_X: + case BPF_S_JMP_JEQ_X: pc += (A == X) ? fentry->jt : fentry->jf; continue; - case BPF_JMP|BPF_JSET|BPF_X: + case BPF_S_JMP_JSET_X: pc += (A & X) ? fentry->jt : fentry->jf; continue; - case BPF_LD|BPF_W|BPF_ABS: + case BPF_S_LD_W_ABS: k = fentry->k; load_w: ptr = load_pointer(skb, k, 4, &tmp); @@ -217,7 +217,7 @@ load_w: continue; } break; - case BPF_LD|BPF_H|BPF_ABS: + case BPF_S_LD_H_ABS: k = fentry->k; load_h: ptr = load_pointer(skb, k, 2, &tmp); @@ -226,7 +226,7 @@ load_h: continue; } break; - case BPF_LD|BPF_B|BPF_ABS: + case BPF_S_LD_B_ABS: k = fentry->k; load_b: ptr = load_pointer(skb, k, 1, &tmp); @@ -235,54 +235,54 @@ load_b: continue; } break; - case BPF_LD|BPF_W|BPF_LEN: + case BPF_S_LD_W_LEN: A = skb->len; continue; - case BPF_LDX|BPF_W|BPF_LEN: + case BPF_S_LDX_W_LEN: X = skb->len; continue; - case BPF_LD|BPF_W|BPF_IND: + case BPF_S_LD_W_IND: k = X + fentry->k; goto load_w; - case BPF_LD|BPF_H|BPF_IND: + case BPF_S_LD_H_IND: k = X + fentry->k; goto load_h; - case BPF_LD|BPF_B|BPF_IND: + case BPF_S_LD_B_IND: k = X + fentry->k; goto load_b; - case BPF_LDX|BPF_B|BPF_MSH: + case BPF_S_LDX_B_MSH: ptr = load_pointer(skb, fentry->k, 1, &tmp); if (ptr != NULL) { X = (*(u8 *)ptr & 0xf) << 2; continue; } return 0; - case BPF_LD|BPF_IMM: + case BPF_S_LD_IMM: A = fentry->k; continue; - case BPF_LDX|BPF_IMM: + case BPF_S_LDX_IMM: X = fentry->k; continue; - case BPF_LD|BPF_MEM: + case BPF_S_LD_MEM: A = mem[fentry->k]; continue; - case BPF_LDX|BPF_MEM: + case BPF_S_LDX_MEM: X = mem[fentry->k]; continue; - case BPF_MISC|BPF_TAX: + case BPF_S_MISC_TAX: X = A; continue; - case BPF_MISC|BPF_TXA: + case BPF_S_MISC_TXA: A = X; continue; - case BPF_RET|BPF_K: + case BPF_S_RET_K: return fentry->k; - case BPF_RET|BPF_A: + case BPF_S_RET_A: return A; - case BPF_ST: + case BPF_S_ST: mem[fentry->k] = A; continue; - case BPF_STX: + case BPF_S_STX: mem[fentry->k] = X; continue; default: @@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen) /* Only allow valid instructions */ switch (ftest->code) { case BPF_ALU|BPF_ADD|BPF_K: + ftest->code = BPF_S_ALU_ADD_K; + break; case BPF_ALU|BPF_ADD|BPF_X: + ftest->code = BPF_S_ALU_ADD_X; + break; case BPF_ALU|BPF_SUB|BPF_K: + ftest->code = BPF_S_ALU_SUB_K; + break; case BPF_ALU|BPF_SUB|BPF_X: + ftest->code = BPF_S_ALU_SUB_X; + break; case BPF_ALU|BPF_MUL|BPF_K: + ftest->code = BPF_S_ALU_MUL_K; + break; case BPF_ALU|BPF_MUL|BPF_X: + ftest->code = BPF_S_ALU_MUL_X; + break; case BPF_ALU|BPF_DIV|BPF_X: + ftest->code = BPF_S_ALU_DIV_X; + break; case BPF_ALU|BPF_AND|BPF_K: + ftest->code = BPF_S_ALU_AND_K; + break; case BPF_ALU|BPF_AND|BPF_X: + ftest->code = BPF_S_ALU_AND_X; + break; case BPF_ALU|BPF_OR|BPF_K: + ftest->code = BPF_S_ALU_OR_K; + break; case BPF_ALU|BPF_OR|BPF_X: + ftest->code = BPF_S_ALU_OR_X; + break; case BPF_ALU|BPF_LSH|BPF_K: + ftest->code = BPF_S_ALU_LSH_K; + break; case BPF_ALU|BPF_LSH|BPF_X: + ftest->code = BPF_S_ALU_LSH_X; + break; case BPF_ALU|BPF_RSH|BPF_K: + ftest->code = BPF_S_ALU_RSH_K; + break; case BPF_ALU|BPF_RSH|BPF_X: + ftest->code = BPF_S_ALU_RSH_X; + break; case BPF_ALU|BPF_NEG: + ftest->code = BPF_S_ALU_NEG; + break; case BPF_LD|BPF_W|BPF_ABS: + ftest->code = BPF_S_LD_W_ABS; + break; case BPF_LD|BPF_H|BPF_ABS: + ftest->code = BPF_S_LD_H_ABS; + break; case BPF_LD|BPF_B|BPF_ABS: + ftest->code = BPF_S_LD_B_ABS; + break; case BPF_LD|BPF_W|BPF_LEN: + ftest->code = BPF_S_LD_W_LEN; + break; case BPF_LD|BPF_W|BPF_IND: + ftest->code = BPF_S_LD_W_IND; + break; case BPF_LD|BPF_H|BPF_IND: + ftest->code = BPF_S_LD_H_IND; + break; case BPF_LD|BPF_B|BPF_IND: + ftest->code = BPF_S_LD_B_IND; + break; case BPF_LD|BPF_IMM: + ftest->code = BPF_S_LD_IMM; + break; case BPF_LDX|BPF_W|BPF_LEN: + ftest->code = BPF_S_LDX_W_LEN; + break; case BPF_LDX|BPF_B|BPF_MSH: + ftest->code = BPF_S_LDX_B_MSH; + break; case BPF_LDX|BPF_IMM: + ftest->code = BPF_S_LDX_IMM; + break; case BPF_MISC|BPF_TAX: + ftest->code = BPF_S_MISC_TAX; + break; case BPF_MISC|BPF_TXA: + ftest->code = BPF_S_MISC_TXA; + break; case BPF_RET|BPF_K: + ftest->code = BPF_S_RET_K; + break; case BPF_RET|BPF_A: + ftest->code = BPF_S_RET_A; break; /* Some instructions need special checks */ - case BPF_ALU|BPF_DIV|BPF_K: /* check for division by zero */ + case BPF_ALU|BPF_DIV|BPF_K: if (ftest->k == 0) return -EINVAL; + ftest->code = BPF_S_ALU_DIV_K; break; + /* check for invalid memory addresses */ case BPF_LD|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LD_MEM; + break; case BPF_LDX|BPF_MEM: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_LDX_MEM; + break; case BPF_ST: + if (ftest->k >= BPF_MEMWORDS) + return -EINVAL; + ftest->code = BPF_S_ST; + break; case BPF_STX: - /* check for invalid memory addresses */ if (ftest->k >= BPF_MEMWORDS) return -EINVAL; + ftest->code = BPF_S_STX; break; case BPF_JMP|BPF_JA: @@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ if (ftest->k >= (unsigned)(flen-pc-1)) return -EINVAL; + ftest->code = BPF_S_JMP_JA; break; case BPF_JMP|BPF_JEQ|BPF_K: + ftest->code = BPF_S_JMP_JEQ_K; + break; case BPF_JMP|BPF_JEQ|BPF_X: + ftest->code = BPF_S_JMP_JEQ_X; + break; case BPF_JMP|BPF_JGE|BPF_K: + ftest->code = BPF_S_JMP_JGE_K; + break; case BPF_JMP|BPF_JGE|BPF_X: + ftest->code = BPF_S_JMP_JGE_X; + break; case BPF_JMP|BPF_JGT|BPF_K: + ftest->code = BPF_S_JMP_JGT_K; + break; case BPF_JMP|BPF_JGT|BPF_X: + ftest->code = BPF_S_JMP_JGT_X; + break; case BPF_JMP|BPF_JSET|BPF_K: + ftest->code = BPF_S_JMP_JSET_K; + break; case BPF_JMP|BPF_JSET|BPF_X: + ftest->code = BPF_S_JMP_JSET_X; + break; + + default: + return -EINVAL; + } + /* for conditionals both must be safe */ + switch (ftest->code) { + case BPF_S_JMP_JEQ_K: + case BPF_S_JMP_JEQ_X: + case BPF_S_JMP_JGE_K: + case BPF_S_JMP_JGE_X: + case BPF_S_JMP_JGT_K: + case BPF_S_JMP_JGT_X: + case BPF_S_JMP_JSET_X: + case BPF_S_JMP_JSET_K: if (pc + ftest->jt + 1 >= flen || pc + ftest->jf + 1 >= flen) return -EINVAL; - break; + } + } + /* last instruction must be a RET code */ + switch (filter[flen - 1].code) { + case BPF_S_RET_K: + case BPF_S_RET_A: + return 0; + break; default: return -EINVAL; } - } - - return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } EXPORT_SYMBOL(sk_chk_filter); diff --git a/net/core/flow.c b/net/core/flow.c index 161900674009..f67dcbfe54ef 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -62,6 +62,7 @@ struct flow_cache { }; atomic_t flow_cache_genid = ATOMIC_INIT(0); +EXPORT_SYMBOL(flow_cache_genid); static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep; @@ -222,7 +223,7 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, unsigned int hash; local_bh_disable(); - fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + fcp = this_cpu_ptr(fc->percpu); fle = NULL; flo = NULL; @@ -291,6 +292,7 @@ ret_object: local_bh_enable(); return flo; } +EXPORT_SYMBOL(flow_cache_lookup); static void flow_cache_flush_tasklet(unsigned long data) { @@ -302,7 +304,7 @@ static void flow_cache_flush_tasklet(unsigned long data) LIST_HEAD(gc_list); int i, deleted = 0; - fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); + fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { hlist_for_each_entry_safe(fle, entry, tmp, &fcp->hash_table[i], u.hlist) { @@ -424,6 +426,3 @@ static int __init flow_cache_init_global(void) } module_init(flow_cache_init_global); - -EXPORT_SYMBOL(flow_cache_genid); -EXPORT_SYMBOL(flow_cache_lookup); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 785e5276a300..9fbe7f7429b0 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * Removes the rate estimator specified by &bstats and &rate_est. * + * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 393b1d8618e2..0452eb27a272 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, return 0; } +EXPORT_SYMBOL(gnet_stats_start_copy_compat); /** * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode @@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, { return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d); } +EXPORT_SYMBOL(gnet_stats_start_copy); /** * gnet_stats_copy_basic - copy basic statistics into statistic TLV @@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) } return 0; } +EXPORT_SYMBOL(gnet_stats_copy_basic); /** * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV @@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, return 0; } +EXPORT_SYMBOL(gnet_stats_copy_rate_est); /** * gnet_stats_copy_queue - copy queue statistics into statistics TLV @@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q) return 0; } +EXPORT_SYMBOL(gnet_stats_copy_queue); /** * gnet_stats_copy_app - copy application specific statistics into statistics TLV @@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return 0; } +EXPORT_SYMBOL(gnet_stats_copy_app); /** * gnet_stats_finish_copy - finish dumping procedure @@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d) spin_unlock_bh(d->lock); return 0; } - - -EXPORT_SYMBOL(gnet_stats_start_copy); -EXPORT_SYMBOL(gnet_stats_start_copy_compat); -EXPORT_SYMBOL(gnet_stats_copy_basic); -EXPORT_SYMBOL(gnet_stats_copy_rate_est); -EXPORT_SYMBOL(gnet_stats_copy_queue); -EXPORT_SYMBOL(gnet_stats_copy_app); EXPORT_SYMBOL(gnet_stats_finish_copy); diff --git a/net/core/iovec.c b/net/core/iovec.c index 1e7f4e91a935..1cd98df412df 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -95,6 +95,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len) return 0; } +EXPORT_SYMBOL(memcpy_toiovec); /* * Copy kernel to iovec. Returns -EFAULT on error. @@ -120,6 +121,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, return 0; } +EXPORT_SYMBOL(memcpy_toiovecend); /* * Copy iovec to kernel. Returns -EFAULT on error. @@ -144,6 +146,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) return 0; } +EXPORT_SYMBOL(memcpy_fromiovec); /* * Copy iovec from kernel. Returns -EFAULT on error. @@ -172,6 +175,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, return 0; } +EXPORT_SYMBOL(memcpy_fromiovecend); /* * And now for the all-in-one: copy and checksum from a user iovec @@ -256,9 +260,4 @@ out_fault: err = -EFAULT; goto out; } - EXPORT_SYMBOL(csum_partial_copy_fromiovecend); -EXPORT_SYMBOL(memcpy_fromiovec); -EXPORT_SYMBOL(memcpy_fromiovecend); -EXPORT_SYMBOL(memcpy_toiovec); -EXPORT_SYMBOL(memcpy_toiovecend); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bdbce2f5875b..01a1101b5936 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev) linkwatch_schedule_work(urgent); } - EXPORT_SYMBOL(linkwatch_fire_event); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6ba1c0eece03..a4e0a7482c2b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) - = neigh->dev->header_ops->cache_update; + = NULL; + + if (neigh->dev->header_ops) + update = neigh->dev->header_ops->cache_update; if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 99e7052d7323..af4dfbadf2a0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; +static const char fmt_u64[] = "%llu\n"; static inline int dev_isalive(const struct net_device *dev) { @@ -94,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW(dev_id, fmt_hex); +NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); @@ -294,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev, } static struct device_attribute net_class_attributes[] = { + __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), @@ -324,14 +327,15 @@ static ssize_t netstat_show(const struct device *d, struct net_device *dev = to_net_dev(d); ssize_t ret = -EINVAL; - WARN_ON(offset > sizeof(struct net_device_stats) || - offset % sizeof(unsigned long) != 0); + WARN_ON(offset > sizeof(struct rtnl_link_stats64) || + offset % sizeof(u64) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev)) { - const struct net_device_stats *stats = dev_get_stats(dev); - ret = sprintf(buf, fmt_ulong, - *(unsigned long *)(((u8 *) stats) + offset)); + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); + + ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -343,7 +347,7 @@ static ssize_t show_##name(struct device *d, \ struct device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ - offsetof(struct net_device_stats, name)); \ + offsetof(struct rtnl_link_stats64, name)); \ } \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) @@ -922,13 +926,12 @@ int netdev_class_create_file(struct class_attribute *class_attr) { return class_create_file(&net_class, class_attr); } +EXPORT_SYMBOL(netdev_class_create_file); void netdev_class_remove_file(struct class_attribute *class_attr) { class_remove_file(&net_class, class_attr); } - -EXPORT_SYMBOL(netdev_class_create_file); EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) diff --git a/net/core/netevent.c b/net/core/netevent.c index 95f81de87502..865f0ceb81fb 100644 --- a/net/core/netevent.c +++ b/net/core/netevent.c @@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb) err = atomic_notifier_chain_register(&netevent_notif_chain, nb); return err; } +EXPORT_SYMBOL_GPL(register_netevent_notifier); /** * netevent_unregister_notifier - unregister a netevent notifier block @@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); } +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); /** * call_netevent_notifiers - call all netevent notifier blocks @@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v) { return atomic_notifier_call_chain(&netevent_notif_chain, val, v); } - -EXPORT_SYMBOL_GPL(register_netevent_notifier); -EXPORT_SYMBOL_GPL(unregister_netevent_notifier); EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94825b109551..537e01afd81b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -199,11 +199,13 @@ void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll(struct netpoll *np) { netpoll_poll_dev(np->dev); } +EXPORT_SYMBOL(netpoll_poll); static void refill_skbs(void) { @@ -292,6 +294,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) unsigned long tries; struct net_device *dev = np->dev; const struct net_device_ops *ops = dev->netdev_ops; + /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -343,6 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) schedule_delayed_work(&npinfo->tx_work,0); } } +EXPORT_SYMBOL(netpoll_send_skb); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { @@ -404,6 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) netpoll_send_skb(np, skb); } +EXPORT_SYMBOL(netpoll_send_udp); static void arp_reply(struct sk_buff *skb) { @@ -630,6 +635,7 @@ void netpoll_print_options(struct netpoll *np) printk(KERN_INFO "%s: remote ethernet address %pM\n", np->name, np->remote_mac); } +EXPORT_SYMBOL(netpoll_print_options); int netpoll_parse_options(struct netpoll *np, char *opt) { @@ -722,30 +728,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt) np->name, cur); return -1; } +EXPORT_SYMBOL(netpoll_parse_options); -int netpoll_setup(struct netpoll *np) +int __netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; - struct in_device *in_dev; + struct net_device *ndev = np->dev; struct netpoll_info *npinfo; - struct netpoll *npe, *tmp; + const struct net_device_ops *ops; unsigned long flags; int err; - if (np->dev_name) - ndev = dev_get_by_name(&init_net, np->dev_name); - if (!ndev) { - printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", + if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || + !ndev->netdev_ops->ndo_poll_controller) { + printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); - return -ENODEV; + err = -ENOTSUPP; + goto out; } - np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; - goto put; + goto out; } npinfo->rx_flags = 0; @@ -757,6 +762,13 @@ int netpoll_setup(struct netpoll *np) INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); atomic_set(&npinfo->refcnt, 1); + + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_setup) { + err = ops->ndo_netpoll_setup(ndev, npinfo); + if (err) + goto free_npinfo; + } } else { npinfo = ndev->npinfo; atomic_inc(&npinfo->refcnt); @@ -764,12 +776,37 @@ int netpoll_setup(struct netpoll *np) npinfo->netpoll = np; - if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || - !ndev->netdev_ops->ndo_poll_controller) { - printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", + if (np->rx_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; + list_add_tail(&np->rx, &npinfo->rx_np); + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + /* last thing to do is link it to the net device structure */ + rcu_assign_pointer(ndev->npinfo, npinfo); + + return 0; + +free_npinfo: + kfree(npinfo); +out: + return err; +} +EXPORT_SYMBOL_GPL(__netpoll_setup); + +int netpoll_setup(struct netpoll *np) +{ + struct net_device *ndev = NULL; + struct in_device *in_dev; + int err; + + if (np->dev_name) + ndev = dev_get_by_name(&init_net, np->dev_name); + if (!ndev) { + printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - err = -ENOTSUPP; - goto release; + return -ENODEV; } if (!netif_running(ndev)) { @@ -785,7 +822,7 @@ int netpoll_setup(struct netpoll *np) if (err) { printk(KERN_ERR "%s: failed to open %s\n", np->name, ndev->name); - goto release; + goto put; } atleast = jiffies + HZ/10; @@ -822,7 +859,7 @@ int netpoll_setup(struct netpoll *np) printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); err = -EDESTADDRREQ; - goto release; + goto put; } np->local_ip = in_dev->ifa_list->ifa_local; @@ -830,38 +867,25 @@ int netpoll_setup(struct netpoll *np) printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); } - if (np->rx_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= NETPOLL_RX_ENABLED; - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + np->dev = ndev; /* fill up the skb queue */ refill_skbs(); - /* last thing to do is link it to the net device structure */ - ndev->npinfo = npinfo; + rtnl_lock(); + err = __netpoll_setup(np); + rtnl_unlock(); - /* avoid racing with NAPI reading npinfo */ - synchronize_rcu(); + if (err) + goto put; return 0; - release: - if (!ndev->npinfo) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) { - npe->dev = NULL; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - kfree(npinfo); - } put: dev_put(ndev); return err; } +EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { @@ -870,49 +894,65 @@ static int __init netpoll_init(void) } core_initcall(netpoll_init); -void netpoll_cleanup(struct netpoll *np) +void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; unsigned long flags; - if (np->dev) { - npinfo = np->dev->npinfo; - if (npinfo) { - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - if (list_empty(&npinfo->rx_np)) - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } + npinfo = np->dev->npinfo; + if (!npinfo) + return; - if (atomic_dec_and_test(&npinfo->refcnt)) { - const struct net_device_ops *ops; - skb_queue_purge(&npinfo->arp_tx); - skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); - - /* clean after last, unfinished work */ - __skb_queue_purge(&npinfo->txq); - kfree(npinfo); - ops = np->dev->netdev_ops; - if (ops->ndo_netpoll_cleanup) - ops->ndo_netpoll_cleanup(np->dev); - else - np->dev->npinfo = NULL; - } - } + if (!list_empty(&npinfo->rx_np)) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_del(&np->rx); + if (list_empty(&npinfo->rx_np)) + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + if (atomic_dec_and_test(&npinfo->refcnt)) { + const struct net_device_ops *ops; + + ops = np->dev->netdev_ops; + if (ops->ndo_netpoll_cleanup) + ops->ndo_netpoll_cleanup(np->dev); + + rcu_assign_pointer(np->dev->npinfo, NULL); + + /* avoid racing with NAPI reading npinfo */ + synchronize_rcu_bh(); + + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); - dev_put(np->dev); + /* clean after last, unfinished work */ + __skb_queue_purge(&npinfo->txq); + kfree(npinfo); } +} +EXPORT_SYMBOL_GPL(__netpoll_cleanup); + +void netpoll_cleanup(struct netpoll *np) +{ + if (!np->dev) + return; + rtnl_lock(); + __netpoll_cleanup(np); + rtnl_unlock(); + + dev_put(np->dev); np->dev = NULL; } +EXPORT_SYMBOL(netpoll_cleanup); int netpoll_trap(void) { return atomic_read(&trapped); } +EXPORT_SYMBOL(netpoll_trap); void netpoll_set_trap(int trap) { @@ -921,14 +961,4 @@ void netpoll_set_trap(int trap) else atomic_dec(&trapped); } - -EXPORT_SYMBOL(netpoll_send_skb); EXPORT_SYMBOL(netpoll_set_trap); -EXPORT_SYMBOL(netpoll_trap); -EXPORT_SYMBOL(netpoll_print_options); -EXPORT_SYMBOL(netpoll_parse_options); -EXPORT_SYMBOL(netpoll_setup); -EXPORT_SYMBOL(netpoll_cleanup); -EXPORT_SYMBOL(netpoll_send_udp); -EXPORT_SYMBOL(netpoll_poll_dev); -EXPORT_SYMBOL(netpoll_poll); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1dacd7ba8dbb..10a1ea72010d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -115,6 +115,9 @@ * command by Adit Ranadive <adit.262@gmail.com> * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sys.h> #include <linux/types.h> #include <linux/module.h> @@ -169,11 +172,13 @@ #include <asm/dma.h> #include <asm/div64.h> /* do_div */ -#define VERSION "2.73" +#define VERSION "2.74" #define IP_NAME_SZ 32 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ #define MPLS_STACK_BOTTOM htonl(0x00000100) +#define func_enter() pr_debug("entering %s\n", __func__); + /* Device flag bits */ #define F_IPSRC_RND (1<<0) /* IP-Src Random */ #define F_IPDST_RND (1<<1) /* IP-Dst Random */ @@ -424,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) } static const char version[] = - "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; + "Packet Generator for packet performance testing. " + "Version: " VERSION "\n"; static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); @@ -495,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, pktgen_reset_all_threads(); else - printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); + pr_warning("Unknown command: %s\n", data); err = count; @@ -840,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct pktgen_dev *pkt_dev = seq->private; int i = 0, max, len; char name[16], valstr[32]; @@ -852,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file, pg_result = &(pkt_dev->result[0]); if (count < 1) { - printk(KERN_WARNING "pktgen: wrong command format\n"); + pr_warning("wrong command format\n"); return -EINVAL; } max = count - i; tmp = count_trail_chars(&user_buffer[i], max); if (tmp < 0) { - printk(KERN_WARNING "pktgen: illegal format\n"); + pr_warning("illegal format\n"); return tmp; } i += tmp; @@ -980,6 +986,36 @@ static ssize_t pktgen_if_write(struct file *file, (unsigned long long) pkt_dev->delay); return count; } + if (!strcmp(name, "rate")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value; + if (debug) + pr_info("Delay set at: %llu ns\n", pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } + if (!strcmp(name, "ratep")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) + return len; + + i += len; + if (!value) + return len; + pkt_dev->delay = NSEC_PER_SEC/value; + if (debug) + pr_info("Delay set at: %llu ns\n", pkt_dev->delay); + + sprintf(pg_result, "OK: rate=%lu", value); + return count; + } if (!strcmp(name, "udp_src_min")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) @@ -1398,18 +1434,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; @@ -1440,18 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file, i += len; for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } + int value; + + value = hex_to_bin(*v); + if (value >= 0) + *m = *m * 16 + value; + if (*v == ':') { m++; *m = 0; @@ -1740,7 +1764,7 @@ static ssize_t pktgen_thread_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) { - struct seq_file *seq = (struct seq_file *)file->private_data; + struct seq_file *seq = file->private_data; struct pktgen_thread *t = seq->private; int i = 0, max, len, ret; char name[40]; @@ -1781,7 +1805,7 @@ static ssize_t pktgen_thread_write(struct file *file, name, (unsigned long)count); if (!t) { - printk(KERN_ERR "pktgen: ERROR: No thread\n"); + pr_err("ERROR: No thread\n"); ret = -EINVAL; goto out; } @@ -1874,7 +1898,7 @@ static void pktgen_mark_device(const char *ifname) int i = 0; mutex_lock(&pktgen_thread_lock); - pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname); + pr_debug("%s: marking %s for removal\n", __func__, ifname); while (1) { @@ -1883,15 +1907,14 @@ static void pktgen_mark_device(const char *ifname) break; /* success */ mutex_unlock(&pktgen_thread_lock); - pr_debug("pktgen: pktgen_mark_device waiting for %s " - "to disappear....\n", ifname); + pr_debug("%s: waiting for %s to disappear....\n", + __func__, ifname); schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); mutex_lock(&pktgen_thread_lock); if (++i >= max_tries) { - printk(KERN_ERR "pktgen_mark_device: timed out after " - "waiting %d msec for device %s to be removed\n", - msec_per_try * i, ifname); + pr_err("%s: timed out after waiting %d msec for device %s to be removed\n", + __func__, msec_per_try * i, ifname); break; } @@ -1918,8 +1941,8 @@ static void pktgen_change_name(struct net_device *dev) &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) - printk(KERN_ERR "pktgen: can't move proc " - " entry for '%s'\n", dev->name); + pr_err("can't move proc entry for '%s'\n", + dev->name); break; } } @@ -1983,15 +2006,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) odev = pktgen_dev_get_by_name(pkt_dev, ifname); if (!odev) { - printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); + pr_err("no such netdevice: \"%s\"\n", ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); + pr_err("not an ethernet device: \"%s\"\n", ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); + pr_err("device is down: \"%s\"\n", ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; @@ -2010,8 +2033,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) int ntxq; if (!pkt_dev->odev) { - printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " - "setup_inject.\n"); + pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n"); sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); return; @@ -2021,19 +2043,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) ntxq = pkt_dev->odev->real_num_tx_queues; if (ntxq <= pkt_dev->queue_map_min) { - printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_min (zero-based) (%d) exceeds valid range " - "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_min = ntxq - 1; } if (pkt_dev->queue_map_max >= ntxq) { - printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_max (zero-based) (%d) exceeds valid range " - "[0 - %d] for (%d) queues on %s, resetting\n", - pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odevname); + pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, + pkt_dev->odevname); pkt_dev->queue_map_max = ntxq - 1; } @@ -2093,8 +2111,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } rcu_read_unlock(); if (err) - printk(KERN_ERR "pktgen: ERROR: IPv6 link " - "address not availble.\n"); + pr_err("ERROR: IPv6 link address not available\n"); } #endif } else { @@ -2142,15 +2159,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_set_expires(&t.timer, spin_until); - remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); + remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); if (remaining <= 0) { pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); return; } start_time = ktime_now(); - if (remaining < 100) - udelay(remaining); /* really small just spin */ + if (remaining < 100000) + ndelay(remaining); /* really small just spin */ else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2528,8 +2545,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); if (ret < 0) { - printk(KERN_ERR "Error expanding " - "ipsec packet %d\n", ret); + pr_err("Error expanding ipsec packet %d\n", + ret); goto err; } } @@ -2538,8 +2555,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, skb_pull(skb, ETH_HLEN); ret = pktgen_output_ipsec(skb, pkt_dev); if (ret) { - printk(KERN_ERR "Error creating ipsec " - "packet %d\n", ret); + pr_err("Error creating ipsec packet %d\n", ret); goto err; } /* restore ll */ @@ -3015,8 +3031,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); if (net_ratelimit()) - printk(KERN_INFO "pktgen: increased datalen to %d\n", - datalen); + pr_info("increased datalen to %d\n", datalen); } udph->source = htons(pkt_dev->cur_udp_src); @@ -3143,7 +3158,7 @@ static void pktgen_run(struct pktgen_thread *t) struct pktgen_dev *pkt_dev; int started = 0; - pr_debug("pktgen: entering pktgen_run. %p\n", t); + func_enter(); if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) { @@ -3176,7 +3191,7 @@ static void pktgen_stop_all_threads_ifs(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3241,7 +3256,7 @@ static void pktgen_run_all_threads(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_run_all_threads.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3260,7 +3275,7 @@ static void pktgen_reset_all_threads(void) { struct pktgen_thread *t; - pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); + func_enter(); mutex_lock(&pktgen_thread_lock); @@ -3310,8 +3325,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; if (!pkt_dev->running) { - printk(KERN_WARNING "pktgen: interface: %s is already " - "stopped\n", pkt_dev->odevname); + pr_warning("interface: %s is already stopped\n", + pkt_dev->odevname); return -EINVAL; } @@ -3347,7 +3362,7 @@ static void pktgen_stop(struct pktgen_thread *t) { struct pktgen_dev *pkt_dev; - pr_debug("pktgen: entering pktgen_stop\n"); + func_enter(); if_lock(t); @@ -3367,7 +3382,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) struct list_head *q, *n; struct pktgen_dev *cur; - pr_debug("pktgen: entering pktgen_rem_one_if\n"); + func_enter(); if_lock(t); @@ -3393,9 +3408,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) struct list_head *q, *n; struct pktgen_dev *cur; + func_enter(); + /* Remove all devices, free mem */ - pr_debug("pktgen: entering pktgen_rem_all_ifs\n"); if_lock(t); list_for_each_safe(q, n, &t->if_list) { @@ -3477,8 +3493,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { - printk(KERN_ERR "pktgen: ERROR: couldn't " - "allocate skb in fill_packet.\n"); + pr_err("ERROR: couldn't allocate skb in fill_packet\n"); schedule(); pkt_dev->clone_count--; /* back out increment, OOM */ return; @@ -3558,8 +3573,7 @@ static int pktgen_thread_worker(void *arg) init_waitqueue_head(&t->queue); complete(&t->start_done); - pr_debug("pktgen: starting pktgen/%d: pid=%d\n", - cpu, task_pid_nr(current)); + pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); set_current_state(TASK_INTERRUPTIBLE); @@ -3612,13 +3626,13 @@ static int pktgen_thread_worker(void *arg) set_current_state(TASK_INTERRUPTIBLE); } - pr_debug("pktgen: %s stopping all device\n", t->tsk->comm); + pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); - pr_debug("pktgen: %s removing all device\n", t->tsk->comm); + pr_debug("%s removing all device\n", t->tsk->comm); pktgen_rem_all_ifs(t); - pr_debug("pktgen: %s removing thread.\n", t->tsk->comm); + pr_debug("%s removing thread\n", t->tsk->comm); pktgen_rem_thread(t); return 0; @@ -3642,7 +3656,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, } if_unlock(t); - pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev); + pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev); return pkt_dev; } @@ -3658,8 +3672,7 @@ static int add_dev_to_thread(struct pktgen_thread *t, if_lock(t); if (pkt_dev->pg_thread) { - printk(KERN_ERR "pktgen: ERROR: already assigned " - "to a thread.\n"); + pr_err("ERROR: already assigned to a thread\n"); rv = -EBUSY; goto out; } @@ -3685,7 +3698,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev = __pktgen_NN_threads(ifname, FIND); if (pkt_dev) { - printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); + pr_err("ERROR: interface already used\n"); return -EBUSY; } @@ -3730,7 +3743,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { - printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", + pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; @@ -3761,8 +3774,7 @@ static int __init pktgen_create_thread(int cpu) t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, cpu_to_node(cpu)); if (!t) { - printk(KERN_ERR "pktgen: ERROR: out of memory, can't " - "create new thread.\n"); + pr_err("ERROR: out of memory, can't create new thread\n"); return -ENOMEM; } @@ -3776,8 +3788,7 @@ static int __init pktgen_create_thread(int cpu) p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { - printk(KERN_ERR "pktgen: kernel_thread() failed " - "for cpu %d\n", t->cpu); + pr_err("kernel_thread() failed for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); @@ -3788,7 +3799,7 @@ static int __init pktgen_create_thread(int cpu) pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, &pktgen_thread_fops, t); if (!pe) { - printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", + pr_err("cannot create %s/%s procfs entry\n", PG_PROC_DIR, t->tsk->comm); kthread_stop(p); list_del(&t->th_list); @@ -3822,11 +3833,10 @@ static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) { - pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); + pr_debug("remove_device pkt_dev=%p\n", pkt_dev); if (pkt_dev->running) { - printk(KERN_WARNING "pktgen: WARNING: trying to remove a " - "running interface, stopping it now.\n"); + pr_warning("WARNING: trying to remove a running interface, stopping it now\n"); pktgen_stop_device(pkt_dev); } @@ -3857,7 +3867,7 @@ static int __init pg_init(void) int cpu; struct proc_dir_entry *pe; - printk(KERN_INFO "%s", version); + pr_info("%s", version); pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) @@ -3865,8 +3875,7 @@ static int __init pg_init(void) pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); if (pe == NULL) { - printk(KERN_ERR "pktgen: ERROR: cannot create %s " - "procfs entry.\n", PGCTRL); + pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL); proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3879,13 +3888,12 @@ static int __init pg_init(void) err = pktgen_create_thread(cpu); if (err) - printk(KERN_WARNING "pktgen: WARNING: Cannot create " - "thread for cpu %d (%d)\n", cpu, err); + pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n", + cpu, err); } if (list_empty(&pktgen_threads)) { - printk(KERN_ERR "pktgen: ERROR: Initialization failed for " - "all threads\n"); + pr_err("ERROR: Initialization failed for all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(&init_net, PG_PROC_DIR); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a2af24e9e3d..f78d821bd935 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - const struct net_device_stats *b) + const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; } -static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) +static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) { struct rtnl_link_stats64 a; @@ -686,7 +686,7 @@ static size_t rtnl_port_size(const struct net_device *dev) return port_self_size; } -static inline size_t if_nlmsg_size(const struct net_device *dev) +static noinline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ @@ -791,7 +791,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - const struct net_device_stats *stats; + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -847,7 +848,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev_get_stats(dev); + stats = dev_get_stats(dev, &temp); copy_rtnl_link_stats(nla_data(attr), stats); attr = nla_reserve(skb, IFLA_STATS64, diff --git a/net/core/scm.c b/net/core/scm.c index b88f6f9d0b97..413cab89017d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm) } } } +EXPORT_SYMBOL(__scm_destroy); int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) { @@ -170,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) err = scm_check_creds(&p->creds); if (err) goto error; + + if (pid_vnr(p->pid) != p->creds.pid) { + struct pid *pid; + err = -ESRCH; + pid = find_get_pid(p->creds.pid); + if (!pid) + goto error; + put_pid(p->pid); + p->pid = pid; + } + + if ((p->cred->euid != p->creds.uid) || + (p->cred->egid != p->creds.gid)) { + struct cred *cred; + err = -ENOMEM; + cred = prepare_creds(); + if (!cred) + goto error; + + cred->uid = cred->euid = p->creds.uid; + cred->gid = cred->egid = p->creds.uid; + put_cred(p->cred); + p->cred = cred; + } break; default: goto error; @@ -187,6 +212,7 @@ error: scm_destroy(p); return err; } +EXPORT_SYMBOL(__scm_send); int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) { @@ -225,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data) out: return err; } +EXPORT_SYMBOL(put_cmsg); void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { @@ -294,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) */ __scm_destroy(scm); } +EXPORT_SYMBOL(scm_detach_fds); struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) { @@ -311,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) } return new_fpl; } - -EXPORT_SYMBOL(__scm_destroy); -EXPORT_SYMBOL(__scm_send); -EXPORT_SYMBOL(put_cmsg); -EXPORT_SYMBOL(scm_detach_fds); EXPORT_SYMBOL(scm_fp_dup); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 34432b4e96bb..3a2513f0d0c3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -817,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, memcpy(data + nhead, skb->head, skb->tail - skb->head); #endif memcpy(data + size, skb_end_pointer(skb), - sizeof(struct skb_shared_info)); + offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -843,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; - skb->csum_start += nhead; + /* Only adjust this if it actually is csum_start rather than csum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb->csum_start += nhead; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; @@ -930,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, copy_skb_header(n, skb); off = newheadroom - oldheadroom; - n->csum_start += off; + if (n->ip_summed == CHECKSUM_PARTIAL) + n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET n->transport_header += off; n->network_header += off; @@ -2483,7 +2486,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) skb_postpull_rcsum(skb, skb->data, len); return skb->data += len; } - EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** diff --git a/net/core/sock.c b/net/core/sock.c index 2cf7f9f7e775..b05b9b6ddb87 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -110,6 +110,7 @@ #include <linux/tcp.h> #include <linux/init.h> #include <linux/highmem.h> +#include <linux/user_namespace.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -156,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -172,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -188,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_MAX" }; @@ -749,6 +750,20 @@ set_rcvbuf: EXPORT_SYMBOL(sock_setsockopt); +void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) +{ + ucred->pid = pid_vnr(pid); + ucred->uid = ucred->gid = -1; + if (cred) { + struct user_namespace *current_ns = current_user_ns(); + + ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid); + ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid); + } +} +EXPORT_SYMBOL_GPL(cred_to_ucred); + int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -901,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PEERCRED: - if (len > sizeof(sk->sk_peercred)) - len = sizeof(sk->sk_peercred); - if (copy_to_user(optval, &sk->sk_peercred, len)) + { + struct ucred peercred; + if (len > sizeof(peercred)) + len = sizeof(peercred); + cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; + } case SO_PEERNAME: { @@ -1119,6 +1138,9 @@ static void __sk_free(struct sock *sk) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + put_pid(sk->sk_peer_pid); put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); } @@ -1317,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree); void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + unsigned int len = skb->truesize; - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sk_mem_uncharge(skb->sk, skb->truesize); + atomic_sub(len, &sk->sk_rmem_alloc); + sk_mem_uncharge(sk, len); } EXPORT_SYMBOL(sock_rfree); @@ -1954,9 +1977,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - sk->sk_peercred.pid = 0; - sk->sk_peercred.uid = -1; - sk->sk_peercred.gid = -1; + sk->sk_peer_pid = NULL; + sk->sk_peer_cred = NULL; sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; @@ -2210,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); #ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - int cpu = smp_processor_id(); - per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; + __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); @@ -2257,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; + __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add); diff --git a/net/core/stream.c b/net/core/stream.c index cc196f42b8d8..d959e0f41528 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk) rcu_read_unlock(); } } - EXPORT_SYMBOL(sk_stream_write_space); /** @@ -81,7 +80,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) } while (!done); return 0; } - EXPORT_SYMBOL(sk_stream_wait_connect); /** @@ -109,7 +107,6 @@ void sk_stream_wait_close(struct sock *sk, long timeout) finish_wait(sk_sleep(sk), &wait); } } - EXPORT_SYMBOL(sk_stream_wait_close); /** @@ -174,7 +171,6 @@ do_interrupted: err = sock_intr_errno(*timeo_p); goto out; } - EXPORT_SYMBOL(sk_stream_wait_memory); int sk_stream_error(struct sock *sk, int flags, int err) @@ -185,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err) send_sig(SIGPIPE, current, 0); return err; } - EXPORT_SYMBOL(sk_stream_error); void sk_stream_kill_queues(struct sock *sk) @@ -210,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk) * have gone away, only the net layer knows can touch it. */ } - EXPORT_SYMBOL(sk_stream_kill_queues); diff --git a/net/core/timestamping.c b/net/core/timestamping.c new file mode 100644 index 000000000000..0ae6c22da85b --- /dev/null +++ b/net/core/timestamping.c @@ -0,0 +1,126 @@ +/* + * PTP 1588 clock support - support for timestamping in PHY devices + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/errqueue.h> +#include <linux/phy.h> +#include <linux/ptp_classify.h> +#include <linux/skbuff.h> + +static struct sock_filter ptp_filter[] = { + PTP_FILTER +}; + +static unsigned int classify(struct sk_buff *skb) +{ + if (likely(skb->dev && + skb->dev->phydev && + skb->dev->phydev->drv)) + return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter)); + else + return PTP_CLASS_NONE; +} + +void skb_clone_tx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + struct sk_buff *clone; + struct sock *sk = skb->sk; + unsigned int type; + + if (!sk) + return; + + type = classify(skb); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->txtstamp)) { + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) + return; + clone->sk = sk; + phydev->drv->txtstamp(phydev, clone, type); + } + break; + default: + break; + } +} + +void skb_complete_tx_timestamp(struct sk_buff *skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct sock *sk = skb->sk; + struct sock_exterr_skb *serr; + int err; + + if (!hwtstamps) + return; + + *skb_hwtstamps(skb) = *hwtstamps; + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + skb->sk = NULL; + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); + +bool skb_defer_rx_timestamp(struct sk_buff *skb) +{ + struct phy_device *phydev; + unsigned int type; + + skb_push(skb, ETH_HLEN); + + type = classify(skb); + + skb_pull(skb, ETH_HLEN); + + switch (type) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV4: + case PTP_CLASS_V2_IPV6: + case PTP_CLASS_V2_L2: + case PTP_CLASS_V2_VLAN: + phydev = skb->dev->phydev; + if (likely(phydev->drv->rxtstamp)) + return phydev->drv->rxtstamp(phydev, skb, type); + break; + default: + break; + } + + return false; +} + +void __init skb_timestamping_init(void) +{ + BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter))); +} diff --git a/net/core/utils.c b/net/core/utils.c index 838250241d26..f41854470539 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -77,7 +77,6 @@ __be32 in_aton(const char *str) } return(htonl(l)); } - EXPORT_SYMBOL(in_aton); #define IN6PTON_XDIGIT 0x00010000 @@ -162,7 +161,6 @@ out: *end = s; return ret; } - EXPORT_SYMBOL(in4_pton); int in6_pton(const char *src, int srclen, @@ -280,7 +278,6 @@ out: *end = s; return ret; } - EXPORT_SYMBOL(in6_pton); void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 01e4d39fa232..92a6fcb40d7d 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -82,7 +82,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) elapsed_time = delta / 10; if (elapsed_time != 0 && - dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) + dccp_insert_option_elapsed_time(skb, elapsed_time)) return -1; avr = dccp_ackvec_record_new(); @@ -201,7 +201,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, const unsigned int packets, const unsigned char state) { - unsigned int gap; + long gap; long new_head; if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d3235899c7e3..95f752986497 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -715,9 +715,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) x_recv = htonl(hc->rx_x_recv); pinv = htonl(hc->rx_pinv); - if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, + if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)) || - dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, + dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv))) return -1; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index a10a61a1ded2..3ccef1b70fee 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -446,16 +446,12 @@ extern void dccp_feat_list_purge(struct list_head *fn_list); extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); -extern int dccp_insert_option_elapsed_time(struct sock *sk, - struct sk_buff *skb, - u32 elapsed_time); +extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); extern u32 dccp_timestamp(void); extern void dccp_timestamping_init(void); -extern int dccp_insert_option_timestamp(struct sock *sk, - struct sk_buff *skb); -extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, - unsigned char option, - const void *value, unsigned char len); +extern int dccp_insert_option_timestamp(struct sk_buff *skb); +extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, + const void *value, unsigned char len); #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); diff --git a/net/dccp/input.c b/net/dccp/input.c index 6beb6a7d6fba..10c957a88f4f 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -430,7 +430,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, NULL, skb)) return 1; - /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); @@ -535,6 +535,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, const struct dccp_hdr *dh, const unsigned len) { + struct dccp_sock *dp = dccp_sk(sk); + u32 sample = dp->dccps_options_received.dccpor_timestamp_echo; int queued = 0; switch (dh->dccph_type) { @@ -559,7 +561,14 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, if (sk->sk_state == DCCP_PARTOPEN) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); - dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; + /* Obtain usec RTT sample from SYN exchange (used by TFRC). */ + if (likely(sample)) { + long delta = dccp_timestamp() - sample; + + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta); + } + + dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; dccp_set_state(sk, DCCP_OPEN); if (dh->dccph_type == DCCP_PKT_DATAACK || diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d9b11ef8694c..d4a166f0f391 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; /* OK, now commit destination to socket. */ - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, inet->inet_daddr, @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return NULL; } - return &rt->u.dst; + return &rt->dst; } static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 091698899594..6e3f32575df7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; int err = -1; struct dst_entry *dst; @@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, opt = np->opt; - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (opt != NULL && opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; @@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt != NULL && np->opt->srcrt != NULL) { - const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/dccp/options.c b/net/dccp/options.c index 07395f861d35..bfda087bd90d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -299,9 +299,8 @@ static inline u8 dccp_ndp_len(const u64 ndp) return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); } -int dccp_insert_option(struct sock *sk, struct sk_buff *skb, - const unsigned char option, - const void *value, const unsigned char len) +int dccp_insert_option(struct sk_buff *skb, const unsigned char option, + const void *value, const unsigned char len) { unsigned char *to; @@ -354,8 +353,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } -int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, - u32 elapsed_time) +int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) { const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 2 + elapsed_time_len; @@ -386,13 +384,13 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ - return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); + return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); @@ -531,9 +529,9 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { /* * Obtain RTT sample from Request/Response exchange. - * This is currently used in CCID 3 initialisation. + * This is currently used for TFRC initialisation. */ - if (dccp_insert_option_timestamp(sk, skb)) + if (dccp_insert_option_timestamp(skb)) return -1; } else if (dp->dccps_hc_rx_ackvec != NULL && @@ -564,6 +562,10 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) if (dccp_feat_insert_opts(NULL, dreq, skb)) return -1; + /* Obtain RTT sample from Response/Ack exchange (used by TFRC). */ + if (dccp_insert_option_timestamp(skb)) + return -1; + if (dreq->dreq_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(NULL, dreq, skb)) return -1; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b03ecf6b2bb0..096250d1323b 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type, if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) return -EINVAL; - val = kmalloc(optlen, GFP_KERNEL); - if (val == NULL) - return -ENOMEM; - - if (copy_from_user(val, optval, optlen)) { - kfree(val); - return -EFAULT; - } + val = memdup_user(optval, optlen); + if (IS_ERR(val)) + return PTR_ERR(val); lock_sock(sk); if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) @@ -1007,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib)); + sizeof(struct dccp_mib), + __alignof__(struct dccp_mib)); } static inline void dccp_mib_exit(void) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 812e6dff6067..6585ea6d1182 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst) static inline void dnrt_free(struct dn_route *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void dnrt_drop(struct dn_route *rt) { - dst_release(&rt->u.dst); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + dst_release(&rt->dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void dn_dst_check_expire(unsigned long dummy) @@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy) spin_lock(&dn_rt_hash_table[i].lock); while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_free(rt); } spin_unlock(&dn_rt_hash_table[i].lock); @@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops) rtp = &dn_rt_hash_table[i].chain; while((rt=*rtp) != NULL) { - if (atomic_read(&rt->u.dst.__refcnt) || - (now - rt->u.dst.lastuse) < expire) { - rtp = &rt->u.dst.dn_next; + if (atomic_read(&rt->dst.__refcnt) || + (now - rt->dst.lastuse) < expire) { + rtp = &rt->dst.dn_next; continue; } - *rtp = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + *rtp = rt->dst.dn_next; + rt->dst.dn_next = NULL; dnrt_drop(rt); break; } @@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * while((rth = *rthp) != NULL) { if (compare_keys(&rth->fl, &rt->fl)) { /* Put it first */ - *rthp = rth->u.dst.dn_next; - rcu_assign_pointer(rth->u.dst.dn_next, + *rthp = rth->dst.dn_next; + rcu_assign_pointer(rth->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dnrt_drop(rt); *rp = rth; return 0; } - rthp = &rth->u.dst.dn_next; + rthp = &rth->dst.dn_next; } - rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); + rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->u.dst, now); + dst_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy) goto nothing_to_declare; for(; rt; rt=next) { - next = rt->u.dst.dn_next; - rt->u.dst.dn_next = NULL; + next = rt->dst.dn_next; + rt->dst.dn_next = NULL; dst_free((struct dst_entry *)rt); } @@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb) /* Ensure that we have enough space for headers */ rt = (struct dn_route *)skb_dst(skb); header_len = dn_db->use_long ? 21 : 6; - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len)) goto drop; /* @@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb) if (++cb->hops > 30) goto drop; - skb->dev = rt->u.dst.dev; + skb->dev = rt->dst.dev; /* * If packet goes out same interface it came in on, then set @@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb) static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; struct neighbour *n; unsigned mss; @@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); } rt->rt_type = res->type; - if (dev != NULL && rt->u.dst.neighbour == NULL) { + if (dev != NULL && rt->dst.neighbour == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->u.dst.neighbour = n; + rt->dst.neighbour = n; } - if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || - dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) - rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; + if (dst_metric(&rt->dst, RTAX_MTU) == 0 || + dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->dst, RTAX_ADVMSS) > mss) + rt->dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } @@ -1096,8 +1096,8 @@ make_route: if (rt == NULL) goto e_nobufs; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.flags = DST_HOST; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.flags = DST_HOST; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; @@ -1113,17 +1113,17 @@ make_route: rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; - rt->u.dst.dev = dev_out; + rt->dst.dev = dev_out; dev_hold(dev_out); - rt->u.dst.neighbour = neigh; + rt->dst.neighbour = neigh; neigh = NULL; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_rt_bug; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_output; + rt->dst.input = dn_rt_bug; rt->rt_flags = flags; if (flags & RTCF_LOCAL) - rt->u.dst.input = dn_nsp_rx; + rt->dst.input = dn_nsp_rx; err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1152,7 +1152,7 @@ e_nobufs: err = -ENOBUFS; goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto e_nobufs; } @@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next)) { + rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && (flp->mark == rt->fl.mark) && (rt->fl.iif == 0) && (rt->fl.oif == flp->oif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); - *pprt = &rt->u.dst; + *pprt = &rt->dst; return 0; } } @@ -1375,29 +1375,29 @@ make_route: rt->fl.iif = in_dev->ifindex; rt->fl.mark = fl.mark; - rt->u.dst.flags = DST_HOST; - rt->u.dst.neighbour = neigh; - rt->u.dst.dev = out_dev; - rt->u.dst.lastuse = jiffies; - rt->u.dst.output = dn_rt_bug; + rt->dst.flags = DST_HOST; + rt->dst.neighbour = neigh; + rt->dst.dev = out_dev; + rt->dst.lastuse = jiffies; + rt->dst.output = dn_rt_bug; switch(res.type) { case RTN_UNICAST: - rt->u.dst.input = dn_forward; + rt->dst.input = dn_forward; break; case RTN_LOCAL: - rt->u.dst.output = dn_output; - rt->u.dst.input = dn_nsp_rx; - rt->u.dst.dev = in_dev; + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; flags |= RTCF_LOCAL; break; default: case RTN_UNREACHABLE: case RTN_BLACKHOLE: - rt->u.dst.input = dst_discard; + rt->dst.input = dst_discard; } rt->rt_flags = flags; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + if (rt->dst.dev) + dev_hold(rt->dst.dev); err = dn_rt_set_next_hop(rt, &res); if (err) @@ -1405,7 +1405,7 @@ make_route: hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); dn_insert_route(rt, hash, &rt); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); done: if (neigh) @@ -1427,7 +1427,7 @@ e_nobufs: goto done; e_neighbour: - dst_free(&rt->u.dst); + dst_free(&rt->dst); goto done; } @@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; - rt = rcu_dereference(rt->u.dst.dn_next)) { + rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && (rt->fl.oif == 0) && (rt->fl.mark == skb->mark) && (rt->fl.iif == cb->iif)) { - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; @@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 16; RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); } - if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + if (rt->dst.dev) + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); /* * Note to self - change this if input routes reverse direction when * they deal only with inputs and not with replies like they do @@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto rtattr_failure; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, - rt->u.dst.error) < 0) + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, + rt->dst.error) < 0) goto rtattr_failure; if (rt->fl.iif) RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); @@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void local_bh_enable(); memset(cb, 0, sizeof(struct dn_skb_cb)); rt = (struct dn_route *)skb_dst(skb); - if (!err && -rt->u.dst.error) - err = rt->u.dst.error; + if (!err && -rt->dst.error) + err = rt->dst.error; } else { int oif = 0; if (rta[RTA_OIF - 1]) @@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb->dev = NULL; if (err) goto out_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock_bh(); for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { + rt = rcu_dereference_bh(rt->dst.dn_next), idx++) { if (idx < s_idx) continue; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou { struct dn_rt_cache_iter_state *s = seq->private; - rt = rt->u.dst.dn_next; + rt = rt->dst.dn_next; while(!rt) { rcu_read_unlock_bh(); if (--s->bucket < 0) @@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", - rt->u.dst.dev ? rt->u.dst.dev->name : "*", + rt->dst.dev ? rt->dst.dev->name : "*", dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), - atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, - (int) dst_metric(&rt->u.dst, RTAX_RTT)); + atomic_read(&rt->dst.__refcnt), + rt->dst.__use, + (int) dst_metric(&rt->dst, RTAX_RTT)); return 0; } diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig new file mode 100644 index 000000000000..50d49f7e0472 --- /dev/null +++ b/net/dns_resolver/Kconfig @@ -0,0 +1,27 @@ +# +# Configuration for DNS Resolver +# +config DNS_RESOLVER + tristate "DNS Resolver support" + depends on NET && KEYS + help + Saying Y here will include support for the DNS Resolver key type + which can be used to make upcalls to perform DNS lookups in + userspace. + + DNS Resolver is used to query DNS server for information. Examples + being resolving a UNC hostname element to an IP address for CIFS or + performing a DNS query for AFSDB records so that AFS can locate a + cell's volume location database servers. + + DNS Resolver is used by the CIFS and AFS modules, and would support + SMB2 later. DNS Resolver is supported by the userspace upcall + helper "/sbin/dns.resolver" via /etc/request-key.conf. + + See <file:Documentation/networking/dns_resolver.txt> for further + information. + + To compile this as a module, choose M here: the module will be called + dnsresolver. + + If unsure, say N. diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile new file mode 100644 index 000000000000..c0ef4e71dc49 --- /dev/null +++ b/net/dns_resolver/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Linux DNS Resolver. +# + +obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o + +dns_resolver-objs := dns_key.o dns_query.o diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c new file mode 100644 index 000000000000..400a04d5c9a1 --- /dev/null +++ b/net/dns_resolver/dns_key.c @@ -0,0 +1,211 @@ +/* Key type used to cache DNS lookups made by the kernel + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/keyctl.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> +#include "internal.h" + +MODULE_DESCRIPTION("DNS Resolver"); +MODULE_AUTHOR("Wang Lei"); +MODULE_LICENSE("GPL"); + +unsigned dns_resolver_debug; +module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); + +const struct cred *dns_resolver_cache; + +/* + * Instantiate a user defined key for dns_resolver. + * + * The data must be a NUL-terminated string, with the NUL char accounted in + * datalen. + * + * If the data contains a '#' characters, then we take the clause after each + * one to be an option of the form 'key=value'. The actual data of interest is + * the string leading up to the first '#'. For instance: + * + * "ip1,ip2,...#foo=bar" + */ +static int +dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +{ + struct user_key_payload *upayload; + int ret; + size_t result_len = 0; + const char *data = _data, *opt; + + kenter("%%%d,%s,'%s',%zu", + key->serial, key->description, data, datalen); + + if (datalen <= 1 || !data || data[datalen - 1] != '\0') + return -EINVAL; + datalen--; + + /* deal with any options embedded in the data */ + opt = memchr(data, '#', datalen); + if (!opt) { + kdebug("no options currently supported"); + return -EINVAL; + } + + result_len = datalen; + ret = key_payload_reserve(key, result_len); + if (ret < 0) + return -EINVAL; + + upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + if (!upayload) { + kleave(" = -ENOMEM"); + return -ENOMEM; + } + + upayload->datalen = result_len; + memcpy(upayload->data, data, result_len); + upayload->data[result_len] = '\0'; + rcu_assign_pointer(key->payload.data, upayload); + + kleave(" = 0"); + return 0; +} + +/* + * The description is of the form "[<type>:]<domain_name>" + * + * The domain name may be a simple name or an absolute domain name (which + * should end with a period). The domain name is case-independent. + */ +static int +dns_resolver_match(const struct key *key, const void *description) +{ + int slen, dlen, ret = 0; + const char *src = key->description, *dsp = description; + + kenter("%s,%s", src, dsp); + + if (!src || !dsp) + goto no_match; + + if (strcasecmp(src, dsp) == 0) + goto matched; + + slen = strlen(src); + dlen = strlen(dsp); + if (slen <= 0 || dlen <= 0) + goto no_match; + if (src[slen - 1] == '.') + slen--; + if (dsp[dlen - 1] == '.') + dlen--; + if (slen != dlen || strncasecmp(src, dsp, slen) != 0) + goto no_match; + +matched: + ret = 1; +no_match: + kleave(" = %d", ret); + return ret; +} + +struct key_type key_type_dns_resolver = { + .name = "dns_resolver", + .instantiate = dns_resolver_instantiate, + .match = dns_resolver_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, +}; + +static int __init init_dns_resolver(void) +{ + struct cred *cred; + struct key *keyring; + int ret; + + printk(KERN_NOTICE "Registering the %s key type\n", + key_type_dns_resolver.name); + + /* create an override credential set with a special thread keyring in + * which DNS requests are cached + * + * this is used to prevent malicious redirections from being installed + * with add_key(). + */ + cred = prepare_kernel_cred(NULL); + if (!cred) + return -ENOMEM; + + keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + if (IS_ERR(keyring)) { + ret = PTR_ERR(keyring); + goto failed_put_cred; + } + + ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); + if (ret < 0) + goto failed_put_key; + + ret = register_key_type(&key_type_dns_resolver); + if (ret < 0) + goto failed_put_key; + + /* instruct request_key() to use this special keyring as a cache for + * the results it looks up */ + cred->thread_keyring = keyring; + cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + dns_resolver_cache = cred; + + kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); + return 0; + +failed_put_key: + key_put(keyring); +failed_put_cred: + put_cred(cred); + return ret; +} + +static void __exit exit_dns_resolver(void) +{ + key_revoke(dns_resolver_cache->thread_keyring); + unregister_key_type(&key_type_dns_resolver); + put_cred(dns_resolver_cache); + printk(KERN_NOTICE "Unregistered %s key type\n", + key_type_dns_resolver.name); +} + +module_init(init_dns_resolver) +module_exit(exit_dns_resolver) +MODULE_LICENSE("GPL"); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c new file mode 100644 index 000000000000..03d5255f5cf2 --- /dev/null +++ b/net/dns_resolver/dns_query.c @@ -0,0 +1,160 @@ +/* Upcall routine, designed to work as a key type and working through + * /sbin/request-key to contact userspace when handling DNS queries. + * + * See Documentation/networking/dns_resolver.txt + * + * Copyright (c) 2007 Igor Mammedov + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Wang Lei (wang840925@gmail.com) + * David Howells (dhowells@redhat.com) + * + * The upcall wrapper used to make an arbitrary DNS query. + * + * This function requires the appropriate userspace tool dns.upcall to be + * installed and something like the following lines should be added to the + * /etc/request-key.conf file: + * + * create dns_resolver * * /sbin/dns.upcall %k + * + * For example to use this module to query AFSDB RR: + * + * create dns_resolver afsdb:* * /sbin/dns.afsdb %k + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/dns_resolver.h> +#include <linux/err.h> +#include <keys/dns_resolver-type.h> +#include <keys/user-type.h> + +#include "internal.h" + +/** + * dns_query - Query the DNS + * @type: Query type (or NULL for straight host->IP lookup) + * @name: Name to look up + * @namelen: Length of name + * @options: Request options (or NULL if no options) + * @_result: Where to place the returned data. + * @_expiry: Where to store the result expiry time (or NULL) + * + * The data will be returned in the pointer at *result, and the caller is + * responsible for freeing it. + * + * The description should be of the form "[<query_type>:]<domain_name>", and + * the options need to be appropriate for the query type requested. If no + * query_type is given, then the query is a straight hostname to IP address + * lookup. + * + * The DNS resolution lookup is performed by upcalling to userspace by way of + * requesting a key of type dns_resolver. + * + * Returns the size of the result on success, -ve error code otherwise. + */ +int dns_query(const char *type, const char *name, size_t namelen, + const char *options, char **_result, time_t *_expiry) +{ + struct key *rkey; + struct user_key_payload *upayload; + const struct cred *saved_cred; + size_t typelen, desclen; + char *desc, *cp; + int ret, len; + + kenter("%s,%*.*s,%zu,%s", + type, (int)namelen, (int)namelen, name, namelen, options); + + if (!name || namelen == 0 || !_result) + return -EINVAL; + + /* construct the query key description as "[<type>:]<name>" */ + typelen = 0; + desclen = 0; + if (type) { + typelen = strlen(type); + if (typelen < 1) + return -EINVAL; + desclen += typelen + 1; + } + + if (!namelen) + namelen = strlen(name); + if (namelen < 3) + return -EINVAL; + desclen += namelen + 1; + + desc = kmalloc(desclen, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + cp = desc; + if (type) { + memcpy(cp, type, typelen); + cp += typelen; + *cp++ = ':'; + } + memcpy(cp, name, namelen); + cp += namelen; + *cp = '\0'; + + if (!options) + options = ""; + kdebug("call request_key(,%s,%s)", desc, options); + + /* make the upcall, using special credentials to prevent the use of + * add_key() to preinstall malicious redirections + */ + saved_cred = override_creds(dns_resolver_cache); + rkey = request_key(&key_type_dns_resolver, desc, options); + revert_creds(saved_cred); + kfree(desc); + if (IS_ERR(rkey)) { + ret = PTR_ERR(rkey); + goto out; + } + + down_read(&rkey->sem); + rkey->perm |= KEY_USR_VIEW; + + ret = key_validate(rkey); + if (ret < 0) + goto put; + + upayload = rcu_dereference_protected(rkey->payload.data, + lockdep_is_held(&rkey->sem)); + len = upayload->datalen; + + ret = -ENOMEM; + *_result = kmalloc(len + 1, GFP_KERNEL); + if (!*_result) + goto put; + + memcpy(*_result, upayload->data, len + 1); + if (_expiry) + *_expiry = rkey->expiry; + + ret = len; +put: + up_read(&rkey->sem); + key_put(rkey); +out: + kleave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(dns_query); diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h new file mode 100644 index 000000000000..189ca9e9b785 --- /dev/null +++ b/net/dns_resolver/internal.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 Wang Lei + * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. + * + * Internal DNS Rsolver stuff + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sched.h> + +/* + * dns_key.c + */ +extern const struct cred *dns_resolver_cache; + +/* + * debug tracing + */ +extern unsigned dns_resolver_debug; + +#define kdebug(FMT, ...) \ +do { \ + if (unlikely(dns_resolver_debug)) \ + printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \ + current->comm, ##__VA_ARGS__); \ +} while (0) + +#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__) +#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index c51b55400dc5..11201784d29a 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -1,7 +1,7 @@ menuconfig NET_DSA bool "Distributed Switch Architecture support" default n - depends on EXPERIMENTAL && !S390 + depends on EXPERIMENTAL && NET_ETHERNET && !S390 select PHYLIB ---help--- This allows you to use hardware switch chips that use diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8fdca56bb08f..64ca2a6fa0d4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -164,10 +164,9 @@ out: static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - struct mii_ioctl_data *mii_data = if_mii(ifr); if (p->phy != NULL) - return phy_mii_ioctl(p->phy, mii_data, cmd); + return phy_mii_ioctl(p->phy, ifr, cmd); return -EOPNOTSUPP; } diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 2a5a8053e000..dc54bd0d083b 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -48,7 +48,7 @@ static const struct proto_ops econet_ops; static struct hlist_head econet_sklist; -static DEFINE_RWLOCK(econet_lock); +static DEFINE_SPINLOCK(econet_lock); static DEFINE_MUTEX(econet_mutex); /* Since there are only 256 possible network numbers (or fewer, depends @@ -98,16 +98,16 @@ struct ec_cb static void econet_remove_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_del_node_init(sk); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } static void econet_insert_socket(struct hlist_head *list, struct sock *sk) { - write_lock_bh(&econet_lock); + spin_lock_bh(&econet_lock); sk_add_node(sk, list); - write_unlock_bh(&econet_lock); + spin_unlock_bh(&econet_lock); } /* @@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char struct sock *sk; struct hlist_node *node; + spin_lock(&econet_lock); sk_for_each(sk, node, &econet_sklist) { struct econet_sock *opt = ec_sk(sk); if ((opt->port == port || opt->port == 0) && (opt->station == station || opt->station == 0) && - (opt->net == net || opt->net == 0)) + (opt->net == net || opt->net == 0)) { + sock_hold(sk); goto found; + } } sk = NULL; found: + spin_unlock(&econet_lock); return sk; } @@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) { struct iphdr *ip = ip_hdr(skb); unsigned char stn = ntohl(ip->saddr) & 0xff; - struct sock *sk; + struct sock *sk = NULL; struct sk_buff *newskb; struct ec_device *edev = skb->dev->ec_ptr; @@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) } aun_send_response(ip->saddr, ah->handle, 3, 0); + sock_put(sk); return; bad: aun_send_response(ip->saddr, ah->handle, 4, 0); + if (sk) + sock_put(sk); } /* @@ -1050,7 +1057,7 @@ release: static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; - struct sock *sk; + struct sock *sk = NULL; struct ec_device *edev = dev->ec_ptr; if (!net_eq(dev_net(dev), &init_net)) @@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, hdr->port)) goto drop; - + sock_put(sk); return NET_RX_SUCCESS; drop: + if (sk) + sock_put(sk); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 61ec0329316c..215c83986a9d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; - unsigned char *rawp; skb->dev = dev; skb_reset_mac_header(skb); @@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - rawp = skb->data; - /* * This is a magic hack to spot IPX packets. Older Novell breaks * the protocol design and runs IPX over 802.3 without an 802.2 LLC * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. */ - if (*(unsigned short *)rawp == 0xFFFF) + if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) return htons(ETH_P_802_3); /* diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c index eb00796758c3..85d574addbc1 100644 --- a/net/ethernet/pe2.c +++ b/net/ethernet/pe2.c @@ -28,11 +28,10 @@ struct datalink_proto *make_EII_client(void) return proto; } +EXPORT_SYMBOL(make_EII_client); void destroy_EII_client(struct datalink_proto *dl) { kfree(dl); } - EXPORT_SYMBOL(destroy_EII_client); -EXPORT_SYMBOL(make_EII_client); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 551ce564b035..6a1100c25a9f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -355,6 +355,8 @@ lookup_protocol: inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; + inet->nodefrag = 0; + if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) @@ -725,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->sendmsg(iocb, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); -static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, + size_t size, int flags) { struct sock *sk = sock->sk; sock_rps_record_flow(sk); /* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + inet_autobind(sk)) return -EAGAIN; if (sk->sk_prot->sendpage) return sk->sk_prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(inet_sendpage); int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -892,10 +897,10 @@ const struct proto_ops inet_stream_ops = { .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, - .sendmsg = tcp_sendmsg, + .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = tcp_sendpage, + .sendpage = inet_sendpage, .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -1100,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (err) return err; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); new_saddr = rt->rt_src; @@ -1166,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk) err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); else { /* Routing failed... */ sk->sk_route_caps = 0; @@ -1425,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt) } EXPORT_SYMBOL_GPL(snmp_fold_field); -int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) +#if BITS_PER_LONG==32 + +u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) +{ + u64 res = 0; + int cpu; + + for_each_possible_cpu(cpu) { + void *bhptr, *userptr; + struct u64_stats_sync *syncp; + u64 v_bh, v_user; + unsigned int start; + + /* first mib used by softirq context, we must use _bh() accessors */ + bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); + syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); + do { + start = u64_stats_fetch_begin_bh(syncp); + v_bh = *(((u64 *) bhptr) + offt); + } while (u64_stats_fetch_retry_bh(syncp, start)); + + /* second mib used in USER context */ + userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); + syncp = (struct u64_stats_sync *)(userptr + syncp_offset); + do { + start = u64_stats_fetch_begin(syncp); + v_user = *(((u64 *) userptr) + offt); + } while (u64_stats_fetch_retry(syncp, start)); + + res += v_bh + v_user; + } + return res; +} +EXPORT_SYMBOL_GPL(snmp_fold_field64); +#endif + +int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long)); + ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long)); + ptr[1] = __alloc_percpu(mibsize, align); if (!ptr[1]) goto err1; return 0; @@ -1488,25 +1529,32 @@ static const struct net_protocol icmp_protocol = { static __net_init int ipv4_mib_init_net(struct net *net) { if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, - sizeof(struct tcp_mib)) < 0) + sizeof(struct tcp_mib), + __alignof__(struct tcp_mib)) < 0) goto err_tcp_mib; if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; if (snmp_mib_init((void __percpu **)net->mib.net_statistics, - sizeof(struct linux_mib)) < 0) + sizeof(struct linux_mib), + __alignof__(struct linux_mib)) < 0) goto err_net_mib; if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, - sizeof(struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udp_mib; if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, - sizeof(struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udplite_mib; if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, - sizeof(struct icmp_mib)) < 0) + sizeof(struct icmp_mib), + __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, - sizeof(struct icmpmsg_mib)) < 0) + sizeof(struct icmpmsg_mib), + __alignof__(struct icmpmsg_mib)) < 0) goto err_icmpmsg_mib; tcp_mib_init(net); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f094b75810db..96c1955b3e2f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -116,6 +116,7 @@ #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) #include <net/atmclip.h> struct neigh_table *clip_tbl_hook; +EXPORT_SYMBOL(clip_tbl_hook); #endif #include <asm/system.h> @@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = { .hh_output = dev_queue_xmit, .queue_xmit = dev_queue_xmit, }; +EXPORT_SYMBOL(arp_broken_ops); struct neigh_table arp_tbl = { .family = AF_INET, @@ -198,6 +200,7 @@ struct neigh_table arp_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL(arp_tbl); int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) { @@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev; - if (!in_dev) + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) { + rcu_read_unlock(); return; - + } switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ @@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) case 2: /* Avoid secondary IPs, get a primary/preferred one */ break; } + rcu_read_unlock(); - if (in_dev) - in_dev_put(in_dev); if (!saddr) saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); @@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) if (ip_route_output_key(net, &rt, &fl) < 0) return 1; - if (rt->u.dst.dev != dev) { + if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); flag = 1; } @@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) kfree_skb(skb); return 1; } +EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ @@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct in_device *out_dev; int imi, omi = -1; - if (rt->u.dst.dev == dev) + if (rt->dst.dev == dev) return 0; if (!IN_DEV_PROXY_ARP(in_dev)) @@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, /* place to check for proxy_arp for routes */ - if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { + out_dev = __in_dev_get_rcu(rt->dst.dev); + if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); - in_dev_put(out_dev); - } + return (omi != imi && omi != -1); } @@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev, __be32 sip, __be32 tip) { /* Private VLAN is only concerned about the same ethernet segment */ - if (rt->u.dst.dev != dev) + if (rt->dst.dev != dev) return 0; /* Don't reply on self probes (often done by windowz boxes)*/ @@ -698,6 +704,7 @@ out: kfree_skb(skb); return NULL; } +EXPORT_SYMBOL(arp_create); /* * Send an arp packet. @@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb) /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); } +EXPORT_SYMBOL(arp_xmit); /* * Create and send an arp packet. @@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, arp_xmit(skb); } +EXPORT_SYMBOL(arp_send); /* * Process an arp request. @@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip, static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct arphdr *arp; unsigned char *arp_ptr; struct rtable *rt; @@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb) arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); } else { pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); - in_dev_put(in_dev); return 0; } goto out; @@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb) } out: - if (in_dev) - in_dev_put(in_dev); consume_skb(skb); return 0; } @@ -1045,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, struct rtable * rt; if ((err = ip_route_output_key(net, &rt, &fl)) != 0) return err; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (!dev) return -EINVAL; @@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void) } #endif /* CONFIG_PROC_FS */ - -EXPORT_SYMBOL(arp_broken_ops); -EXPORT_SYMBOL(arp_find); -EXPORT_SYMBOL(arp_create); -EXPORT_SYMBOL(arp_xmit); -EXPORT_SYMBOL(arp_send); -EXPORT_SYMBOL(arp_tbl); - -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) -EXPORT_SYMBOL(clip_tbl_hook); -#endif diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index fb2465811b48..f0550941df7b 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -69,9 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); return(0); } - EXPORT_SYMBOL(ip4_datagram_connect); - diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 382bc768ed56..da14c49284f4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } ip_mc_up(in_dev); /* fall through */ + case NETDEV_NOTIFY_PEERS: case NETDEV_CHANGEADDR: /* Send gratuitous ARP to notify of link change */ if (IN_DEV_ARP_NOTIFY(in_dev)) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4f0ed458c883..a43968918350 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -175,6 +175,7 @@ out: fib_res_put(&res); return dev; } +EXPORT_SYMBOL(ip_dev_find); /* * Find address type as if only "dev" was present in the system. If @@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr) { return __inet_dev_addr_type(net, NULL, addr); } +EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { return __inet_dev_addr_type(net, dev, addr); } +EXPORT_SYMBOL(inet_dev_addr_type); /* Given (packet source, input interface) and optional (dst, oif, tos): - (main) check, that source is valid i.e. not broadcast or our local @@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (no_addr) goto last_resort; if (rpf == 1) - goto e_inval; + goto e_rpf; fl.oif = dev->ifindex; ret = 0; @@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, last_resort: if (rpf) - goto e_inval; + goto e_rpf; *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; @@ -308,6 +311,8 @@ e_inval_res: fib_res_put(&res); e_inval: return -EINVAL; +e_rpf: + return -EXDEV; } static inline __be32 sk_extract_addr(struct sockaddr *addr) @@ -1075,7 +1080,3 @@ void __init ip_fib_init(void) fib_hash_init(); } - -EXPORT_SYMBOL(inet_addr_type); -EXPORT_SYMBOL(inet_dev_addr_type); -EXPORT_SYMBOL(ip_dev_find); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index d65e9215bcd7..a0d847c7cba5 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = { .fatal = 1, }, }; +EXPORT_SYMBOL(icmp_err_convert); /* * ICMP control array. This specifies what to do with each ICMP. @@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout) dst->rate_tokens = token; return rc; } +EXPORT_SYMBOL(xrlim_allow); static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { - struct dst_entry *dst = &rt->u.dst; + struct dst_entry *dst = &rt->dst; int rc = 1; if (type > NR_ICMP_TYPES) @@ -327,7 +329,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net((*rt)->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -359,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); struct sock *sk; struct inet_sock *inet; __be32 daddr; @@ -427,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!rt) goto out; - net = dev_net(rt->u.dst.dev); + net = dev_net(rt->dst.dev); /* * Find the original header. It is expected to be valid, of course. @@ -596,9 +598,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->u.dst.dev); + RT_TOS(tos), rt2->dst.dev); - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); skb_in->_skb_refdst = orefdst; /* restore old refdst */ } @@ -610,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) XFRM_LOOKUP_ICMP); switch (err) { case 0: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = rt2; break; case -EPERM: @@ -629,7 +631,7 @@ route_done: /* RFC says return as much as we can without exceeding 576 bytes. */ - room = dst_mtu(&rt->u.dst); + room = dst_mtu(&rt->dst); if (room > 576) room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; @@ -647,6 +649,7 @@ out_unlock: icmp_xmit_unlock(sk); out:; } +EXPORT_SYMBOL(icmp_send); /* @@ -925,6 +928,7 @@ static void icmp_address(struct sk_buff *skb) /* * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain * loudly if an inconsistency is found. + * called with rcu_read_lock() */ static void icmp_address_reply(struct sk_buff *skb) @@ -935,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb) struct in_ifaddr *ifa; if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) - goto out; + return; - in_dev = in_dev_get(dev); + in_dev = __in_dev_get_rcu(dev); if (!in_dev) - goto out; - rcu_read_lock(); + return; + if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { @@ -958,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb) mp, dev->name, &rt->rt_src); } } - rcu_read_unlock(); - in_dev_put(in_dev); -out:; } static void icmp_discard(struct sk_buff *skb) @@ -974,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb) { struct icmphdr *icmph; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -1216,7 +1217,3 @@ int __init icmp_init(void) { return register_pernet_subsys(&icmp_sk_ops); } - -EXPORT_SYMBOL(icmp_err_convert); -EXPORT_SYMBOL(icmp_send); -EXPORT_SYMBOL(xrlim_allow); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 5fff865a4fa7..a1ad0e7180d2 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb->dev = dev; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = rt->rt_src; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->u.dst, NULL); + ip_select_ident(pip, &rt->dst, NULL); ((u8*)&pip[1])[0] = IPOPT_RA; ((u8*)&pip[1])[1] = 4; ((u8*)&pip[1])[2] = 0; @@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = rt->rt_src; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); ((u8*)&iph[1])[0] = IPOPT_RA; ((u8*)&iph[1])[1] = 4; ((u8*)&iph[1])[2] = 0; @@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, read_unlock(&in_dev->mc_list_lock); } +/* called in rcu_read_lock() section */ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = in_dev_get(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); int len = skb->len; if (in_dev == NULL) goto drop; if (!pskb_may_pull(skb, sizeof(struct igmphdr))) - goto drop_ref; + goto drop; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; if (__skb_checksum_complete(skb)) - goto drop_ref; + goto drop; } ih = igmp_hdr(skb); @@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb) break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 - in_dev_put(in_dev); return pim_rcv_v1(skb); #endif case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb) break; } -drop_ref: - in_dev_put(in_dev); drop: kfree_skb(skb); return 0; @@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) out: return; } +EXPORT_SYMBOL(ip_mc_inc_group); /* * Resend IGMP JOIN report; used for bonding. @@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im) igmp_ifc_event(in_dev); #endif } +EXPORT_SYMBOL(ip_mc_rejoin_group); /* * A socket has left a multicast group on device dev @@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) } } } +EXPORT_SYMBOL(ip_mc_dec_group); /* Device changing type */ @@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); } if (dev) { @@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc) if (dpsf->sf_inaddr == psf->sf_inaddr) break; if (!dpsf) { - dpsf = (struct ip_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; @@ -1807,6 +1807,7 @@ done: rtnl_unlock(); return err; } +EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { @@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void) return register_pernet_subsys(&igmp_net_ops); } #endif - -EXPORT_SYMBOL(ip_mc_dec_group); -EXPORT_SYMBOL(ip_mc_inc_group); -EXPORT_SYMBOL(ip_mc_join_group); -EXPORT_SYMBOL(ip_mc_rejoin_group); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 70eb3507c406..7174370b1195 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk, } return node != NULL; } - EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); /* Obtain a reference to a local port for the given sock, @@ -212,7 +211,6 @@ fail: local_bh_enable(); return ret; } - EXPORT_SYMBOL_GPL(inet_csk_get_port); /* @@ -305,7 +303,6 @@ out_err: *err = error; goto out; } - EXPORT_SYMBOL(inet_csk_accept); /* @@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk, setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } - EXPORT_SYMBOL(inet_csk_init_xmit_timers); void inet_csk_clear_xmit_timers(struct sock *sk) @@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk) sk_stop_timer(sk, &icsk->icsk_delack_timer); sk_stop_timer(sk, &sk->sk_timer); } - EXPORT_SYMBOL(inet_csk_clear_xmit_timers); void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } - EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } - EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, @@ -383,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; - return &rt->u.dst; + return &rt->dst; route_err: ip_rt_put(rt); @@ -391,7 +384,6 @@ no_route: IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } - EXPORT_SYMBOL_GPL(inet_csk_route_req); static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, @@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, return req; } - EXPORT_SYMBOL_GPL(inet_csk_search_req); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, @@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); inet_csk_reqsk_queue_added(sk, timeout); } +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); /* Only thing we need from tcp.h */ extern int sysctl_tcp_synack_retries; -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); /* Decide when to expire the request and when to resend SYN-ACK */ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, @@ -569,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, if (lopt->qlen) inet_csk_reset_keepalive_timer(parent, interval); } - EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, @@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } return newsk; } - EXPORT_SYMBOL_GPL(inet_csk_clone); /* @@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk) percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } - EXPORT_SYMBOL(inet_csk_destroy_sock); int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) @@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } - EXPORT_SYMBOL_GPL(inet_csk_listen_start); /* @@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk) } WARN_ON(sk->sk_ack_backlog); } - EXPORT_SYMBOL_GPL(inet_csk_listen_stop); void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) @@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) sin->sin_addr.s_addr = inet->inet_daddr; sin->sin_port = inet->inet_dport; } - EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); #ifdef CONFIG_COMPAT @@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, return icsk->icsk_af_ops->getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, @@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, return icsk->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a2ca6aed763b..5ff2a51b6d0c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) fq->last_in |= INET_FRAG_COMPLETE; } } - EXPORT_SYMBOL(inet_frag_kill); static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d3e160a88219..fb7ad5a21ff3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk) __inet_put_port(sk); local_bh_enable(); } - EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) @@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } - EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, @@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), __inet_check_established, __inet_hash_nolisten); } - EXPORT_SYMBOL_GPL(inet_hash_connect); void inet_hashinfo_init(struct inet_hashinfo *h) @@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h) i + LISTENING_NULLS_BASE); } } - EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6bcfe52a9c87..9ffa24b9a804 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -51,8 +51,8 @@ * lookups performed with disabled BHs. * * Serialisation issues. - * 1. Nodes may appear in the tree only with the pool write lock held. - * 2. Nodes may disappear from the tree only with the pool write lock held + * 1. Nodes may appear in the tree only with the pool lock held. + * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. * 3. Nodes appears and disappears from unused node list only under * "inet_peer_unused_lock". @@ -64,23 +64,31 @@ * usually under some other lock to prevent node disappearing * dtime: unused node list lock * v4daddr: unchangeable - * ip_id_count: idlock + * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height -static struct inet_peer peer_fake_node = { - .avl_left = &peer_fake_node, - .avl_right = &peer_fake_node, + +#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) +static const struct inet_peer peer_fake_node = { + .avl_left = peer_avl_empty, + .avl_right = peer_avl_empty, .avl_height = 0 }; -#define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; -static DEFINE_RWLOCK(peer_pool_lock); + +static struct { + struct inet_peer *root; + spinlock_t lock; + int total; +} peers = { + .root = peer_avl_empty, + .lock = __SPIN_LOCK_UNLOCKED(peers.lock), + .total = 0, +}; #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more * aggressively at this stage */ @@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min int inet_peer_gc_mintime __read_mostly = 10 * HZ; int inet_peer_gc_maxtime __read_mostly = 120 * HZ; -static LIST_HEAD(unused_peers); -static DEFINE_SPINLOCK(inet_peer_unused_lock); +static struct { + struct list_head list; + spinlock_t lock; +} unused_peers = { + .list = LIST_HEAD_INIT(unused_peers.list), + .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), +}; static void peer_check_expire(unsigned long dummy); static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); @@ -116,7 +129,7 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); /* All the timers, started at system startup tend @@ -131,38 +144,69 @@ void __init inet_initpeers(void) /* Called with or without local BH being disabled. */ static void unlink_from_unused(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - list_del_init(&p->unused); - spin_unlock_bh(&inet_peer_unused_lock); + if (!list_empty(&p->unused)) { + spin_lock_bh(&unused_peers.lock); + list_del_init(&p->unused); + spin_unlock_bh(&unused_peers.lock); + } } /* * Called with local BH disabled and the pool lock held. - * _stack is known to be NULL or not at compile time, - * so compiler will optimize the if (_stack) tests. */ #define lookup(_daddr, _stack) \ ({ \ struct inet_peer *u, **v; \ - if (_stack != NULL) { \ - stackptr = _stack; \ - *stackptr++ = &peer_root; \ - } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + \ + stackptr = _stack; \ + *stackptr++ = &peers.root; \ + for (u = peers.root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ - if (_stack != NULL) \ - *stackptr++ = v; \ + *stackptr++ = v; \ u = *v; \ } \ u; \ }) -/* Called with local BH disabled and the pool write lock held. */ +/* + * Called with rcu_read_lock_bh() + * Because we hold no lock against a writer, its quite possible we fall + * in an endless loop. + * But every pointer we follow is guaranteed to be valid thanks to RCU. + * We exit from this function if number of links exceeds PEER_MAXDEPTH + */ +static struct inet_peer *lookup_rcu_bh(__be32 daddr) +{ + struct inet_peer *u = rcu_dereference_bh(peers.root); + int count = 0; + + while (u != peer_avl_empty) { + if (daddr == u->v4daddr) { + /* Before taking a reference, check if this entry was + * deleted, unlink_from_pool() sets refcnt=-1 to make + * distinction between an unused entry (refcnt=0) and + * a freed one. + */ + if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1))) + u = NULL; + return u; + } + if ((__force __u32)daddr < (__force __u32)u->v4daddr) + u = rcu_dereference_bh(u->avl_left); + else + u = rcu_dereference_bh(u->avl_right); + if (unlikely(++count == PEER_MAXDEPTH)) + break; + } + return NULL; +} + +/* Called with local BH disabled and the pool lock held. */ #define lookup_rightempty(start) \ ({ \ struct inet_peer *u, **v; \ @@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p) u; \ }) -/* Called with local BH disabled and the pool write lock held. +/* Called with local BH disabled and the pool lock held. * Variable names are the proof of operation correctness. - * Look into mm/map_avl.c for more detail description of the ideas. */ + * Look into mm/map_avl.c for more detail description of the ideas. + */ static void peer_avl_rebalance(struct inet_peer **stack[], struct inet_peer ***stackend) { @@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[], } } -/* Called with local BH disabled and the pool write lock held. */ +/* Called with local BH disabled and the pool lock held. */ #define link_to_pool(n) \ do { \ n->avl_height = 1; \ n->avl_left = peer_avl_empty; \ n->avl_right = peer_avl_empty; \ + smp_wmb(); /* lockless readers can catch us now */ \ **--stackptr = n; \ peer_avl_rebalance(stack, stackptr); \ -} while(0) +} while (0) + +static void inetpeer_free_rcu(struct rcu_head *head) +{ + kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); +} /* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p) @@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p) do_free = 0; - write_lock_bh(&peer_pool_lock); + spin_lock_bh(&peers.lock); /* Check the reference counter. It was artificially incremented by 1 - * in cleanup() function to prevent sudden disappearing. If the - * reference count is still 1 then the node is referenced only as `p' - * here and from the pool. So under the exclusive pool lock it's safe - * to remove the node and free it later. */ - if (atomic_read(&p->refcnt) == 1) { + * in cleanup() function to prevent sudden disappearing. If we can + * atomically (because of lockless readers) take this last reference, + * it's safe to remove the node and free it later. + * We use refcnt=-1 to alert lockless readers this entry is deleted. + */ + if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { struct inet_peer **stack[PEER_MAXDEPTH]; struct inet_peer ***stackptr, ***delp; if (lookup(p->v4daddr, stack) != p) @@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p) delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + peers.total--; do_free = 1; } - write_unlock_bh(&peer_pool_lock); + spin_unlock_bh(&peers.lock); if (do_free) - kmem_cache_free(peer_cachep, p); + call_rcu_bh(&p->rcu, inetpeer_free_rcu); else /* The node is used again. Decrease the reference counter * back. The loop "cleanup -> unlink_from_unused * -> unlink_from_pool -> putpeer -> link_to_unused * -> cleanup (for the same node)" * doesn't really exist because the entry will have a - * recent deletion time and will not be cleaned again soon. */ + * recent deletion time and will not be cleaned again soon. + */ inet_putpeer(p); } @@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl) struct inet_peer *p = NULL; /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&inet_peer_unused_lock); - if (!list_empty(&unused_peers)) { + spin_lock_bh(&unused_peers.lock); + if (!list_empty(&unused_peers.list)) { __u32 delta; - p = list_first_entry(&unused_peers, struct inet_peer, unused); + p = list_first_entry(&unused_peers.list, struct inet_peer, unused); delta = (__u32)jiffies - p->dtime; if (delta < ttl) { /* Do not prune fresh entries. */ - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); return -1; } @@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl) * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); } - spin_unlock_bh(&inet_peer_unused_lock); + spin_unlock_bh(&unused_peers.lock); if (p == NULL) /* It means that the total number of USED entries has @@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl) /* Called with or without local BH being disabled. */ struct inet_peer *inet_getpeer(__be32 daddr, int create) { - struct inet_peer *p, *n; + struct inet_peer *p; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; - /* Look up for the address quickly. */ - read_lock_bh(&peer_pool_lock); - p = lookup(daddr, NULL); - if (p != peer_avl_empty) - atomic_inc(&p->refcnt); - read_unlock_bh(&peer_pool_lock); + /* Look up for the address quickly, lockless. + * Because of a concurrent writer, we might not find an existing entry. + */ + rcu_read_lock_bh(); + p = lookup_rcu_bh(daddr); + rcu_read_unlock_bh(); + + if (p) { + /* The existing node has been found. + * Remove the entry from unused list if it was there. + */ + unlink_from_unused(p); + return p; + } + /* retry an exact lookup, taking the lock before. + * At least, nodes should be hot in our cache. + */ + spin_lock_bh(&peers.lock); + p = lookup(daddr, stack); if (p != peer_avl_empty) { - /* The existing node has been found. */ + atomic_inc(&p->refcnt); + spin_unlock_bh(&peers.lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; } + p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; + if (p) { + p->v4daddr = daddr; + atomic_set(&p->refcnt, 1); + atomic_set(&p->rid, 0); + atomic_set(&p->ip_id_count, secure_ip_id(daddr)); + p->tcp_ts_stamp = 0; + INIT_LIST_HEAD(&p->unused); + + + /* Link the node. */ + link_to_pool(p); + peers.total++; + } + spin_unlock_bh(&peers.lock); - if (!create) - return NULL; - - /* Allocate the space outside the locked region. */ - n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); - if (n == NULL) - return NULL; - n->v4daddr = daddr; - atomic_set(&n->refcnt, 1); - atomic_set(&n->rid, 0); - atomic_set(&n->ip_id_count, secure_ip_id(daddr)); - n->tcp_ts_stamp = 0; - - write_lock_bh(&peer_pool_lock); - /* Check if an entry has suddenly appeared. */ - p = lookup(daddr, stack); - if (p != peer_avl_empty) - goto out_free; - - /* Link the node. */ - link_to_pool(n); - INIT_LIST_HEAD(&n->unused); - peer_total++; - write_unlock_bh(&peer_pool_lock); - - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ cleanup_once(0); - return n; - -out_free: - /* The appropriate node is already in the pool. */ - atomic_inc(&p->refcnt); - write_unlock_bh(&peer_pool_lock); - /* Remove the entry from unused list if it was there. */ - unlink_from_unused(p); - /* Free preallocated the preallocated node. */ - kmem_cache_free(peer_cachep, n); return p; } @@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy) unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) ttl = inet_peer_minttl; else ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; while (!cleanup_once(ttl)) { if (jiffies != now) break; @@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - if (peer_total >= inet_peer_threshold) + if (peers.total >= inet_peer_threshold) peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; else peer_periodic_timer.expires = jiffies + inet_peer_gc_maxtime - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + peers.total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { - spin_lock_bh(&inet_peer_unused_lock); - if (atomic_dec_and_test(&p->refcnt)) { - list_add_tail(&p->unused, &unused_peers); + local_bh_disable(); + + if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { + list_add_tail(&p->unused, &unused_peers.list); p->dtime = (__u32)jiffies; + spin_unlock(&unused_peers.lock); } - spin_unlock_bh(&inet_peer_unused_lock); + + local_bh_enable(); } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 56cdf68a074c..99461f09320f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto sr_failed; - if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && + if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { - IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(dst_mtu(&rt->u.dst))); + htonl(dst_mtu(&rt->dst))); goto drop; } /* We are about to mangle packet. Copy it! */ - if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) + if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) goto drop; iph = ip_hdr(skb); @@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->u.dst.dev, ip_forward_finish); + rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 75347ea70ea0..b7c41654dde5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) } /* Memory Tracking Functions. */ -static __inline__ void frag_kfree_skb(struct netns_frags *nf, - struct sk_buff *skb, int *work) +static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp) fp = qp->q.fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(qp->q.net, fp, NULL); + frag_kfree_skb(qp->q.net, fp); fp = xp; } while (fp); @@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp) qp->q.len = 0; qp->q.meat = 0; qp->q.fragments = NULL; + qp->q.fragments_tail = NULL; qp->iif = 0; return 0; @@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = qp->q.fragments_tail; + if (!prev || FRAG_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for (next = qp->q.fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) @@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->q.fragments = next; qp->q.meat -= free_it->len; - frag_kfree_skb(qp->q.net, free_it, NULL); + frag_kfree_skb(qp->q.net, free_it); } } @@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + qp->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, goto out_nomem; fp->next = head->next; + if (!fp->next) + qp->q.fragments_tail = fp; prev->next = fp; skb_morph(head, qp->q.fragments); @@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &qp->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &qp->q.net->mem); } + atomic_sub(head->truesize, &qp->q.net->mem); head->next = NULL; head->dev = dev; @@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph->tot_len = htons(len); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; + qp->q.fragments_tail = NULL; return 0; out_nomem: @@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) kfree_skb(skb); return -ENOMEM; } +EXPORT_SYMBOL(ip_defrag); #ifdef CONFIG_SYSCTL static int zero; @@ -777,5 +786,3 @@ void __init ipfrag_init(void) ip4_frags.secret_interval = 10 * 60 * HZ; inet_frags_init(&ip4_frags); } - -EXPORT_SYMBOL(ip_defrag); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 32618e11076d..945b20a5ad50 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tos = 0; if (skb->protocol == htons(ETH_P_IP)) tos = old_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); } { @@ -745,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -755,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; + mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -803,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { @@ -830,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -853,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; @@ -915,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_GRE }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1174,7 +1176,7 @@ static int ipgre_open(struct net_device *dev) struct rtable *rt; if (ip_route_output_key(dev_net(dev), &rt, &fl)) return -EADDRNOTAVAIL; - dev = rt->u.dst.dev; + dev = rt->dst.dev; ip_rt_put(rt); if (__in_dev_get_rtnl(dev) == NULL) return -EADDRNOTAVAIL; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d930dc5e4d85..d859bcc26cb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -146,7 +146,7 @@ #include <linux/netlink.h> /* - * Process Router Attention IP option + * Process Router Attention IP option (RFC 2113) */ int ip_call_ra_chain(struct sk_buff *skb) { @@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) struct sock *last = NULL; struct net_device *dev = skb->dev; - read_lock(&ip_ra_lock); - for (ra = ip_ra_chain; ra; ra = ra->next) { + for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { - read_unlock(&ip_ra_lock); + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; - } } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { raw_rcv(last, skb); - read_unlock(&ip_ra_lock); return 1; } - read_unlock(&ip_ra_lock); return 0; } @@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) } if (unlikely(opt->srr)) { - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) printk(KERN_INFO "source route option %pI4 -> %pI4\n", &iph->saddr, &iph->daddr); - in_dev_put(in_dev); goto drop; } - - in_dev_put(in_dev); } if (ip_options_rcv_srr(skb)) @@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb) else if (err == -ENETUNREACH) IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); goto drop; } } #ifdef CONFIG_NET_CLS_ROUTE if (unlikely(skb_dst(skb)->tclassid)) { - struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); + struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; @@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 041d41df1224..04b69896df5f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph) iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } +EXPORT_SYMBOL(ip_send_check); int __ip_local_out(struct sk_buff *skb) { @@ -151,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->u.dst)) + if (ip_dont_fragment(sk, &rt->dst)) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); if (opt && opt->optlen) { iph->ihl += opt->optlen>>2; @@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, /* Send it out. */ return ip_local_out(skb); } - EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); static inline int ip_finish_output2(struct sk_buff *skb) @@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) { struct sock *sk = skb->sk; struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->u.dst.dev; + struct net_device *dev = rt->dst.dev; /* * If the indicated interface is up and running, send the packet. @@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb) if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); packet_routed: if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) @@ -372,11 +372,11 @@ packet_routed: skb_reset_network_header(skb); iph = ip_hdr(skb); *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); - if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) + if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) iph->frag_off = htons(IP_DF); else iph->frag_off = 0; - iph->ttl = ip_select_ttl(inet, &rt->u.dst); + iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; @@ -387,7 +387,7 @@ packet_routed: ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->u.dst, sk, + ip_select_ident_more(iph, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -403,6 +403,7 @@ no_route: kfree_skb(skb); return -EHOSTUNREACH; } +EXPORT_SYMBOL(ip_queue_xmit); static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) @@ -411,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->priority = from->priority; to->protocol = from->protocol; skb_dst_drop(to); - skb_dst_set(to, dst_clone(skb_dst(from))); + skb_dst_copy(to, from); to->dev = from->dev; to->mark = from->mark; @@ -442,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct iphdr *iph; - int raw = 0; int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs, pad; + unsigned int mtu, hlen, left, len, ll_rs; int offset; __be16 not_last_frag; struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->u.dst.dev; + dev = rt->dst.dev; /* * Point into the IP datagram header. @@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ hlen = iph->ihl * 4; - mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) mtu -= nf_bridge_mtu_reduction(skb); @@ -580,14 +580,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) slow_path: left = skb->len - hlen; /* Space per frame */ - ptr = raw + hlen; /* Where to start from */ + ptr = hlen; /* Where to start from */ /* for bridged IP traffic encapsulated inside f.e. a vlan header, * we need to make room for the encapsulating header */ - pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); - mtu -= pad; + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); /* * Fragment the datagram. @@ -697,7 +695,6 @@ fail: IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; } - EXPORT_SYMBOL(ip_fragment); int @@ -716,6 +713,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk } return 0; } +EXPORT_SYMBOL(ip_generic_getfrag); static inline __wsum csum_page(struct page *page, int offset, int copy) @@ -833,13 +831,13 @@ int ip_append_data(struct sock *sk, */ *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : - dst_mtu(rt->u.dst.path); - inet->cork.dst = &rt->u.dst; + rt->dst.dev->mtu : + dst_mtu(rt->dst.path); + inet->cork.dst = &rt->dst; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - if ((exthdrlen = rt->u.dst.header_len) != 0) { + if ((exthdrlen = rt->dst.header_len) != 0) { length += exthdrlen; transhdrlen += exthdrlen; } @@ -852,7 +850,7 @@ int ip_append_data(struct sock *sk, exthdrlen = 0; mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -869,7 +867,7 @@ int ip_append_data(struct sock *sk, */ if (transhdrlen && length + fragheaderlen <= mtu && - rt->u.dst.dev->features & NETIF_F_V4_CSUM && + rt->dst.dev->features & NETIF_F_V4_CSUM && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -878,7 +876,7 @@ int ip_append_data(struct sock *sk, inet->cork.length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags); @@ -926,7 +924,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -937,7 +935,7 @@ alloc_new_skb: * the last. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; if (transhdrlen) { skb = sock_alloc_send_skb(sk, @@ -1010,7 +1008,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1105,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, if (inet->cork.flags & IPCORK_OPT) opt = inet->cork.opt; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) + if (!(rt->dst.dev->features&NETIF_F_SG)) return -EOPNOTSUPP; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); mtu = inet->cork.fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1125,7 +1123,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, inet->cork.length += size; if ((size + skb->len > mtu) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } @@ -1277,8 +1275,8 @@ int ip_push_pending_frames(struct sock *sk) * If local_df is set too, we still allow to fragment this frame * locally. */ if (inet->pmtudisc >= IP_PMTUDISC_DO || - (skb->len <= dst_mtu(&rt->u.dst) && - ip_dont_fragment(sk, &rt->u.dst))) + (skb->len <= dst_mtu(&rt->dst) && + ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); if (inet->cork.flags & IPCORK_OPT) @@ -1287,7 +1285,7 @@ int ip_push_pending_frames(struct sock *sk) if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else - ttl = ip_select_ttl(inet, &rt->u.dst); + ttl = ip_select_ttl(inet, &rt->dst); iph = (struct iphdr *)skb->data; iph->version = 4; @@ -1298,7 +1296,7 @@ int ip_push_pending_frames(struct sock *sk) } iph->tos = inet->tos; iph->frag_off = df; - ip_select_ident(iph, &rt->u.dst, sk); + ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; iph->saddr = rt->rt_src; @@ -1311,7 +1309,7 @@ int ip_push_pending_frames(struct sock *sk) * on dst refcount */ inet->cork.dst = NULL; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) icmp_out_count(net, ((struct icmphdr *) @@ -1448,7 +1446,3 @@ void __init ip_init(void) igmp_mc_proc_init(); #endif } - -EXPORT_SYMBOL(ip_generic_getfrag); -EXPORT_SYMBOL(ip_queue_xmit); -EXPORT_SYMBOL(ip_send_check); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce231780a2b1..6c40a8c46e79 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) sent to multicast group to reach destination designated router. */ struct ip_ra_chain *ip_ra_chain; -DEFINE_RWLOCK(ip_ra_lock); +static DEFINE_SPINLOCK(ip_ra_lock); + + +static void ip_ra_destroy_rcu(struct rcu_head *head) +{ + struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); + + sock_put(ra->saved_sk); + kfree(ra); +} int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)) @@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on, new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - write_lock_bh(&ip_ra_lock); + spin_lock_bh(&ip_ra_lock); for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); kfree(new_ra); return -EADDRINUSE; } - *rap = ra->next; - write_unlock_bh(&ip_ra_lock); + /* dont let ip_call_ra_chain() use sk again */ + ra->sk = NULL; + rcu_assign_pointer(*rap, ra->next); + spin_unlock_bh(&ip_ra_lock); if (ra->destructor) ra->destructor(sk); - sock_put(sk); - kfree(ra); + /* + * Delay sock_put(sk) and kfree(ra) after one rcu grace + * period. This guarantee ip_call_ra_chain() dont need + * to mess with socket refcounts. + */ + ra->saved_sk = sk; + call_rcu(&ra->rcu, ip_ra_destroy_rcu); return 0; } } if (new_ra == NULL) { - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return -ENOBUFS; } new_ra->sk = sk; new_ra->destructor = destructor; new_ra->next = ra; - *rap = new_ra; + rcu_assign_pointer(*rap, new_ra); sock_hold(sk); - write_unlock_bh(&ip_ra_lock); + spin_unlock_bh(&ip_ra_lock); return 0; } @@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | - (1<<IP_MINTTL))) || + (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_ALL || optname == IP_MULTICAST_LOOP || @@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, } inet->hdrincl = val ? 1 : 0; break; + case IP_NODEFRAG: + if (sk->sk_type != SOCK_RAW) { + err = -ENOPROTOOPT; + break; + } + inet->nodefrag = val ? 1 : 0; + break; case IP_MTU_DISCOVER: if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9d84e800cf4..3a6e1ec5e9ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options) memcpy(e, ic_req_params, sizeof(ic_req_params)); e += sizeof(ic_req_params); + if (ic_host_name_set) { + *e++ = 12; /* host-name */ + len = strlen(utsname()->nodename); + *e++ = len; + memcpy(e, utsname()->nodename, len); + e += len; + } if (*vendor_class_identifier) { printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", vendor_class_identifier); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7fd636711037..ec036731a70b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df |= old_iph->frag_off & htons(IP_DF); if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPIP }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 757f25eb9b4b..179fcab866fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) int err; err = ipmr_fib_lookup(net, &fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; @@ -1553,9 +1555,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - dev = rt->u.dst.dev; + dev = rt->dst.dev; - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not allow to send ICMP, so that packets will disappear to blackhole. @@ -1566,7 +1568,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; + encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; if (skb_cow(skb, encap)) { ip_rt_put(rt); @@ -1577,7 +1579,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_decrease_ttl(ip_hdr(skb)); /* FIXME: forward and output firewalls used to be called here. @@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb) goto dont_forward; err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); - if (err < 0) + if (err < 0) { + kfree_skb(skb); return err; + } if (!local) { if (IPCB(skb)->opt.router_alert) { diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855e2175..d88a46c54fd1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* Drop old route. */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ @@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); + RT_TOS(iph->tos), rt->dst.dev) != 0) { + dst_release(&rt->dst); return -1; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); refdst_drop(orefdst); } @@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, skb->len - dataoff, 0); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1ac01b128621..6bccba31d132 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb, arp = arp_hdr(skb); do { const struct arpt_entry_target *t; - int hdr_len; if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { e = arpt_next_entry(e); continue; } - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * skb->dev->addr_len); - ADD_COUNTER(e->counters, hdr_len, 1); + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); t = arpt_get_target_c(e); @@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t, struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -758,7 +757,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) * about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1005,8 +1004,7 @@ static int __do_replace(struct net *net, const char *name, struct arpt_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1159,7 +1157,7 @@ static int do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a4e5fc5df4bf..d2c1311cb28d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int copy_range __read_mostly; static unsigned int queue_total; @@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return entry; } @@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -527,7 +520,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : %d\n" @@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4b6c5ca610fc..c439721b165a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); @@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t, struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -928,7 +930,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1352,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f91c94b9a790..3a43cf36db87 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -53,12 +53,13 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ + struct rcu_head rcu; }; static LIST_HEAD(clusterip_configs); /* clusterip_lock protects the clusterip_configs list */ -static DEFINE_RWLOCK(clusterip_lock); +static DEFINE_SPINLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS static const struct file_operations clusterip_proc_fops; @@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c) atomic_inc(&c->refcount); } + +static void clusterip_config_rcu_free(struct rcu_head *head) +{ + kfree(container_of(head, struct clusterip_config, rcu)); +} + static inline void clusterip_config_put(struct clusterip_config *c) { if (atomic_dec_and_test(&c->refcount)) - kfree(c); + call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } /* decrease the count of entries using/referencing this config. If last @@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { - write_lock_bh(&clusterip_lock); - if (atomic_dec_and_test(&c->entries)) { - list_del(&c->list); - write_unlock_bh(&clusterip_lock); + local_bh_disable(); + if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + list_del_rcu(&c->list); + spin_unlock(&clusterip_lock); + local_bh_enable(); dev_mc_del(c->dev, c->clustermac); dev_put(c->dev); @@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c) #endif return; } - write_unlock_bh(&clusterip_lock); + local_bh_enable(); } static struct clusterip_config * @@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip) { struct clusterip_config *c; - list_for_each_entry(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &clusterip_configs, list) { if (c->clusterip == clusterip) return c; } @@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; - read_lock_bh(&clusterip_lock); + rcu_read_lock_bh(); c = __clusterip_config_find(clusterip); - if (!c) { - read_unlock_bh(&clusterip_lock); - return NULL; + if (c) { + if (unlikely(!atomic_inc_not_zero(&c->refcount))) + c = NULL; + else if (entry) + atomic_inc(&c->entries); } - atomic_inc(&c->refcount); - if (entry) - atomic_inc(&c->entries); - read_unlock_bh(&clusterip_lock); + rcu_read_unlock_bh(); return c; } @@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, } #endif - write_lock_bh(&clusterip_lock); - list_add(&c->list, &clusterip_configs); - write_unlock_bh(&clusterip_lock); + spin_lock_bh(&clusterip_lock); + list_add_rcu(&c->list, &clusterip_configs); + spin_unlock_bh(&clusterip_lock); return c; } @@ -462,7 +469,7 @@ struct arp_payload { __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; __be32 dst_ip; -} __attribute__ ((packed)); +} __packed; #ifdef DEBUG static void arp_print(struct arp_payload *payload) @@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void) #endif nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ + rcu_barrier_bh(); } module_init(clusterip_tg_init); diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5234f4f3499a..915fc17d7ce2 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> +#include <linux/if_arp.h> #include <linux/ip.h> #include <net/icmp.h> #include <net/udp.h> @@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IPT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + printk("%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + printk(":%02x", *p); + } + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf, } #endif - if (in && !out) { - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && skb->dev->hard_header_len && - skb->mac_header != skb->network_header) { - int i; - const unsigned char *p = skb_mac_header(skb); - for (i = 0; i < skb->dev->hard_header_len; i++,p++) - printk("%02x%c", *p, - i==skb->dev->hard_header_len - 1 - ? ' ':':'); - } else - printk(" "); - } + /* MAC logging for input path only. */ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, 0); printk("\n"); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index f43867d1697f..6cdb298f1035 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); @@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = { .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT), + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg_check, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f5f4a888e4ec..b254dafaf429 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) } tcph->rst = 1; - tcph->check = tcp_v4_check(sizeof(struct tcphdr), - niph->saddr, niph->daddr, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); addr_type = RTN_UNSPEC; if (hook != NF_INET_FORWARD @@ -109,13 +110,12 @@ static void send_reset(struct sk_buff *oldskb, int hook) addr_type = RTN_LOCAL; /* ip_route_me_harder expects skb->dst to be set */ - skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); + skb_dst_set_noref(nskb, skb_dst(oldskb)); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); - nskb->ip_summed = CHECKSUM_NONE; /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index cb763ae9ed90..eab8de32f200 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct inet_sock *inet = inet_sk(skb->sk); + + if (inet && inet->nodefrag) + return NF_ACCEPT; + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4f8bddb760c9..8c8632d9b93c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, rcu_read_lock(); proto = __nf_nat_proto_find(orig_tuple->dst.protonum); - /* Change protocol info to have some randomization */ - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { - proto->unique_tuple(tuple, range, maniptype, ct); - goto out; - } - /* Only bother mapping if it's not already in range and unique */ - if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && + (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) && !nf_nat_used_tuple(tuple, ct)) goto out; @@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ @@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(skb, - ip_hdrlen(skb) + sizeof(struct icmphdr), - (ip_hdrlen(skb) + + if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), + (hdrlen + sizeof(struct icmphdr) + inside->ip.ihl * 4), - (u_int16_t)AF_INET, - inside->ip.protocol, + (u_int16_t)AF_INET, inside->ip.protocol, &inner, l3proto, l4proto)) return 0; @@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, - ip_hdrlen(skb) + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, - !manip)) + if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)skb->data + hdrlen; inside->icmp.checksum = 0; inside->icmp.checksum = csum_fold(skb_checksum(skb, hdrlen, @@ -742,7 +733,7 @@ static int __init nf_nat_init(void) spin_unlock_bh(&nf_nat_lock); /* Initialize fake conntrack so that NAT will skip it */ - nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..3e61faf23a9a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, } EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); -bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, +void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, @@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ if (maniptype == IP_NAT_MANIP_DST) - return false; + return; if (ntohs(*portptr) < 1024) { /* Loose convention: >> 512 is credential passing */ @@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, else off = *rover; - for (i = 0; i < range_size; i++, off++) { + for (i = 0; ; ++off) { *portptr = htons(min + off % range_size); - if (nf_nat_used_tuple(tuple, ct)) + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) *rover = off; - return true; + return; } - return false; + return; } EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -22,14 +22,14 @@ static u_int16_t dccp_port_rover; -static bool +static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &dccp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..bc8d83a31c73 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ -static bool +static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* If there is no master conntrack we are not PPTP, do not change tuples */ if (!ct->master) - return false; + return; if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; @@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, pr_debug("min = %u, range_size = %u\n", min, range_size); - for (i = 0; i < range_size; i++, key++) { + for (i = 0; ; ++key) { *keyptr = htons(min + key % range_size); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } pr_debug("%p: no NAT mapping\n", ct); - return false; + return; } /* manipulate a GRE packet according to maniptype */ diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..5744c3ec847c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); } -static bool +static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, @@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; - for (i = 0; i < range_size; i++, id++) { + for (i = 0; ; ++id) { tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + (id % range_size)); - if (!nf_nat_used_tuple(tuple, ct)) - return true; + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; } - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -16,14 +16,14 @@ static u_int16_t nf_sctp_port_rover; -static bool +static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &nf_sctp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -20,14 +20,13 @@ static u_int16_t tcp_port_rover; -static bool +static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &tcp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -19,14 +19,13 @@ static u_int16_t udp_port_rover; -static bool +static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udp_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -18,14 +18,14 @@ static u_int16_t udplite_port_rover; -static bool +static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, + &udplite_port_rover); } static bool diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, return true; } -static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, +static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { /* Sorry: we can't help you; if it's not unique, we can't frob anything. */ - return false; + return; } static bool diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 98ed78281aee..ebbd319f62f5 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,8 @@ #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT)) + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_LOCAL_IN)) static const struct xt_table nat_table = { .name = "nat", @@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); @@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) return 0; } -unsigned int +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for @@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { .target = ipt_snat_target, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", - .hooks = 1 << NF_INET_POST_ROUTING, + .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .checkentry = ipt_snat_checkentry, .family = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index beb25819c9c9..95481fee8bdb 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, return NF_ACCEPT; /* Don't try to NAT if this packet is not conntracked */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return NF_ACCEPT; nat = nfct_nat(ct); @@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - if (hooknum == NF_INET_LOCAL_IN) - /* LOCAL_IN hook doesn't have a chain! */ - ret = alloc_null_binding(ct, hooknum); - else - ret = nf_nat_rule_find(skb, hooknum, in, out, - ct); - + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; } else diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3dc9914c1dce..4ae1f203f7cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), + SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), SNMP_MIB_SENTINEL }; @@ -342,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); + BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.ip_statistics, - snmp4_ipstats_list[i].entry)); + seq_printf(seq, " %llu", + snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp4_ipstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); @@ -431,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "\nIpExt:"); for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **)net->mib.ip_statistics, - snmp4_ipextstats_list[i].entry)); + seq_printf(seq, " %llu", + snmp_fold_field64((void __percpu **)net->mib.ip_statistics, + snmp4_ipextstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); seq_putc(seq, '\n'); return 0; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 542f22fc98b3..f2d297351405 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) return ret; } +EXPORT_SYMBOL(inet_add_protocol); /* * Remove a protocol from the hash tables. @@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) return ret; } - -EXPORT_SYMBOL(inet_add_protocol); EXPORT_SYMBOL(inet_del_protocol); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c7a1639388a..009a7b2aa1ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) } static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, - struct rtable *rt, + struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, struct sk_buff *skb; unsigned int iphlen; int err; + struct rtable *rt = *rtp; - if (length > rt->u.dst.dev->mtu) { + if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, - rt->u.dst.dev->mtu); + rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->dst); + *rtp = NULL; skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -382,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb_transport_header(skb))->type); err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -576,7 +578,7 @@ back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, - rt, msg->msg_flags); + &rt, msg->msg_flags); else { if (!ipc.addr) @@ -604,7 +606,7 @@ out: return len; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 560acc677ce4..3f56b6e6c6aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); -#define RT_CACHE_STAT_INC(field) \ - (__raw_get_cpu_var(rt_cache_stat).field++) +#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, int genid) @@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); while (r) { - if (dev_net(r->u.dst.dev) == seq_file_net(seq) && + if (dev_net(r->dst.dev) == seq_file_net(seq) && r->rt_genid == st->genid) return r; - r = rcu_dereference_bh(r->u.dst.rt_next); + r = rcu_dereference_bh(r->dst.rt_next); } rcu_read_unlock_bh(); } @@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; - r = r->u.dst.rt_next; + r = r->dst.rt_next; while (!r) { rcu_read_unlock_bh(); do { @@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, { struct rt_cache_iter_state *st = seq->private; while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->u.dst.dev) != seq_file_net(seq)) + if (dev_net(r->dst.dev) != seq_file_net(seq)) continue; if (r->rt_genid == st->genid) break; @@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->u.dst.dev ? r->u.dst.dev->name : "*", + r->dst.dev ? r->dst.dev->name : "*", (__force u32)r->rt_dst, (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->u.dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), - dst_metric(&r->u.dst, RTAX_WINDOW), - (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + - dst_metric(&r->u.dst, RTAX_RTTVAR)), + r->rt_flags, atomic_read(&r->dst.__refcnt), + r->dst.__use, 0, (__force u32)r->rt_src, + (dst_metric(&r->dst, RTAX_ADVMSS) ? + (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric(&r->dst, RTAX_WINDOW), + (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + + dst_metric(&r->dst, RTAX_RTTVAR)), r->fl.fl4_tos, - r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, - r->u.dst.hh ? (r->u.dst.hh->hh_output == + r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, + r->dst.hh ? (r->dst.hh->hh_output == dev_queue_xmit) : 0, r->rt_spec_dst, &len); @@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void) static inline void rt_free(struct rtable *rt) { - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline void rt_drop(struct rtable *rt) { ip_rt_put(rt); - call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static inline int rt_fast_clean(struct rtable *rth) @@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) /* Kill broadcast/multicast entries very aggresively, if they collide in hash table with more useful entries */ return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rth->fl.iif && rth->u.dst.rt_next; + rth->fl.iif && rth->dst.rt_next; } static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->u.dst.expires; + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t unsigned long age; int ret = 0; - if (atomic_read(&rth->u.dst.__refcnt)) + if (atomic_read(&rth->dst.__refcnt)) goto out; ret = 1; - if (rth->u.dst.expires && - time_after_eq(jiffies, rth->u.dst.expires)) + if (rth->dst.expires && + time_after_eq(jiffies, rth->dst.expires)) goto out; - age = jiffies - rth->u.dst.lastuse; + age = jiffies - rth->dst.lastuse; ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) @@ -661,7 +660,7 @@ out: return ret; */ static inline u32 rt_score(struct rtable *rt) { - u32 score = jiffies - rt->u.dst.lastuse; + u32 score = jiffies - rt->dst.lastuse; score = ~score & ~(3<<30); @@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); + return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); } static inline int rt_is_expired(struct rtable *rth) { - return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); + return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } /* @@ -735,7 +734,7 @@ static void rt_do_flush(int process_context) rth = rt_hash_table[i].chain; /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; tail = tail->u.dst.rt_next) + for (tail = rth; tail; tail = tail->dst.rt_next) if (!rt_is_expired(tail)) break; if (rth != tail) @@ -744,9 +743,9 @@ static void rt_do_flush(int process_context) /* call rt_free on entries after the tail requiring flush */ prev = &rt_hash_table[i].chain; for (p = *prev; p; p = next) { - next = p->u.dst.rt_next; + next = p->dst.rt_next; if (!rt_is_expired(p)) { - prev = &p->u.dst.rt_next; + prev = &p->dst.rt_next; } else { *prev = next; rt_free(p); @@ -761,7 +760,7 @@ static void rt_do_flush(int process_context) spin_unlock_bh(rt_hash_lock_addr(i)); for (; rth != tail; rth = next) { - next = rth->u.dst.rt_next; + next = rth->dst.rt_next; rt_free(rth); } } @@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) while (aux != rth) { if (compare_hash_inputs(&aux->fl, &rth->fl)) return 0; - aux = aux->u.dst.rt_next; + aux = aux->dst.rt_next; } return ONE; } @@ -832,18 +831,18 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - prefetch(rth->u.dst.rt_next); + prefetch(rth->dst.rt_next); if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } - if (rth->u.dst.expires) { + if (rth->dst.expires) { /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->u.dst.expires)) { + if (time_before_eq(jiffies, rth->dst.expires)) { nofree: tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; /* * We only count entries on * a chain with equal hash inputs once @@ -859,7 +858,7 @@ nofree: goto nofree; /* Cleanup aged off entries. */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); @@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; continue; } - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); goal--; } @@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head) while (rth) { length += has_noalias(head, rth); - rth = rth->u.dst.rt_next; + rth = rth->dst.rt_next; } return length >> FRACT_BITS; } @@ -1091,7 +1090,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->u.dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev))) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -1109,7 +1108,7 @@ restart: */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1128,19 +1127,19 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; rt_free(rth); continue; } if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { /* Put it first */ - *rthp = rth->u.dst.rt_next; + *rthp = rth->dst.rt_next; /* * Since lookup is lockfree, the deletion * must be visible to another weakly ordered CPU before * the insertion at the start of the hash chain. */ - rcu_assign_pointer(rth->u.dst.rt_next, + rcu_assign_pointer(rth->dst.rt_next, rt_hash_table[hash].chain); /* * Since lookup is lockfree, the update writes @@ -1148,18 +1147,18 @@ restart: */ rcu_assign_pointer(rt_hash_table[hash].chain, rth); - dst_use(&rth->u.dst, now); + dst_use(&rth->dst, now); spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); if (rp) *rp = rth; else - skb_dst_set(skb, &rth->u.dst); + skb_dst_set(skb, &rth->dst); return 0; } - if (!atomic_read(&rth->u.dst.__refcnt)) { + if (!atomic_read(&rth->dst.__refcnt)) { u32 score = rt_score(rth); if (score <= min_score) { @@ -1171,7 +1170,7 @@ restart: chain_length++; - rthp = &rth->u.dst.rt_next; + rthp = &rth->dst.rt_next; } if (cand) { @@ -1182,17 +1181,17 @@ restart: * only 2 entries per bucket. We will see. */ if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->u.dst.rt_next; + *candp = cand->dst.rt_next; rt_free(cand); } } else { if (chain_length > rt_chain_length_max && slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->u.dst.dev); + struct net *net = dev_net(rt->dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(net)) { printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", - rt->u.dst.dev->name, num); + rt->dst.dev->name, num); } rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1207,7 +1206,7 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { - int err = arp_bind_neighbour(&rt->u.dst); + int err = arp_bind_neighbour(&rt->dst); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1238,14 +1237,14 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 - if (rt->u.dst.rt_next) { + if (rt->dst.rt_next) { struct rtable *trt; printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); - for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) + for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); } @@ -1263,7 +1262,7 @@ skip_hashing: if (rp) *rp = rt; else - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); return 0; } @@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) ip_select_fb_ident(iph); } +EXPORT_SYMBOL(__ip_select_ident); static void rt_del(unsigned hash, struct rtable *rt) { @@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt) ip_rt_put(rt); while ((aux = *rthp) != NULL) { if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->u.dst.rt_next; + *rthp = aux->dst.rt_next; rt_free(aux); continue; } - rthp = &aux->u.dst.rt_next; + rthp = &aux->dst.rt_next; } spin_unlock_bh(rt_hash_lock_addr(hash)); } +/* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { int i, k; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rth, **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; @@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp=&rt_hash_table[hash].chain; - rcu_read_lock(); while ((rth = rcu_dereference(*rthp)) != NULL) { struct rtable *rt; @@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || rt_is_expired(rth) || - !net_eq(dev_net(rth->u.dst.dev), net)) { - rthp = &rth->u.dst.rt_next; + !net_eq(dev_net(rth->dst.dev), net)) { + rthp = &rth->dst.rt_next; continue; } if (rth->rt_dst != daddr || rth->rt_src != saddr || - rth->u.dst.error || + rth->dst.error || rth->rt_gateway != old_gw || - rth->u.dst.dev != dev) + rth->dst.dev != dev) break; - dst_hold(&rth->u.dst); - rcu_read_unlock(); + dst_hold(&rth->dst); rt = dst_alloc(&ipv4_dst_ops); if (rt == NULL) { ip_rt_put(rth); - in_dev_put(in_dev); return; } /* Copy all the information. */ *rt = *rth; - rt->u.dst.__use = 1; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.child = NULL; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.__use = 1; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.child = NULL; + if (rt->dst.dev) + dev_hold(rt->dst.dev); if (rt->idev) in_dev_hold(rt->idev); - rt->u.dst.obsolete = -1; - rt->u.dst.lastuse = jiffies; - rt->u.dst.path = &rt->u.dst; - rt->u.dst.neighbour = NULL; - rt->u.dst.hh = NULL; + rt->dst.obsolete = -1; + rt->dst.lastuse = jiffies; + rt->dst.path = &rt->dst; + rt->dst.neighbour = NULL; + rt->dst.hh = NULL; #ifdef CONFIG_XFRM - rt->u.dst.xfrm = NULL; + rt->dst.xfrm = NULL; #endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; @@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway = new_gw; /* Redirect received -> path was valid */ - dst_confirm(&rth->u.dst); + dst_confirm(&rth->dst); if (rt->peer) atomic_inc(&rt->peer->refcnt); - if (arp_bind_neighbour(&rt->u.dst) || - !(rt->u.dst.neighbour->nud_state & + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->u.dst.neighbour) - neigh_event_send(rt->u.dst.neighbour, NULL); + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); ip_rt_put(rth); rt_drop(rt); goto do_next; } - netevent.old = &rth->u.dst; - netevent.new = &rt->u.dst; + netevent.old = &rth->dst; + netevent.new = &rt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); @@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ip_rt_put(rt); goto do_next; } - rcu_read_unlock(); do_next: ; } } - in_dev_put(in_dev); return; reject_redirect: @@ -1480,7 +1476,7 @@ reject_redirect: &old_gw, dev->name, &new_gw, &saddr, &daddr); #endif - in_dev_put(in_dev); + ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) @@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->u.dst.expires && - time_after_eq(jiffies, rt->u.dst.expires))) { + (rt->dst.expires && + time_after_eq(jiffies, rt->dst.expires))) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) int log_martians; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; @@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) - rt->u.dst.rate_tokens = 0; + if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) + rt->dst.rate_tokens = 0; /* Too many ignored redirects; do not send anything - * set u.dst.rate_last to the last seen redirected packet. + * set dst.rate_last to the last seen redirected packet. */ - if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { - rt->u.dst.rate_last = jiffies; + if (rt->dst.rate_tokens >= ip_rt_redirect_number) { + rt->dst.rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (rt->u.dst.rate_tokens == 0 || + if (rt->dst.rate_tokens == 0 || time_after(jiffies, - (rt->u.dst.rate_last + - (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { + (rt->dst.rate_last + + (ip_rt_redirect_load << rt->dst.rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->u.dst.rate_last = jiffies; - ++rt->u.dst.rate_tokens; + rt->dst.rate_last = jiffies; + ++rt->dst.rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->u.dst.rate_tokens == ip_rt_redirect_number && + rt->dst.rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb) unsigned long now; int code; - switch (rt->u.dst.error) { + switch (rt->dst.error) { case EINVAL: default: goto out; @@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb) break; case ENETUNREACH: code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->u.dst.dev), + IP_INC_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INNOROUTES); break; case EACCES: @@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb) } now = jiffies; - rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; - if (rt->u.dst.rate_tokens > ip_rt_error_burst) - rt->u.dst.rate_tokens = ip_rt_error_burst; - rt->u.dst.rate_last = now; - if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { - rt->u.dst.rate_tokens -= ip_rt_error_cost; + rt->dst.rate_tokens += now - rt->dst.rate_last; + if (rt->dst.rate_tokens > ip_rt_error_burst) + rt->dst.rate_tokens = ip_rt_error_burst; + rt->dst.rate_last = now; + if (rt->dst.rate_tokens >= ip_rt_error_cost) { + rt->dst.rate_tokens -= ip_rt_error_cost; icmp_send(skb, ICMP_DEST_UNREACH, code, 0); } @@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { unsigned short mtu = new_mtu; if (rth->fl.fl4_dst != daddr || @@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->rt_src != iph->saddr || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - dst_metric_locked(&rth->u.dst, RTAX_MTU) || - !net_eq(dev_net(rth->u.dst.dev), net) || + dst_metric_locked(&rth->dst, RTAX_MTU) || + !net_eq(dev_net(rth->dst.dev), net) || rt_is_expired(rth)) continue; @@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= dst_mtu(&rth->u.dst) && + old_mtu >= dst_mtu(&rth->dst) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= dst_mtu(&rth->u.dst)) { - if (mtu < dst_mtu(&rth->u.dst)) { - dst_confirm(&rth->u.dst); + if (mtu <= dst_mtu(&rth->dst)) { + if (mtu < dst_mtu(&rth->dst)) { + dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; - rth->u.dst.metrics[RTAX_LOCK-1] |= + rth->dst.metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); } - rth->u.dst.metrics[RTAX_MTU-1] = mtu; - dst_set_expires(&rth->u.dst, + rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_set_expires(&rth->dst, ip_rt_mtu_expires); } est_mtu = mtu; @@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb) rt = skb_rtable(skb); if (rt) - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { + else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { src = FIB_RES_PREFSRC(res); fib_res_put(&res); } else - src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); memcpy(addr, &src, 4); } @@ -1795,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) #ifdef CONFIG_NET_CLS_ROUTE static void set_class_tag(struct rtable *rt, u32 tag) { - if (!(rt->u.dst.tclassid & 0xFFFF)) - rt->u.dst.tclassid |= tag & 0xFFFF; - if (!(rt->u.dst.tclassid & 0xFFFF0000)) - rt->u.dst.tclassid |= tag & 0xFFFF0000; + if (!(rt->dst.tclassid & 0xFFFF)) + rt->dst.tclassid |= tag & 0xFFFF; + if (!(rt->dst.tclassid & 0xFFFF0000)) + rt->dst.tclassid |= tag & 0xFFFF0000; } #endif @@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->u.dst.metrics, fi->fib_metrics, - sizeof(rt->u.dst.metrics)); + memcpy(rt->dst.metrics, fi->fib_metrics, + sizeof(rt->dst.metrics)); if (fi->fib_mtu == 0) { - rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && + rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + if (dst_metric_locked(&rt->dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->u.dst.dev->mtu > 576) - rt->u.dst.metrics[RTAX_MTU-1] = 576; + rt->dst.dev->mtu > 576) + rt->dst.metrics[RTAX_MTU-1] = 576; } #ifdef CONFIG_NET_CLS_ROUTE - rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) - rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) - rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, + rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; + + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + if (dst_mtu(&rt->dst) > IP_MAX_MTU) + rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; + if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) + rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, ip_rt_min_advmss); - if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) - rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) + rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } +/* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned hash; + unsigned int hash; struct rtable *rth; __be32 spec_dst; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; + int err; /* Primary sanity checks. */ @@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - } else if (fib_validate_source(saddr, 0, tos, 0, - dev, &spec_dst, &itag, 0) < 0) - goto e_inval; - + } else { + err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, + &itag, 0); + if (err < 0) + goto e_err; + } rth = dst_alloc(&ipv4_dst_ops); if (!rth) goto e_nobufs; - rth->u.dst.output = ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output = ip_rt_bug; + rth->dst.obsolete = -1; - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = init_net.loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = init_net.loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; @@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) - rth->u.dst.input = ip_mr_input; + rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); - in_dev_put(in_dev); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); e_nobufs: - in_dev_put(in_dev); return -ENOBUFS; - e_inval: - in_dev_put(in_dev); return -EINVAL; +e_err: + return err; } @@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +/* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct rtable *rth; int err; struct in_device *out_dev; - unsigned flags = 0; + unsigned int flags = 0; __be32 spec_dst; u32 itag; /* get a working reference to the output device */ - out_dev = in_dev_get(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); if (out_dev == NULL) { if (net_ratelimit()) printk(KERN_CRIT "Bug in ip_route_input" \ @@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb, ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); - err = -EINVAL; goto cleanup; } @@ -2020,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_iif = rth->fl.iif = in_dev->dev->ifindex; - rth->u.dst.dev = (out_dev)->dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = (out_dev)->dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->u.dst.obsolete = -1; - rth->u.dst.input = ip_forward; - rth->u.dst.output = ip_output; - rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); + rth->dst.obsolete = -1; + rth->dst.input = ip_forward; + rth->dst.output = ip_output; + rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rt_set_nexthop(rth, res, itag); @@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb, *result = rth; err = 0; cleanup: - /* release the working reference to the output device */ - in_dev_put(out_dev); return err; } @@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, - rt_genid(dev_net(rth->u.dst.dev))); + rt_genid(dev_net(rth->dst.dev))); return rt_intern_hash(hash, rth, NULL, skb, fl->iif); } @@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -2158,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto brd_input; if (res.type == RTN_LOCAL) { - int result; - result = fib_validate_source(saddr, daddr, tos, + err = fib_validate_source(saddr, daddr, tos, net->loopback_dev->ifindex, dev, &spec_dst, &itag, skb->mark); - if (result < 0) - goto martian_source; - if (result) + if (err < 0) + goto martian_source_keep_err; + if (err) flags |= RTCF_DIRECTSRC; spec_dst = daddr; goto local_input; @@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); done: - in_dev_put(in_dev); if (free_res) fib_res_put(&res); out: return err; @@ -2192,7 +2185,7 @@ brd_input: err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, &itag, skb->mark); if (err < 0) - goto martian_source; + goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; } @@ -2205,14 +2198,14 @@ local_input: if (!rth) goto e_nobufs; - rth->u.dst.output= ip_rt_bug; - rth->u.dst.obsolete = -1; + rth->dst.output= ip_rt_bug; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2220,20 +2213,20 @@ local_input: rth->fl.fl4_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_NET_CLS_ROUTE - rth->u.dst.tclassid = itag; + rth->dst.tclassid = itag; #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = net->loopback_dev; - dev_hold(rth->u.dst.dev); - rth->idev = in_dev_get(rth->u.dst.dev); + rth->dst.dev = net->loopback_dev; + dev_hold(rth->dst.dev); + rth->idev = in_dev_get(rth->dst.dev); rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->u.dst.input= ip_local_deliver; + rth->dst.input= ip_local_deliver; rth->rt_flags = flags|RTCF_LOCAL; if (res.type == RTN_UNREACHABLE) { - rth->u.dst.input= ip_error; - rth->u.dst.error= -err; + rth->dst.input= ip_error; + rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; @@ -2273,8 +2266,10 @@ e_nobufs: goto done; martian_source: + err = -EINVAL; +martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto e_inval; + goto done; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; int iif = dev->ifindex; struct net *net; + int res; net = dev_net(dev); + rcu_read_lock(); + if (!rt_caching(net)) goto skip_cache; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->u.dst.rt_next)) { + rth = rcu_dereference(rth->dst.rt_next)) { if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { - dst_use_noref(&rth->u.dst, jiffies); - skb_dst_set_noref(skb, &rth->u.dst); + dst_use_noref(&rth->dst, jiffies); + skb_dst_set_noref(skb, &rth->dst); } else { - dst_use(&rth->u.dst, jiffies); - skb_dst_set(skb, &rth->u.dst); + dst_use(&rth->dst, jiffies); + skb_dst_set(skb, &rth->dst); } RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, } RT_CACHE_STAT_INC(in_hlist_search); } - rcu_read_unlock(); skip_cache: /* Multicast recognition logic is moved from route cache to here. @@ -2332,12 +2328,11 @@ skip_cache: route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { - struct in_device *in_dev; + struct in_device *in_dev = __in_dev_get_rcu(dev); - rcu_read_lock(); - if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { + if (in_dev) { int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2345,15 +2340,18 @@ skip_cache: IN_DEV_MFORWARD(in_dev)) #endif ) { + int res = ip_route_input_mc(skb, daddr, saddr, + tos, dev, our); rcu_read_unlock(); - return ip_route_input_mc(skb, daddr, saddr, - tos, dev, our); + return res; } } rcu_read_unlock(); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(ip_route_input_common); @@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result, goto cleanup; } - atomic_set(&rth->u.dst.__refcnt, 1); - rth->u.dst.flags= DST_HOST; + atomic_set(&rth->dst.__refcnt, 1); + rth->dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->u.dst.flags |= DST_NOXFRM; + rth->dst.flags |= DST_NOXFRM; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->u.dst.flags |= DST_NOPOLICY; + rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; @@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result, rth->rt_iif = oldflp->oif ? : dev_out->ifindex; /* get references to the devices that are to be hold by the routing cache entry */ - rth->u.dst.dev = dev_out; + rth->dst.dev = dev_out; dev_hold(dev_out); rth->idev = in_dev_get(dev_out); rth->rt_gateway = fl->fl4_dst; rth->rt_spec_dst= fl->fl4_src; - rth->u.dst.output=ip_output; - rth->u.dst.obsolete = -1; + rth->dst.output=ip_output; + rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); if (flags & RTCF_LOCAL) { - rth->u.dst.input = ip_local_deliver; + rth->dst.input = ip_local_deliver; rth->rt_spec_dst = fl->fl4_dst; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { rth->rt_spec_dst = fl->fl4_src; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { - rth->u.dst.output = ip_mc_output; + rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (res->type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { - rth->u.dst.input = ip_mr_input; - rth->u.dst.output = ip_mc_output; + rth->dst.input = ip_mr_input; + rth->dst.output = ip_mc_output; } } #endif @@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->u.dst.rt_next)) { + rth = rcu_dereference_bh(rth->dst.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && @@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rth->fl.mark == flp->mark && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->u.dst.dev), net) && + net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - dst_use(&rth->u.dst, jiffies); + dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); *rp = rth; @@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, slow_output: return ip_route_output_slow(net, rp, flp); } - EXPORT_SYMBOL_GPL(__ip_route_output_key); static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_alloc(&ipv4_dst_blackhole_ops); if (rt) { - struct dst_entry *new = &rt->u.dst; + struct dst_entry *new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_free(new); } - dst_release(&(*rp)->u.dst); + dst_release(&(*rp)->dst); *rp = rt; return (rt ? 0 : -ENOMEM); } @@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, return 0; } - EXPORT_SYMBOL_GPL(ip_route_output_flow); int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) { return ip_route_output_flow(net, rp, flp, NULL, 0); } +EXPORT_SYMBOL(ip_route_output_key); static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, @@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net, r->rtm_src_len = 32; NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } - if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); + if (rt->dst.dev) + NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE - if (rt->u.dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); + if (rt->dst.tclassid) + NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif if (rt->fl.iif) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); @@ -2878,12 +2875,16 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - error = rt->u.dst.error; - expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; + if (rt->fl.mark) + NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); + + error = rt->dst.error; + expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; if (rt->peer) { + inet_peer_refcheck(rt->peer); id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; @@ -2914,7 +2915,7 @@ static int rt_fill_info(struct net *net, NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, expires, error) < 0) goto nla_put_failure; @@ -2935,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void __be32 src = 0; u32 iif; int err; + int mark; struct sk_buff *skb; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); @@ -2962,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; + mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; if (iif) { struct net_device *dev; @@ -2974,13 +2977,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb->protocol = htons(ETH_P_IP); skb->dev = dev; + skb->mark = mark; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); rt = skb_rtable(skb); - if (err == 0 && rt->u.dst.error) - err = -rt->u.dst.error; + if (err == 0 && rt->dst.error) + err = -rt->dst.error; } else { struct flowi fl = { .nl_u = { @@ -2991,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void }, }, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .mark = mark, }; err = ip_route_output_key(net, &rt, &fl); } @@ -2998,7 +3003,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err) goto errout_free; - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -3034,12 +3039,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; rcu_read_lock_bh(); for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) + rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { + if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) continue; if (rt_is_expired(rt)) continue; - skb_dst_set_noref(skb, &rt->u.dst); + skb_dst_set_noref(skb, &rt->dst); if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { @@ -3365,6 +3370,3 @@ void __init ip_static_sysctl_init(void) register_sysctl_paths(ipv4_path, ipv4_skeleton); } #endif - -EXPORT_SYMBOL(__ip_select_ident); -EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9f6b22206c52..650cace2180d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -18,8 +18,8 @@ #include <net/tcp.h> #include <net/route.h> -/* Timestamps: lowest 9 bits store TCP options */ -#define TSBITS 9 +/* Timestamps: lowest bits store TCP options */ +#define TSBITS 6 #define TSMASK (((__u32)1 << TSBITS) - 1) extern int sysctl_tcp_syncookies; @@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, /* * when syncookies are in effect and tcp timestamps are enabled we encode - * tcp options in the lowest 9 bits of the timestamp value that will be + * tcp options in the lower bits of the timestamp value that will be * sent in the syn-ack. * Since subsequent timestamps use the normal tcp_time_stamp value, we * must make sure that the resulting initial timestamp is <= tcp_time_stamp. @@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req) u32 options = 0; ireq = inet_rsk(req); - if (ireq->wscale_ok) { - options = ireq->snd_wscale; - options |= ireq->rcv_wscale << 4; - } - options |= ireq->sack_ok << 8; + + options = ireq->wscale_ok ? ireq->snd_wscale : 0xf; + options |= ireq->sack_ok << 4; + options |= ireq->ecn_ok << 5; ts = ts_now & ~TSMASK; ts |= options; @@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. + * MSS Values are taken from the 2009 paper + * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: + * - values 1440 to 1460 accounted for 80% of observed mss values + * - values outside the 536-1460 range are rare (<0.2%). + * + * Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1024, + 1440, + 1460, + 4312, + 8960, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * Generate a syncookie. mssp points to the mss, which is returned @@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - /* XXX sort msstab[] by probability? Binary search? */ - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, @@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, * additional tcp options in the timestamp. * This extracts these options from the timestamp echo. * - * The lowest 4 bits are for snd_wscale - * The next 4 lsb are for rcv_wscale - * The next lsb is for sack_ok + * The lowest 4 bits store snd_wscale. + * next 2 bits indicate SACK and ECN support. + * + * return false if we decode an option that should not be. */ -void cookie_check_timestamp(struct tcp_options_received *tcp_opt) +bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) { - /* echoed timestamp, 9 lowest bits contain options */ + /* echoed timestamp, lowest bits contain options */ u32 options = tcp_opt->rcv_tsecr & TSMASK; - tcp_opt->snd_wscale = options & 0xf; - options >>= 4; - tcp_opt->rcv_wscale = options & 0xf; + if (!tcp_opt->saw_tstamp) { + tcp_clear_options(tcp_opt); + return true; + } + + if (!sysctl_tcp_timestamps) + return false; tcp_opt->sack_ok = (options >> 4) & 0x1; + *ecn_ok = (options >> 5) & 1; + if (*ecn_ok && !sysctl_tcp_ecn) + return false; + + if (tcp_opt->sack_ok && !sysctl_tcp_sack) + return false; - if (tcp_opt->sack_ok) - tcp_sack_reset(tcp_opt); + if ((options & 0xf) == 0xf) + return true; /* no window scaling */ - if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) - tcp_opt->wscale_ok = 1; + tcp_opt->wscale_ok = 1; + tcp_opt->snd_wscale = options & 0xf; + return sysctl_tcp_window_scaling != 0; } EXPORT_SYMBOL(cookie_check_timestamp); @@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; + bool ecn_ok; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || @@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0); - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); + if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + goto out; ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ @@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->ecn_ok = 0; + ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; @@ -354,15 +365,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } /* Try to redo what tcp_v4_send_synack did. */ - req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); + req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, &rcv_wscale, - dst_metric(&rt->u.dst, RTAX_INITRWND)); + dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ret = get_cookie_sock(sk, skb, req, &rt->u.dst); + ret = get_cookie_sock(sk, skb, req, &rt->dst); out: return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6596b4feeddc..176e11aaea77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -315,7 +315,6 @@ struct tcp_splice_state { * is strict, actions are advisory and have some latency. */ int tcp_memory_pressure __read_mostly; - EXPORT_SYMBOL(tcp_memory_pressure); void tcp_enter_memory_pressure(struct sock *sk) @@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) tcp_memory_pressure = 1; } } - EXPORT_SYMBOL(tcp_enter_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ @@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } return mask; } +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { @@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) return put_user(answ, (int __user *)arg); } +EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } @@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPCB_FLAG_ACK; + tcb->flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ssize_t spliced; int ret; + sock_rps_record_flow(sk); /* * We can't seek on a socket input */ @@ -675,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } +EXPORT_SYMBOL(tcp_splice_read); struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) { @@ -815,7 +817,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -856,15 +858,15 @@ out_err: return sk_stream_error(sk, flags, err); } -ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { ssize_t res; - struct sock *sk = sock->sk; if (!(sk->sk_route_caps & NETIF_F_SG) || !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) - return sock_no_sendpage(sock, page, offset, size, flags); + return sock_no_sendpage(sk->sk_socket, page, offset, size, + flags); lock_sock(sk); TCP_CHECK_TIMER(sk); @@ -873,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, release_sock(sk); return res; } +EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) @@ -897,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg) return tmp; } -int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, +int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - struct sock *sk = sock->sk; struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -1061,7 +1063,7 @@ new_segment: } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -1121,6 +1123,7 @@ out_err: release_sock(sk); return err; } +EXPORT_SYMBOL(tcp_sendmsg); /* * Handle reading urgent data. BSD has very simple semantics for @@ -1380,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_cleanup_rbuf(sk, copied); return copied; } +EXPORT_SYMBOL(tcp_read_sock); /* * This routine copies from a sock struct into the user buffer. @@ -1774,6 +1778,7 @@ recv_urg: err = tcp_recv_urg(sk, msg, len, flags); goto out; } +EXPORT_SYMBOL(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -1866,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } +EXPORT_SYMBOL(tcp_shutdown); void tcp_close(struct sock *sk, long timeout) { @@ -1898,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout) sk_mem_reclaim(sk); + /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ + if (sk->sk_state == TCP_CLOSE) + goto adjudge_to_death; + /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. To witness the awful effects of the old behavior of * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk @@ -2025,6 +2035,7 @@ out: local_bh_enable(); sock_put(sk); } +EXPORT_SYMBOL(tcp_close); /* These states need RST on ABORT according to RFC793 */ @@ -2098,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_error_report(sk); return err; } +EXPORT_SYMBOL(tcp_disconnect); /* * Socket option code for TCP. @@ -2175,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, GFP_KERNEL); if (cvp == NULL) return -ENOMEM; + + kref_init(&cvp->kref); } lock_sock(sk); tp->rx_opt.cookie_in_always = @@ -2189,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, */ kref_put(&tp->cookie_values->kref, tcp_cookie_values_release); - kref_init(&cvp->kref); - tp->cookie_values = cvp; } else { cvp = tp->cookie_values; } } + if (cvp != NULL) { cvp->cookie_desired = ctd.tcpct_cookie_desired; @@ -2208,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, cvp->s_data_desired = ctd.tcpct_s_data_desired; cvp->s_data_constant = 0; /* false */ } + + tp->cookie_values = cvp; } release_sock(sk); return err; @@ -2396,6 +2411,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_setsockopt); #ifdef CONFIG_COMPAT int compat_tcp_setsockopt(struct sock *sk, int level, int optname, @@ -2406,7 +2422,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_setsockopt); #endif @@ -2472,7 +2487,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; } - EXPORT_SYMBOL_GPL(tcp_get_info); static int do_tcp_getsockopt(struct sock *sk, int level, @@ -2590,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; } + case TCP_THIN_LINEAR_TIMEOUTS: + val = tp->thin_lto; + break; + case TCP_THIN_DUPACK: + val = tp->thin_dupack; + break; default: return -ENOPROTOOPT; } @@ -2611,6 +2631,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } +EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_COMPAT int compat_tcp_getsockopt(struct sock *sk, int level, int optname, @@ -2621,7 +2642,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } - EXPORT_SYMBOL(compat_tcp_getsockopt); #endif @@ -2858,7 +2878,6 @@ void tcp_free_md5sig_pool(void) if (pool) __tcp_free_md5sig_pool(pool); } - EXPORT_SYMBOL(tcp_free_md5sig_pool); static struct tcp_md5sig_pool * __percpu * @@ -2934,7 +2953,6 @@ retry: } return pool; } - EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2958,7 +2976,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *per_cpu_ptr(p, smp_processor_id()); + return *this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -2986,7 +3004,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, th->check = old_checksum; return err; } - EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, @@ -2999,6 +3016,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const unsigned head_data_len = skb_headlen(skb) > header_len ? skb_headlen(skb) - header_len : 0; const struct skb_shared_info *shi = skb_shinfo(skb); + struct sk_buff *frag_iter; sg_init_table(&sg, 1); @@ -3013,9 +3031,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; } + skb_walk_frags(skb, frag_iter) + if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) + return 1; + return 0; } - EXPORT_SYMBOL(tcp_md5_hash_skb_data); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) @@ -3025,7 +3046,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) sg_init_one(&sg, key->key, key->keylen); return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); } - EXPORT_SYMBOL(tcp_md5_hash_key); #endif @@ -3297,16 +3317,3 @@ void __init tcp_init(void) tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; } - -EXPORT_SYMBOL(tcp_close); -EXPORT_SYMBOL(tcp_disconnect); -EXPORT_SYMBOL(tcp_getsockopt); -EXPORT_SYMBOL(tcp_ioctl); -EXPORT_SYMBOL(tcp_poll); -EXPORT_SYMBOL(tcp_read_sock); -EXPORT_SYMBOL(tcp_recvmsg); -EXPORT_SYMBOL(tcp_sendmsg); -EXPORT_SYMBOL(tcp_splice_read); -EXPORT_SYMBOL(tcp_sendpage); -EXPORT_SYMBOL(tcp_setsockopt); -EXPORT_SYMBOL(tcp_shutdown); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 548d575e6cc6..3c426cb318e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +EXPORT_SYMBOL(sysctl_tcp_reordering); int sysctl_tcp_ecn __read_mostly = 2; +EXPORT_SYMBOL(sysctl_tcp_ecn); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 2; +EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; @@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) inet_csk(sk)->icsk_ack.rcv_mss = hint; } +EXPORT_SYMBOL(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk) } tcp_xmit_retransmit_queue(sk); } +EXPORT_SYMBOL(tcp_simple_retransmit); /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, @@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->flags & TCPCB_FLAG_SYN)) { + if (!(scb->flags & TCPHDR_SYN)) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } } } +EXPORT_SYMBOL(tcp_parse_options); static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) { @@ -3931,6 +3937,7 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) } return NULL; } +EXPORT_SYMBOL(tcp_parse_md5sig_option); #endif static inline void tcp_store_ts_recent(struct tcp_sock *tp) @@ -5432,6 +5439,7 @@ discard: __kfree_skb(skb); return 0; } +EXPORT_SYMBOL(tcp_rcv_established); static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) @@ -5931,14 +5939,4 @@ discard: } return 0; } - -EXPORT_SYMBOL(sysctl_tcp_ecn); -EXPORT_SYMBOL(sysctl_tcp_reordering); -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(tcp_parse_options); -#ifdef CONFIG_TCP_MD5SIG -EXPORT_SYMBOL(tcp_parse_md5sig_option); -#endif -EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); -EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe193e53af44..020766292bb0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,6 +84,7 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; +EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG @@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) #endif struct inet_hashinfo tcp_hashinfo; +EXPORT_SYMBOL(tcp_hashinfo); static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } - EXPORT_SYMBOL_GPL(tcp_twsk_unique); /* This will initiate an outgoing connection. */ @@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * TIME-WAIT * and initialize rx_opt.ts_recent from it, * when trying new connection. */ - if (peer != NULL && - (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; + if (peer) { + inet_peer_refcheck(peer); + if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; + tp->rx_opt.ts_recent = peer->tcp_ts; + } } } @@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, @@ -265,6 +268,7 @@ failure: inet->inet_dport = 0; return err; } +EXPORT_SYMBOL(tcp_v4_connect); /* * This routine does path mtu discovery as defined in RFC1191. @@ -543,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } +EXPORT_SYMBOL(tcp_v4_send_check); int tcp_v4_gso_send_check(struct sk_buff *skb) { @@ -793,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -#ifdef CONFIG_SYN_COOKIES -static void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(const struct sk_buff *skb) { - static unsigned long warntime; + const char *msg; - if (time_after(jiffies, (warntime + HZ * 60))) { - warntime = jiffies; - printk(KERN_INFO - "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(tcp_hdr(skb)->dest)); - } -} +#ifdef CONFIG_SYN_COOKIES + if (sysctl_tcp_syncookies) + msg = "Sending cookies"; + else #endif + msg = "Dropping request"; + + pr_info("TCP: Possible SYN flooding on port %d. %s.\n", + ntohs(tcp_hdr(skb)->dest), msg); +} /* * Save and compile IPv4 options into the request_sock if needed. @@ -857,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, { return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); } - EXPORT_SYMBOL(tcp_v4_md5_lookup); static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, @@ -924,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } return 0; } - EXPORT_SYMBOL(tcp_v4_md5_do_add); static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, @@ -962,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) } return -ENOENT; } - EXPORT_SYMBOL(tcp_v4_md5_do_del); static void tcp_v4_clear_md5_list(struct sock *sk) @@ -1135,7 +1138,6 @@ clear_hash_noput: memset(md5_hash, 0, 16); return 1; } - EXPORT_SYMBOL(tcp_v4_md5_hash_skb); static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) @@ -1243,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * evidently real one. */ if (inet_csk_reqsk_queue_is_full(sk) && !isn) { + if (net_ratelimit()) + syn_flood_warning(skb); #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1323,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (!want_cookie) + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); if (want_cookie) { -#ifdef CONFIG_SYN_COOKIES - syn_flood_warning(skb); - req->cookie_ts = tmp_opt.tstamp_ok; -#endif isn = cookie_v4_init_sequence(sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { struct inet_peer *peer = NULL; @@ -1349,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { + inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { @@ -1393,6 +1395,7 @@ drop_and_free: drop: return 0; } +EXPORT_SYMBOL(tcp_v4_conn_request); /* @@ -1478,6 +1481,7 @@ exit: dst_release(dst); return NULL; } +EXPORT_SYMBOL(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { @@ -1504,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); #endif return sk; @@ -1607,6 +1611,7 @@ csum_err: TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); goto discard; } +EXPORT_SYMBOL(tcp_v4_do_rcv); /* * From tcp_input.c @@ -1793,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk) return 0; } +EXPORT_SYMBOL(tcp_v4_remember_stamp); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { @@ -1832,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .compat_getsockopt = compat_ip_getsockopt, #endif }; +EXPORT_SYMBOL(ipv4_specific); #ifdef CONFIG_TCP_MD5SIG static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -1960,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk) percpu_counter_dec(&tcp_sockets_allocated); } - EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS @@ -1978,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } +/* + * Get next listener socket follow cur. If cur is NULL, get first socket + * starting from bucket given in st->bucket; when st->bucket is zero the + * very first socket in the hash table is returned. + */ static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; @@ -1988,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); if (!sk) { - st->bucket = 0; - ilb = &tcp_hashinfo.listening_hash[0]; + ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); sk = sk_nulls_head(&ilb->head); + st->offset = 0; goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; @@ -2010,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) } req = req->dl_next; } + st->offset = 0; if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) break; get_req: @@ -2045,6 +2058,7 @@ start_req: read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); + st->offset = 0; if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); @@ -2058,7 +2072,12 @@ out: static void *listening_get_idx(struct seq_file *seq, loff_t *pos) { - void *rc = listening_get_next(seq, NULL); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + st->offset = 0; + rc = listening_get_next(seq, NULL); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2073,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } +/* + * Get first established socket starting from bucket given in st->bucket. + * If st->bucket is zero, the very first socket in the hash is returned. + */ static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2124,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) struct net *net = seq_file_net(seq); ++st->num; + ++st->offset; if (st->state == TCP_SEQ_STATE_TIME_WAIT) { tw = cur; @@ -2140,6 +2165,7 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ + st->offset = 0; while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; @@ -2167,7 +2193,11 @@ out: static void *established_get_idx(struct seq_file *seq, loff_t pos) { - void *rc = established_get_first(seq); + struct tcp_iter_state *st = seq->private; + void *rc; + + st->bucket = 0; + rc = established_get_first(seq); while (rc && pos) { rc = established_get_next(seq, rc); @@ -2192,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) return rc; } +static void *tcp_seek_last_pos(struct seq_file *seq) +{ + struct tcp_iter_state *st = seq->private; + int offset = st->offset; + int orig_num = st->num; + void *rc = NULL; + + switch (st->state) { + case TCP_SEQ_STATE_OPENREQ: + case TCP_SEQ_STATE_LISTENING: + if (st->bucket >= INET_LHTABLE_SIZE) + break; + st->state = TCP_SEQ_STATE_LISTENING; + rc = listening_get_next(seq, NULL); + while (offset-- && rc) + rc = listening_get_next(seq, rc); + if (rc) + break; + st->bucket = 0; + /* Fallthrough */ + case TCP_SEQ_STATE_ESTABLISHED: + case TCP_SEQ_STATE_TIME_WAIT: + st->state = TCP_SEQ_STATE_ESTABLISHED; + if (st->bucket > tcp_hashinfo.ehash_mask) + break; + rc = established_get_first(seq); + while (offset-- && rc) + rc = established_get_next(seq, rc); + } + + st->num = orig_num; + + return rc; +} + static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) { struct tcp_iter_state *st = seq->private; + void *rc; + + if (*pos && *pos == st->last_pos) { + rc = tcp_seek_last_pos(seq); + if (rc) + goto out; + } + st->state = TCP_SEQ_STATE_LISTENING; st->num = 0; - return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + st->bucket = 0; + st->offset = 0; + rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + +out: + st->last_pos = *pos; + return rc; } static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct tcp_iter_state *st = seq->private; void *rc = NULL; - struct tcp_iter_state *st; if (v == SEQ_START_TOKEN) { rc = tcp_get_idx(seq, 0); goto out; } - st = seq->private; switch (st->state) { case TCP_SEQ_STATE_OPENREQ: @@ -2217,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) rc = listening_get_next(seq, v); if (!rc) { st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + st->offset = 0; rc = established_get_first(seq); } break; @@ -2227,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } out: ++*pos; + st->last_pos = *pos; return rc; } @@ -2265,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; + s->last_pos = 0; return 0; } @@ -2288,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) rc = -ENOMEM; return rc; } +EXPORT_SYMBOL(tcp_proc_register); void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { proc_net_remove(net, afinfo->name); } +EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(struct sock *sk, struct request_sock *req, struct seq_file *f, int i, int uid, int *len) @@ -2516,6 +2600,8 @@ struct proto tcp_prot = { .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .hash = inet_hash, .unhash = inet_unhash, @@ -2534,11 +2620,13 @@ struct proto tcp_prot = { .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif }; +EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) @@ -2569,20 +2657,3 @@ void __init tcp_v4_init(void) if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } - -EXPORT_SYMBOL(ipv4_specific); -EXPORT_SYMBOL(tcp_hashinfo); -EXPORT_SYMBOL(tcp_prot); -EXPORT_SYMBOL(tcp_v4_conn_request); -EXPORT_SYMBOL(tcp_v4_connect); -EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_remember_stamp); -EXPORT_SYMBOL(tcp_v4_send_check); -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); - -#ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(tcp_proc_register); -EXPORT_SYMBOL(tcp_proc_unregister); -#endif -EXPORT_SYMBOL(sysctl_tcp_low_latency); - diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 794c2e122a41..f25b56cb85cb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = { .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, (unsigned long)&tcp_death_row), }; - EXPORT_SYMBOL_GPL(tcp_death_row); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) @@ -262,6 +261,7 @@ kill: inet_twsk_put(tw); return TCP_TW_SUCCESS; } +EXPORT_SYMBOL(tcp_timewait_state_process); /* * Move a socket to time-wait or dead fin-wait-2 state. @@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk) tcp_free_md5sig_pool(); #endif } - EXPORT_SYMBOL_GPL(tcp_twsk_destructor); static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, @@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, } return newsk; } +EXPORT_SYMBOL(tcp_create_openreq_child); /* * Process an incoming packet for SYN_RECV sockets represented @@ -706,6 +706,7 @@ embryonic_reset: inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; } +EXPORT_SYMBOL(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return ret; } - -EXPORT_SYMBOL(tcp_check_req); EXPORT_SYMBOL(tcp_child_process); -EXPORT_SYMBOL(tcp_create_openreq_child); -EXPORT_SYMBOL(tcp_timewait_state_process); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4ed957f201a..de3bd8458588 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss, /* Set the clamp no higher than max representable value */ (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); } +EXPORT_SYMBOL(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return @@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk) /* Packet ECN state for a SYN-ACK */ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; + TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; } /* Packet ECN state for a SYN. */ @@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = 0; if (sysctl_tcp_ecn == 1) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; + TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; } } @@ -361,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) skb_shinfo(skb)->gso_type = 0; TCP_SKB_CB(skb)->seq = seq; - if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) + if (flags & (TCPHDR_SYN | TCPHDR_FIN)) seq++; TCP_SKB_CB(skb)->end_seq = seq; } @@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) + if (unlikely(tcb->flags & TCPHDR_SYN)) tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); else tcp_options_size = tcp_established_options(sk, skb, &opts, @@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); - if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { + if (unlikely(tcb->flags & TCPHDR_SYN)) { /* RFC1323: The window in SYN & SYN/ACK segments * is never scaled. */ @@ -866,7 +867,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } tcp_options_write((__be32 *)(th + 1), tp, &opts); - if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) + if (likely((tcb->flags & TCPHDR_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); #ifdef CONFIG_TCP_MD5SIG @@ -880,7 +881,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, icsk->icsk_af_ops->send_check(sk, skb); - if (likely(tcb->flags & TCPCB_FLAG_ACK)) + if (likely(tcb->flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) @@ -1023,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; @@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } +EXPORT_SYMBOL(tcp_mtup_init); /* This function synchronize snd mss to current pmtu/exthdr set. @@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } +EXPORT_SYMBOL(tcp_sync_mss); /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -1328,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, u32 in_flight, cwnd; /* Don't be strict about the congestion window for the final FIN. */ - if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && - tcp_skb_pcount(skb) == 1) + if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) return 1; in_flight = tcp_packets_in_flight(tp); @@ -1398,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, * Nagle can be ignored during F-RTO too (see RFC4138). */ if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) + (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) return 1; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1461,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk) * packet has never been sent out before (and thus is not cloned). */ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, - unsigned int mss_now) + unsigned int mss_now, gfp_t gfp) { struct sk_buff *buff; int nlen = skb->len - len; @@ -1471,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (skb->len != skb->data_len) return tcp_fragment(sk, skb, len, mss_now); - buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); + buff = sk_stream_alloc_skb(sk, 0, gfp); if (unlikely(buff == NULL)) return -ENOMEM; @@ -1487,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); TCP_SKB_CB(buff)->flags = flags; /* This packet was never sent out yet, so no SACK bits. */ @@ -1518,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) + if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) goto send_now; if (icsk->icsk_ca_state != TCP_CA_Open) @@ -1644,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; - TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; + TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; nskb->ip_summed = skb->ip_summed; @@ -1669,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk) sk_wmem_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & - ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + ~(TCPHDR_FIN|TCPHDR_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -1769,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota); if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now))) + unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2020,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!sysctl_tcp_retrans_collapse) return; - if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) + if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) return; tcp_for_write_queue_from_safe(skb, tmp, sk) { @@ -2112,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * since it is cheap to do so and saves bytes on the network. */ if (skb->len > 0 && - (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && + (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { /* Reuse, even though it does some unnecessary work */ @@ -2208,6 +2210,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) int mib_idx; int fwd_rexmitting = 0; + if (!tp->packets_out) + return; + if (!tp->lost_out) tp->retransmit_high = tp->snd_una; @@ -2301,7 +2306,7 @@ void tcp_send_fin(struct sock *sk) mss_now = tcp_current_mss(sk); if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; + TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; } else { @@ -2318,7 +2323,7 @@ void tcp_send_fin(struct sock *sk) skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, - TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); + TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); @@ -2343,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, MAX_TCP_HEADER); tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), - TCPCB_FLAG_ACK | TCPCB_FLAG_RST); + TCPHDR_ACK | TCPHDR_RST); /* Send it off. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) @@ -2363,11 +2368,11 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { + if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } - if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { + if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) @@ -2381,7 +2386,7 @@ int tcp_send_synack(struct sock *sk) skb = nskb; } - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; + TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; TCP_ECN_send_synack(tcp_sk(sk), skb); } TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -2460,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, * not even correctly set) */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, - TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + TCPHDR_SYN | TCPHDR_ACK); if (OPTION_COOKIE_EXTENSION & opts.options) { if (s_data_desired) { @@ -2515,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, return skb; } +EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) @@ -2592,7 +2598,7 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tp->snd_nxt = tp->write_seq; - tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); + tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); TCP_ECN_send_syn(sk, buff); /* Send it off. */ @@ -2617,6 +2623,7 @@ int tcp_connect(struct sock *sk) inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 0; } +EXPORT_SYMBOL(tcp_connect); /* Send out a delayed ack, the caller does the policy checking * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() @@ -2698,7 +2705,7 @@ void tcp_send_ack(struct sock *sk) /* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); - tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); + tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2732,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) * end to send an ack. Don't queue or clone SKB, just * send it. */ - tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); + tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); TCP_SKB_CB(skb)->when = tcp_time_stamp; return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -2762,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk) if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || skb->len > mss) { seg_size = min(seg_size, mss); - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; if (tcp_fragment(sk, skb, seg_size, mss)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(sk, skb, mss); - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; + TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) @@ -2821,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } } - -EXPORT_SYMBOL(tcp_select_initial_window); -EXPORT_SYMBOL(tcp_connect); -EXPORT_SYMBOL(tcp_make_synack); -EXPORT_SYMBOL(tcp_simple_retransmit); -EXPORT_SYMBOL(tcp_sync_mss); -EXPORT_SYMBOL(tcp_mtup_init); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 440a5c6004f6..808bb920c9f5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk) inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); } - EXPORT_SYMBOL(tcp_init_xmit_timers); static void tcp_write_err(struct sock *sk) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 3b3813cc80b9..59186ca7808a 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -48,7 +48,6 @@ err: return ret; } - EXPORT_SYMBOL(xfrm4_tunnel_register); int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) @@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) return ret; } - EXPORT_SYMBOL(xfrm4_tunnel_deregister); static int tunnel4_rcv(struct sk_buff *skb) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eec4ff456e33..32e0bef60d0a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, !sock_flag(sk, SOCK_BROADCAST)) goto out; if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); + sk_dst_set(sk, dst_clone(&rt->dst)); } if (msg->msg_flags&MSG_CONFIRM) @@ -978,7 +978,7 @@ out: return err; do_confirm: - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); if (!(msg->msg_flags&MSG_PROBE) || len) goto back_from_confirm; err = 0; diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6610bf76369f..ab76aa928fa9 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -58,6 +58,7 @@ struct proto udplite_prot = { .compat_getsockopt = compat_udp_getsockopt, #endif }; +EXPORT_SYMBOL(udplite_prot); static struct inet_protosw udplite4_protosw = { .type = SOCK_DGRAM, @@ -127,5 +128,3 @@ out_unregister_proto: out_register_err: printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); } - -EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad8fbb871aa0..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); } - EXPORT_SYMBOL(xfrm4_rcv); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1705476670ef..869078d4eeb9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, fl.fl4_src = saddr->a4; err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->u.dst; + dst = &rt->dst; if (err) dst = ERR_PTR(err); return dst; @@ -108,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct flowi)); + fl->mark = skb->mark; + if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { case IPPROTO_UDP: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1a698df5706..ab70a3fbcafa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -121,8 +121,6 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __ipv6_regen_rndid(struct inet6_dev *idev); static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); - -static int desync_factor = MAX_DESYNC_FACTOR * HZ; #endif static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); @@ -284,13 +282,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, static int snmp6_alloc_dev(struct inet6_dev *idev) { if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) goto err_ip; if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, - sizeof(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib), + __alignof__(struct icmpv6_mib)) < 0) goto err_icmp; if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, - sizeof(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) goto err_icmpmsg; return 0; @@ -557,7 +558,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warning("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->u.dst); + dst_release(&ifp->rt->dst); call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu); } @@ -823,7 +824,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt->rt6i_flags |= RTF_EXPIRES; } } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } out: @@ -890,7 +891,8 @@ retry: idev->cnf.temp_valid_lft); tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, - idev->cnf.temp_prefered_lft - desync_factor / HZ); + idev->cnf.temp_prefered_lft - + idev->cnf.max_desync_factor); tmp_plen = ifp->prefix_len; max_addresses = idev->cnf.max_addresses; tmp_cstamp = ifp->cstamp; @@ -1650,7 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data) expires = jiffies + idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; + idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - + idev->cnf.max_desync_factor * HZ; if (time_before(expires, jiffies)) { printk(KERN_WARNING "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", @@ -1760,7 +1763,10 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) idev = ipv6_find_idev(dev); if (!idev) - return NULL; + return ERR_PTR(-ENOBUFS); + + if (idev->cnf.disable_ipv6) + return ERR_PTR(-EACCES); /* Add default multicast route */ addrconf_add_mroute(dev); @@ -1863,7 +1869,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev, expires, flags); } if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* Try to figure out our local address for this prefix */ @@ -2129,8 +2135,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, if (!dev) return -ENODEV; - if ((idev = addrconf_add_dev(dev)) == NULL) - return -ENOBUFS; + idev = addrconf_add_dev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); scope = ipv6_addr_scope(pfx); @@ -2377,7 +2384,7 @@ static void addrconf_dev_config(struct net_device *dev) } idev = addrconf_add_dev(dev); - if (idev == NULL) + if (IS_ERR(idev)) return; memset(&addr, 0, sizeof(struct in6_addr)); @@ -2468,7 +2475,7 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) ASSERT_RTNL(); idev = addrconf_add_dev(dev); - if (!idev) { + if (IS_ERR(idev)) { printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); return; } @@ -3492,8 +3499,12 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred -= tval; else preferred = 0; - if (valid != INFINITY_LIFE_TIME) - valid -= tval; + if (valid != INFINITY_LIFE_TIME) { + if (valid > tval) + valid -= tval; + else + valid = 0; + } } } else { preferred = INFINITY_LIFE_TIME; @@ -3855,12 +3866,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib, memset(&stats[items], 0, pad); } +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, + int items, int bytes, size_t syncpoff) +{ + int i; + int pad = bytes - sizeof(u64) * items; + BUG_ON(pad < 0); + + /* Use put_unaligned() because stats may not be aligned for u64. */ + put_unaligned(items, &stats[0]); + for (i = 1; i < items; i++) + put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]); + + memset(&stats[items], 0, pad); +} + static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes) { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); + __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); @@ -4093,11 +4120,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); - dst_hold(&ifp->rt->u.dst); + dst_hold(&ifp->rt->dst); if (ifp->state == INET6_IFADDR_STATE_DEAD && ip6_del_rt(ifp->rt)) - dst_free(&ifp->rt->u.dst); + dst_free(&ifp->rt->dst); break; } } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8c4348cb1950..f0e774cea386 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -53,11 +53,7 @@ static struct ip6addrlbl_table static inline struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) { -#ifdef CONFIG_NET_NS - return lbl->lbl_net; -#else - return &init_net; -#endif + return read_pnet(&lbl->lbl_net); } /* diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e733942dafe1..56b9bf2516f4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = { .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ - .sendmsg = tcp_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ + .sendmsg = inet_sendmsg, /* ok */ + .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, - .sendpage = tcp_sendpage, + .sendpage = inet_sendpage, .splice_read = tcp_splice_read, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = { .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ - .recvmsg = sock_common_recvmsg, /* ok */ + .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT @@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); @@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -976,19 +971,24 @@ static void ipv6_packet_cleanup(void) static int __net_init ipv6_init_mibs(struct net *net) { if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof (struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) return -ENOMEM; if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof (struct udp_mib)) < 0) + sizeof(struct udp_mib), + __alignof__(struct udp_mib)) < 0) goto err_udplite_mib; if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib)) < 0) + sizeof(struct ipstats_mib), + __alignof__(struct ipstats_mib)) < 0) goto err_ip_mib; if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib)) < 0) + sizeof(struct icmpv6_mib), + __alignof__(struct icmpv6_mib)) < 0) goto err_icmp_mib; if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, - sizeof(struct icmpv6msg_mib)) < 0) + sizeof(struct icmpv6msg_mib), + __alignof__(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; return 0; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b5b07054508a..0e5e943446f0 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) pac->acl_next = NULL; ipv6_addr_copy(&pac->acl_addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } else if (ishost) { err = -EADDRNOTAVAIL; - goto out_free_pac; + goto error; } else { /* router, no matching interface: just pick one */ - - dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags_rcu(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { err = -ENODEV; - goto out_free_pac; + goto error; } - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; - goto out_dev_put; + goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; - in6_dev_put(idev); pac->acl_ifindex = dev->ifindex; @@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto out_dev_put; + goto error; } err = ipv6_dev_ac_inc(dev, addr); - if (err) - goto out_dev_put; - - write_lock_bh(&ipv6_sk_ac_lock); - pac->acl_next = np->ipv6_ac_list; - np->ipv6_ac_list = pac; - write_unlock_bh(&ipv6_sk_ac_lock); - - dev_put(dev); - - return 0; + if (!err) { + write_lock_bh(&ipv6_sk_ac_lock); + pac->acl_next = np->ipv6_ac_list; + np->ipv6_ac_list = pac; + write_unlock_bh(&ipv6_sk_ac_lock); + pac = NULL; + } -out_dev_put: - dev_put(dev); -out_free_pac: - sock_kfree_s(sk, pac, sizeof(*pac)); +error: + rcu_read_unlock(); + if (pac) + sock_kfree_s(sk, pac, sizeof(*pac)); return err; } @@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(net, pac->acl_ifindex); - if (dev) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - dev_put(dev); - } + rcu_read_unlock(); + sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } @@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk) write_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; + rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - if (dev) - dev_put(dev); - dev = dev_get_by_index(net, pac->acl_ifindex); + dev = dev_get_by_index_rcu(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - if (dev) - dev_put(dev); + rcu_read_unlock(); } #if 0 @@ -250,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->u.dst); + dst_release(&ac->aca_rt->dst); kfree(ac); } } @@ -356,40 +350,39 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->u.dst); + dst_hold(&aca->aca_rt->dst); ip6_del_rt(aca->aca_rt); aca_put(aca); return 0; } +/* called with rcu_read_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) { - int ret; - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); + if (idev == NULL) return -ENODEV; - ret = __ipv6_dev_ac_dec(idev, addr); - in6_dev_put(idev); - return ret; + return __ipv6_dev_ac_dec(idev, addr); } /* * check if the interface has this anycast address + * called with rcu_read_lock() */ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; - idev = in6_dev_get(dev); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (aca = idev->ac_list; aca; aca = aca->aca_next) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; read_unlock_bh(&idev->lock); - in6_dev_put(idev); return aca != NULL; } return 0; @@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, { int found = 0; - if (dev) - return ipv6_chk_acast_dev(dev, addr); rcu_read_lock(); - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { - found = 1; - break; - } + if (dev) + found = ipv6_chk_acast_dev(dev, addr); + else + for_each_netdev_rcu(net, dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; + break; + } rcu_read_unlock(); return found; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 712684687c9a..7d929a22cbc2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; struct flowi fl; struct ip6_flowlabel *flowlabel = NULL; + struct ipv6_txoptions *opt; int addr_type; int err; @@ -155,19 +156,8 @@ ipv4_connected: security_sk_classify_flow(sk, &fl); - if (flowlabel) { - if (flowlabel->opt && flowlabel->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } - } else if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + opt = flowlabel ? flowlabel->opt : np->opt; + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8a659f92d17a..262f105d23b9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) Routing header. ********************************/ +/* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int accept_source_route = net->ipv6.devconf_all->accept_source_route; - idev = in6_dev_get(skb->dev); - if (idev) { - if (accept_source_route > idev->cnf.accept_source_route) - accept_source_route = idev->cnf.accept_source_route; - in6_dev_put(idev); - } + idev = __in6_dev_get(skb->dev); + if (idev && accept_source_route > idev->cnf.accept_source_route) + accept_source_route = idev->cnf.accept_source_route; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + @@ -874,3 +872,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, return opt; } +/** + * fl6_update_dst - update flowi destination address with info given + * by srcrt option, if any. + * + * @fl: flowi for which fl6_dst is to be updated + * @opt: struct ipv6_txoptions in which to look for srcrt opt + * @orig: copy of original fl6_dst address if modified + * + * Returns NULL if no txoptions or no srcrt, otherwise returns orig + * and initial value of fl->fl6_dst set in orig + */ +struct in6_addr *fl6_update_dst(struct flowi *fl, + const struct ipv6_txoptions *opt, + struct in6_addr *orig) +{ + if (!opt || !opt->srcrt) + return NULL; + + ipv6_addr_copy(orig, &fl->fl6_dst); + ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + return orig; +} + +EXPORT_SYMBOL_GPL(fl6_update_dst); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e44f8f9c188..b1108ede18e1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, if (arg.result) return arg.result; - dst_hold(&net->ipv6.ip6_null_entry->u.dst); - return &net->ipv6.ip6_null_entry->u.dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct in6_addr saddr; if (ipv6_dev_get_saddr(net, - ip6_dst_idev(&rt->u.dst)->dev, + ip6_dst_idev(&rt->dst)->dev, &flp->fl6_dst, rt6_flags2srcprefs(flags), &saddr)) @@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = NULL; goto out; discard_pkt: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); out: arg->result = rt; return rt == NULL ? -EAGAIN : 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c5e3c3b7fd5..8a1628023bd1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; struct dst_entry *dst; - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); fl.proto = sk->sk_protocol; @@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb) fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 92a122b7795d..b6a585909d35 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn) static __inline__ void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) - dst_free(&rt->u.dst); + dst_free(&rt->dst); } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { + for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) { /* * Search for duplicates */ @@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (iter->rt6i_metric > rt->rt6i_metric) break; - ins = &iter->u.dst.rt6_next; + ins = &iter->dst.rt6_next; } /* Reset round-robin state, if necessary */ @@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * insert node */ - rt->u.dst.rt6_next = iter; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); @@ -799,7 +799,7 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - dst_free(&rt->u.dst); + dst_free(&rt->dst); } return err; @@ -810,7 +810,7 @@ out: st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; #endif } @@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, RT6_TRACE("fib6_del_route\n"); /* Unlink it */ - *rtp = rt->u.dst.rt6_next; + *rtp = rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->u.dst.rt6_next; + w->leaf = rt->dst.rt6_next; if (w->leaf == NULL) w->state = FWS_U; } } read_unlock(&fib6_walker_lock); - rt->u.dst.rt6_next = NULL; + rt->dst.rt6_next = NULL; /* If it was last route, expunge its radix tree node */ if (fn->leaf == NULL) { @@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->u.dst.obsolete>0) { + if (rt->dst.obsolete>0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { + for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { if (*rtp == rt) { fib6_del_route(fn, rtp, info); return 0; @@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) .nl_net = c->net, }; - for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args.more++; } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->u.dst.__refcnt) == 0 && - time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { + if (atomic_read(&rt->dst.__refcnt) == 0 && + time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 89425af0684c..d40b330c0ee6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, @@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) @@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return 0; } @@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->u.dst); + dst_release(&rt->dst); return err; } @@ -785,7 +785,7 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -798,7 +798,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } - dst_hold(&rt->u.dst); - inet->cork.dst = &rt->u.dst; + dst_hold(&rt->dst); + inet->cork.dst = &rt->dst; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } inet->cork.fragsize = mtu; - if (dst_allfrag(rt->u.dst.path)) + if (dst_allfrag(rt->dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; inet->cork.length = 0; sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - + exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, mtu = inet->cork.fragsize; } - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); @@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } if (proto == IPPROTO_UDP && - (rt->u.dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO)) { err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, @@ -1270,7 +1270,7 @@ alloc_new_skb: fraglen = datalen + fragheaderlen; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; @@ -1281,7 +1281,7 @@ alloc_new_skb: * because we have no idea if we're the last one. */ if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += rt->dst.trailer_len; /* * We just reserve space for fragment header. @@ -1358,7 +1358,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk) skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8f39893d8081..0fd027f3f47e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) goto out; - skb2->dev = rt->u.dst.dev; + skb2->dev = rt->dst.dev; /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { @@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL) { ip_rt_put(rt); goto out; } @@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb2); } @@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index bd43f0152c21..a7f66bc8f0b0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -55,8 +55,6 @@ #include <asm/uaccess.h> -DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly; - struct ip6_ra_chain *ip6_ra_chain; DEFINE_RWLOCK(ip6_ra_lock); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index ab1622d7d409..d1444b95ad7e 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = NULL; ipv6_addr_copy(&mc_lst->addr, addr); + rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; - dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - dev_put(dev); return err; } @@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) np->ipv6_mc_list = mc_lst; write_unlock_bh(&ipv6_sk_mc_lock); - dev_put(dev); + rcu_read_unlock(); return 0; } @@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev != NULL) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return 0; } @@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct net *net, - struct in6_addr *group, - int ifindex) +/* called with rcu_read_lock() */ +static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt; + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); - rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); - dst_release(&rt->u.dst); + dst_release(&rt->dst); } } else - dev = dev_get_by_index(net, ifindex); + dev = dev_get_by_index_rcu(net, ifindex); if (!dev) - goto nodev; - idev = in6_dev_get(dev); + return NULL; + idev = __in6_dev_get(dev); if (!idev) - goto release; + return NULL;; read_lock_bh(&idev->lock); - if (idev->dead) - goto unlock_release; - + if (idev->dead) { + read_unlock_bh(&idev->lock); + return NULL; + } return idev; - -unlock_release: - read_unlock_bh(&idev->lock); - in6_dev_put(idev); -release: - dev_put(dev); -nodev: - return NULL; } void ipv6_sock_mc_close(struct sock *sk) @@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(net, mc_lst->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, mc_lst->ifindex); if (dev) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) { + if (idev) __ipv6_dev_mc_dec(idev, &mc_lst->addr); - in6_dev_put(idev); - } - dev_put(dev); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - + rcu_read_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); write_lock_bh(&ipv6_sk_mc_lock); @@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); - if (!idev) + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface); + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = -EADDRNOTAVAIL; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, pmc->sfmode = omode; } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); pmclocked = 1; psl = pmc->sflist; @@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: if (pmclocked) - write_unlock_bh(&pmc->sflock); - read_unlock_bh(&ipv6_sk_mc_lock); + write_unlock(&pmc->sflock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; + } dev = idev->dev; err = 0; - read_lock_bh(&ipv6_sk_mc_lock); + read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; @@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); } - write_lock_bh(&pmc->sflock); + write_lock(&pmc->sflock); psl = pmc->sflist; if (psl) { (void) ip6_mc_del_src(idev, group, pmc->sfmode, @@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); pmc->sflist = newpsl; pmc->sfmode = gsf->gf_fmode; - write_unlock_bh(&pmc->sflock); + write_unlock(&pmc->sflock); err = 0; done: - read_unlock_bh(&ipv6_sk_mc_lock); + read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(net, group, gsf->gf_interface); + rcu_read_lock(); + idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface); - if (!idev) + if (!idev) { + rcu_read_unlock(); return -ENODEV; - + } dev = idev->dev; err = -EADDRNOTAVAIL; @@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, psl = pmc->sflist; count = psl ? psl->sl_count : 0; read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, return 0; done: read_unlock_bh(&idev->lock); - in6_dev_put(idev); - dev_put(dev); + rcu_read_unlock(); return err; } @@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) struct ifmcaddr6 *mc; struct inet6_dev *idev; + /* we need to take a reference on idev */ idev = in6_dev_get(dev); if (idev == NULL) @@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); ipv6_addr_copy(&mc->mca_addr, addr); - mc->idev = idev; + mc->idev = idev; /* (reference taken) */ mc->mca_users = 1; /* mca_stamp should be updated upon changes */ mc->mca_cstamp = mc->mca_tstamp = jiffies; @@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = in6_dev_get(dev); + struct inet6_dev *idev; int err; - if (!idev) - return -ENODEV; - - err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_lock(); - in6_dev_put(idev); + idev = __in6_dev_get(dev); + if (!idev) + err = -ENODEV; + else + err = __ipv6_dev_mc_dec(idev, addr); + rcu_read_unlock(); return err; } @@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, struct ifmcaddr6 *mc; int rv = 0; - idev = in6_dev_get(dev); + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); for (mc = idev->mc_list; mc; mc=mc->next) { @@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rv = 1; /* don't filter unspecified source */ } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } + rcu_read_unlock(); return rv; } @@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, return 1; } +/* called with rcu_read_lock() */ int igmp6_event_query(struct sk_buff *skb) { struct mld2_query *mlh2 = NULL; @@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb) if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return 0; @@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb) group_type = ipv6_addr_type(group); if (group_type != IPV6_ADDR_ANY && - !(group_type&IPV6_ADDR_MULTICAST)) { - in6_dev_put(idev); + !(group_type&IPV6_ADDR_MULTICAST)) return -EINVAL; - } if (len == 24) { int switchback; @@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb) } else if (len >= 28) { int srcs_offset = sizeof(struct mld2_query) - sizeof(struct icmp6hdr); - if (!pskb_may_pull(skb, srcs_offset)) { - in6_dev_put(idev); + if (!pskb_may_pull(skb, srcs_offset)) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000; if (!max_delay) @@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb) if (mlh2->mld2q_qrv) idev->mc_qrv = mlh2->mld2q_qrv; if (group_type == IPV6_ADDR_ANY) { /* general query */ - if (mlh2->mld2q_nsrcs) { - in6_dev_put(idev); + if (mlh2->mld2q_nsrcs) return -EINVAL; /* no sources allowed */ - } + mld_gq_start_timer(idev); - in6_dev_put(idev); return 0; } /* mark sources to include, if group & source-specific */ if (mlh2->mld2q_nsrcs != 0) { if (!pskb_may_pull(skb, srcs_offset + - ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) { - in6_dev_put(idev); + ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) return -EINVAL; - } + mlh2 = (struct mld2_query *)skb_transport_header(skb); mark = 1; } - } else { - in6_dev_put(idev); + } else return -EINVAL; - } read_lock_bh(&idev->lock); if (group_type == IPV6_ADDR_ANY) { @@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } - +/* called with rcu_read_lock() */ int igmp6_event_report(struct sk_buff *skb) { struct ifmcaddr6 *ma; @@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb) !(addr_type&IPV6_ADDR_LINKLOCAL)) return -EINVAL; - idev = in6_dev_get(skb->dev); + idev = __in6_dev_get(skb->dev); if (idev == NULL) return -ENODEV; @@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb) } } read_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } @@ -1396,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb) struct mld2_report *pmr = (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; - struct inet6_dev *idev = in6_dev_get(skb->dev); + struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; struct flowi fl; struct dst_entry *dst; + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); @@ -1441,8 +1433,7 @@ out: } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1779,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - idev = in6_dev_get(skb->dev); + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); if (!dst) { @@ -1806,8 +1798,7 @@ out: } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - if (likely(idev != NULL)) - in6_dev_put(idev); + rcu_read_unlock(); return; err_out: @@ -1998,8 +1989,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) &psf->sf_addr)) break; if (!dpsf) { - dpsf = (struct ip6_sf_list *) - kmalloc(sizeof(*dpsf), GFP_ATOMIC); + dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC); if (!dpsf) continue; *dpsf = *psf; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 2794b6002836..d6e9599d0705 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type = static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { + struct ipv6hdr *iph = ipv6_hdr(skb); struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; int err = rt2->rt_hdr.nexthdr; spin_lock(&x->lock); - if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) && !ipv6_addr_any((struct in6_addr *)x->coaddr)) err = -ENOENT; spin_unlock(&x->lock); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2efef52fb461..58841c4ae947 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", __func__); - dst_release(&rt->u.dst); + dst_release(&rt->dst); in6_dev_put(in6_dev); return; } @@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; } skip_defrtr: @@ -1363,7 +1363,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->u.dst.metrics[RTAX_MTU-1] = mtu; + rt->dst.metrics[RTAX_MTU-1] = mtu; rt6_mtu_change(skb->dev, mtu); } @@ -1384,7 +1384,7 @@ skip_linkparms: } out: if (rt) - dst_release(&rt->u.dst); + dst_release(&rt->dst); else if (neigh) neigh_release(neigh); in6_dev_put(in6_dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index a74951c039b6..7155b2451d7c 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, protocol, csum_sub(0, hsum))); skb->ip_summed = CHECKSUM_NONE; - csum = __skb_checksum_complete_head(skb, dataoff + len); - if (!csum) - skb->ip_summed = CHECKSUM_UNNECESSARY; + return __skb_checksum_complete_head(skb, dataoff + len); } return csum; }; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 8c201743d96d..413ab0754e1f 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); +static DEFINE_SPINLOCK(queue_lock); static int peer_pid __read_mostly; static unsigned int copy_range __read_mostly; static unsigned int queue_total; @@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) break; case IPQ_COPY_PACKET: - copy_mode = mode; + if (range > 0xFFFF) + range = 0xFFFF; copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; + copy_mode = mode; break; default: @@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id) { struct nf_queue_entry *entry = NULL, *i; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); list_for_each_entry(i, &queue_list, list) { if ((unsigned long)i == id) { @@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id) queue_total--; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return entry; } @@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) static void ipq_flush(ipq_cmpfn cmpfn, unsigned long data) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); __ipq_flush(cmpfn, data); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } static struct sk_buff * @@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) struct nlmsghdr *nlh; struct timeval tv; - read_lock_bh(&queue_lock); - - switch (copy_mode) { + switch (ACCESS_ONCE(copy_mode)) { case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); @@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_PACKET: if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) { - read_unlock_bh(&queue_lock); + (*errp = skb_checksum_help(entry->skb))) return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) + + data_len = ACCESS_ONCE(copy_range); + if (data_len == 0 || data_len > entry->skb->len) data_len = entry->skb->len; - else - data_len = copy_range; size = NLMSG_SPACE(sizeof(*pmsg) + data_len); break; default: *errp = -EINVAL; - read_unlock_bh(&queue_lock); return NULL; } - read_unlock_bh(&queue_lock); - skb = alloc_skb(size, GFP_ATOMIC); if (!skb) goto nlmsg_failure; @@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) if (nskb == NULL) return status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (!peer_pid) goto err_out_free_nskb; @@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) __ipq_enqueue_entry(entry); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; err_out_free_nskb: kfree_skb(nskb); err_out_unlock: - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) { int status; - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return status; } @@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb) if (security_netlink_recv(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if (peer_pid) { if (peer_pid != pid) { - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); RCV_SKB_FAIL(-EBUSY); } } else { @@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb) peer_pid = pid; } - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); @@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this, struct netlink_notify *n = ptr; if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { - write_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); - write_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); } return NOTIFY_DONE; } @@ -528,7 +521,7 @@ static ctl_table ipq_table[] = { #ifdef CONFIG_PROC_FS static int ip6_queue_show(struct seq_file *m, void *v) { - read_lock_bh(&queue_lock); + spin_lock_bh(&queue_lock); seq_printf(m, "Peer PID : %d\n" @@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v) queue_dropped, queue_user_dropped); - read_unlock_bh(&queue_lock); + spin_unlock_bh(&queue_lock); return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9d2d68f0e605..5359ef4daac5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -387,9 +387,7 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) + - sizeof(struct ipv6hdr), 1); + ADD_COUNTER(e->counters, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -899,7 +897,7 @@ get_counters(const struct xt_table_info *t, struct ip6t_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; + unsigned int curcpu = get_cpu(); /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters @@ -909,14 +907,16 @@ get_counters(const struct xt_table_info *t, * if new softirq were to run and call ipt_do_table */ local_bh_disable(); - curcpu = smp_processor_id(); - i = 0; xt_entry_foreach(iter, t->entries[curcpu], t->size) { SET_COUNTER(counters[i], iter->counters.bcnt, iter->counters.pcnt); ++i; } + local_bh_enable(); + /* Processing counters from other cpus, we can let bottom half enabled, + * (preemption is disabled) + */ for_each_possible_cpu(cpu) { if (cpu == curcpu) @@ -930,7 +930,7 @@ get_counters(const struct xt_table_info *t, } xt_info_wrunlock(cpu); } - local_bh_enable(); + put_cpu(); } static struct xt_counters *alloc_counters(const struct xt_table *table) @@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vmalloc(countersize); if (counters == NULL) return ERR_PTR(-ENOMEM); @@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct ip6t_entry *iter; ret = 0; - counters = vmalloc_node(num_counters * sizeof(struct xt_counters), - numa_node_id()); + counters = vmalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; goto out; @@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len - size, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index af4ee11f2066..0a07ae7b933f 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info, printk("MARK=0x%x ", skb->mark); } +static void dump_mac_header(const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & IP6T_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + printk("MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT && + (p -= ETH_HLEN) < skb->head) + p = NULL; + + if (p != NULL) { + printk("%02x", *p++); + for (i = 1; i < len; i++) + printk(":%02x", p[i]); + } + printk(" "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); + } + } else + printk(" "); +} + static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { @@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf, prefix, in ? in->name : "", out ? out->name : ""); - if (in && !out) { - unsigned int len; - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && (len = skb->dev->hard_header_len) && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - int i; - - if (skb->dev->type == ARPHRD_SIT && - (p -= ETH_HLEN) < skb->head) - p = NULL; - - if (p != NULL) { - for (i = 0; i < len; i++) - printk("%02x%s", p[i], - i == len - 1 ? "" : ":"); - } - printk(" "); - if (skb->dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - printk("TUNNEL=%pI4->%pI4 ", - &iph->saddr, &iph->daddr); - } - } else - printk(" "); - } + /* MAC logging for input path only. */ + if (in && !out) + dump_mac_header(loginfo, skb); dump_packet(loginfo, skb, skb_network_offset(skb), 1); printk("\n"); diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 47d227713758..2933396e0281 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(net, NULL, &fl); - if (dst == NULL) + if (dst == NULL || dst->error) { + dst_release(dst); return; - if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) + } + if (xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9be81776415e..1df3c8b6bf47 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, type = icmp6h->icmp6_type - 130; if (type >= 0 && type < sizeof(noct_valid_new) && noct_valid_new[type]) { - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6fb890187de0..13ef5bc05cf5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb) } /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static void frag_kfree_skb(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf_init_frags.mem); nf_skb_free(skb); kfree_skb(skb); @@ -201,7 +199,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { - pr_debug("Allready completed\n"); + pr_debug("Already completed\n"); goto err; } @@ -271,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for (next = fq->q.fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) @@ -278,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -335,7 +339,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(free_it); } } @@ -343,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -442,7 +448,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_init_frags.mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -452,8 +457,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &nf_init_frags.mem); } + atomic_sub(head->truesize, &nf_init_frags.mem); head->next = NULL; head->dev = dev; @@ -467,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 566798d69f37..d082eaeefa25 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, const struct snmp_mib *itemlist) { int i; - for (i=0; itemlist[i].name; i++) + + for (i = 0; itemlist[i].name; i++) seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, snmp_fold_field(mib, itemlist[i].entry)); } +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, + const struct snmp_mib *itemlist, size_t syncpoff) +{ + int i; + + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, + snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); +} + static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, - snmp6_ipstats_list); + snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4a4dcbe4f8b2..e677937a07fc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -602,31 +602,33 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct rt6_info *rt, + struct flowi *fl, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; + struct rt6_info *rt = (struct rt6_info *)*dstp; - if (length > rt->u.dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu); + if (length > rt->dst.dev->mtu) { + ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, &rt->dst); + *dstp = NULL; skb_put(skb, length); skb_reset_network_header(skb); @@ -641,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->u.dst.dev, dst_output); + rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -725,7 +727,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, { struct ipv6_txoptions opt_space; struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); @@ -847,13 +849,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; @@ -892,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, goto do_confirm; back_from_confirm: - if (inet->hdrincl) { - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); - } else { + if (inet->hdrincl) + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 6d4292ff5854..545c4141b755 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -150,11 +150,8 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) EXPORT_SYMBOL(ip6_frag_match); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct netns_frags *nf, - struct sk_buff *skb, int *work) +static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf->mem); kfree_skb(skb); } @@ -336,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) @@ -343,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, prev = next; } +found: /* We found where to put this one. Check for overlap with * preceding fragment, and, if needed, align things so that * any overlaps are eliminated. @@ -392,7 +395,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->q.fragments = next; fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it, NULL); + frag_kfree_skb(fq->q.net, free_it); } } @@ -400,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -466,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oom; fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; prev->next = fp; skb_morph(head, fq->q.fragments); @@ -524,7 +531,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &fq->q.net->mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -534,8 +540,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &fq->q.net->mem); } + atomic_sub(head->truesize, &fq->q.net->mem); head->next = NULL; head->dev = dev; @@ -553,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; return 1; out_oversize: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 252d76199c41..8f2d0400cf8a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = { }; static struct rt6_info ip6_null_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); static struct rt6_info ip6_prohibit_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_prohibit, - .output = ip6_pkt_prohibit_out, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_prohibit, + .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = { }; static struct rt6_info ip6_blk_hole_entry_template = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .obsolete = -1, - .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = dst_discard, - .output = dst_discard, - } + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = dst_discard, + .output = dst_discard, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { struct net_device *dev = sprt->rt6i_dev; if (oif) { @@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->u.dst.rt6_next) + rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match); return match; @@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) if (!match && (strict & RT6_LOOKUP_F_REACHABLE)) { - struct rt6_info *next = rt0->u.dst.rt6_next; + struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt->rt6i_expires = jiffies + HZ * lifetime; rt->rt6i_flags |= RTF_EXPIRES; } - dst_release(&rt->u.dst); + dst_release(&rt->dst); } return 0; } @@ -555,7 +549,7 @@ restart: rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: - dst_use(&rt->u.dst, jiffies); + dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING "Neighbour table overflow.\n"); - dst_free(&rt->u.dst); + dst_free(&rt->dst); return NULL; } rt->rt6i_nexthop = neigh; @@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; - rt->u.dst.flags |= DST_HOST; + rt->dst.flags |= DST_HOST; rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); } return rt; @@ -726,7 +720,7 @@ restart: rt->rt6i_flags & RTF_CACHE) goto out; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) @@ -739,10 +733,10 @@ restart: #endif } - dst_release(&rt->u.dst); + dst_release(&rt->dst); rt = nrt ? : net->ipv6.ip6_null_entry; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); if (nrt) { err = ip6_ins_rt(nrt); if (!err) @@ -756,7 +750,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto relookup; out: @@ -764,11 +758,11 @@ out: reachable = 0; goto restart_2; } - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); out2: - rt->u.dst.lastuse = jiffies; - rt->u.dst.__use++; + rt->dst.lastuse = jiffies; + rt->dst.__use++; return rt; } @@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl struct dst_entry *new = NULL; if (rt) { - new = &rt->u.dst; + new = &rt->dst; atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - new->dev = ort->u.dst.dev; + memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); rt->rt6i_idev = ort->rt6i_idev; @@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { if (rt->rt6i_flags&RTF_CACHE) { - dst_set_expires(&rt->u.dst, 0); + dst_set_expires(&rt->dst, 0); rt->rt6i_flags |= RTF_EXPIRES; } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) rt->rt6i_node->fn_sernum = -1; @@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_dev = dev; rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; - atomic_set(&rt->u.dst.__refcnt, 1); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.output = ip6_output; + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ - rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST + rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST ? DST_HOST : 0; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); @@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, #endif spin_lock_bh(&icmp6_dst_lock); - rt->u.dst.next = icmp6_dst_gc_list; - icmp6_dst_gc_list = &rt->u.dst; + rt->dst.next = icmp6_dst_gc_list; + icmp6_dst_gc_list = &rt->dst; spin_unlock_bh(&icmp6_dst_lock); fib6_force_start_gc(net); out: - return &rt->u.dst; + return &rt->dst; } int icmp6_dst_gc(void) @@ -1090,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev) int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; - idev = in6_dev_get(dev); - if (idev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) mtu = idev->cnf.mtu6; - in6_dev_put(idev); - } + rcu_read_unlock(); return mtu; } @@ -1103,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst) int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; - struct inet6_dev *idev = in6_dev_get(dev); - if (idev) { + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) hoplimit = idev->cnf.hop_limit; - in6_dev_put(idev); - } else + else hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; + rcu_read_unlock(); } return hoplimit; } @@ -1159,7 +1156,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->u.dst.obsolete = -1; + rt->dst.obsolete = -1; rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? jiffies + clock_t_to_jiffies(cfg->fc_expires) : 0; @@ -1171,16 +1168,16 @@ int ip6_route_add(struct fib6_config *cfg) addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) - rt->u.dst.input = ip6_mc_input; + rt->dst.input = ip6_mc_input; else - rt->u.dst.input = ip6_forward; + rt->dst.input = ip6_forward; - rt->u.dst.output = ip6_output; + rt->dst.output = ip6_output; ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->u.dst.flags = DST_HOST; + rt->dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1208,9 +1205,9 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->u.dst.output = ip6_pkt_discard_out; - rt->u.dst.input = ip6_pkt_discard; - rt->u.dst.error = -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; goto install_route; } @@ -1244,7 +1241,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->rt6i_dev) { - dst_release(&grt->u.dst); + dst_release(&grt->dst); goto out; } } else { @@ -1255,7 +1252,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags&RTF_GATEWAY)) err = 0; - dst_release(&grt->u.dst); + dst_release(&grt->dst); if (err) goto out; @@ -1294,18 +1291,18 @@ install_route: goto out; } - rt->u.dst.metrics[type - 1] = nla_get_u32(nla); + rt->dst.metrics[type - 1] = nla_get_u32(nla); } } } - if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!dst_mtu(&rt->u.dst)) - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.dev = dev; + if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + if (!dst_mtu(&rt->dst)) + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); + if (!dst_metric(&rt->dst, RTAX_ADVMSS)) + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1319,7 +1316,7 @@ out: if (idev) in6_dev_put(idev); if (rt) - dst_free(&rt->u.dst); + dst_free(&rt->dst); return err; } @@ -1336,7 +1333,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - dst_release(&rt->u.dst); + dst_release(&rt->dst); write_unlock_bh(&table->tb6_lock); @@ -1369,7 +1366,7 @@ static int ip6_route_del(struct fib6_config *cfg) &cfg->fc_src, cfg->fc_src_len); if (fn) { - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || rt->rt6i_dev->ifindex != cfg->fc_ifindex)) @@ -1379,7 +1376,7 @@ static int ip6_route_del(struct fib6_config *cfg) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); return __ip6_del_rt(rt, &cfg->fc_nlinfo); @@ -1421,7 +1418,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* * Current route is on-link; redirect is always invalid. * @@ -1445,7 +1442,7 @@ restart: rt = net->ipv6.ip6_null_entry; BACKTRACK(net, &fl->fl6_src); out: - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1513,10 +1510,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, * Look, redirects are sent only in response to data packets, * so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->u.dst.neighbour) + if (neigh == rt->dst.neighbour) goto out; nrt = ip6_rt_copy(rt); @@ -1529,20 +1526,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; - nrt->u.dst.flags |= DST_HOST; + nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->u.dst)); + nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); + nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst)); if (ip6_ins_rt(nrt)) goto out; - netevent.old = &rt->u.dst; - netevent.new = &nrt->u.dst; + netevent.old = &rt->dst; + netevent.new = &nrt->dst; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { @@ -1551,7 +1548,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1570,7 +1567,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, if (rt == NULL) return; - if (pmtu >= dst_mtu(&rt->u.dst)) + if (pmtu >= dst_mtu(&rt->dst)) goto out; if (pmtu < IPV6_MIN_MTU) { @@ -1588,7 +1585,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, They are sent only in response to data packets, so that this nexthop apparently is reachable. --ANK */ - dst_confirm(&rt->u.dst); + dst_confirm(&rt->dst); /* Host route. If it is static, it would be better not to override it, but add new one, so that @@ -1596,10 +1593,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. */ if (rt->rt6i_flags & RTF_CACHE) { - rt->u.dst.metrics[RTAX_MTU-1] = pmtu; + rt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; - dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; } @@ -1615,9 +1612,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; + nrt->dst.metrics[RTAX_MTU-1] = pmtu; if (allfrag) - nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. @@ -1625,13 +1622,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, * which is 10 mins. After 10 mins the decreased pmtu is expired * and detecting PMTU increase will be automatically happened. */ - dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); + dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; ip6_ins_rt(nrt); } out: - dst_release(&rt->u.dst); + dst_release(&rt->dst); } /* @@ -1644,18 +1641,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { - rt->u.dst.input = ort->u.dst.input; - rt->u.dst.output = ort->u.dst.output; - - memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); - rt->u.dst.error = ort->u.dst.error; - rt->u.dst.dev = ort->u.dst.dev; - if (rt->u.dst.dev) - dev_hold(rt->u.dst.dev); + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + + memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + rt->dst.error = ort->dst.error; + rt->dst.dev = ort->dst.dev; + if (rt->dst.dev) + dev_hold(rt->dst.dev); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); - rt->u.dst.lastuse = jiffies; + rt->dst.lastuse = jiffies; rt->rt6i_expires = 0; ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); @@ -1689,14 +1686,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); break; } out: @@ -1744,14 +1741,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d return NULL; write_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; } if (rt) - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); write_unlock_bh(&table->tb6_lock); return rt; } @@ -1790,9 +1787,9 @@ void rt6_purge_dflt_routers(struct net *net) restart: read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { + for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt); goto restart; @@ -1930,15 +1927,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, dev_hold(net->loopback_dev); in6_dev_hold(idev); - rt->u.dst.flags = DST_HOST; - rt->u.dst.input = ip6_input; - rt->u.dst.output = ip6_output; + rt->dst.flags = DST_HOST; + rt->dst.input = ip6_input; + rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - rt->u.dst.obsolete = -1; + rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) @@ -1947,7 +1944,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_LOCAL; neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); if (IS_ERR(neigh)) { - dst_free(&rt->u.dst); + dst_free(&rt->dst); /* We are casting this because that is the return * value type. But an errno encoded pointer is the @@ -1962,7 +1959,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); - atomic_set(&rt->u.dst.__refcnt, 1); + atomic_set(&rt->dst.__refcnt, 1); return rt; } @@ -2033,12 +2030,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->rt6i_dev == arg->dev && - !dst_metric_locked(&rt->u.dst, RTAX_MTU) && - (dst_mtu(&rt->u.dst) >= arg->mtu || - (dst_mtu(&rt->u.dst) < arg->mtu && - dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { - rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU) && + (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6))) { + rt->dst.metrics[RTAX_MTU-1] = arg->mtu; + rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); } return 0; } @@ -2252,20 +2249,20 @@ static int rt6_fill_node(struct net *net, #endif NLA_PUT_U32(skb, RTA_IIF, iif); } else if (dst) { - struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); struct in6_addr saddr_buf; if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, dst, 0, &saddr_buf) == 0) NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) goto nla_put_failure; - if (rt->u.dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); + if (rt->dst.neighbour) + NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); - if (rt->u.dst.dev) + if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); @@ -2277,8 +2274,8 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, - expires, rt->u.dst.error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, + expires, rt->dst.error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2364,7 +2361,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, @@ -2416,12 +2413,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { - net->ipv6.ip6_null_entry->u.dst.dev = dev; + net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; + net->ipv6.ip6_prohibit_entry->dst.dev = dev; net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); - net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; + net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif } @@ -2464,8 +2461,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000"); } seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } @@ -2646,9 +2643,9 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_null_entry) goto out_ip6_dst_ops; - net->ipv6.ip6_null_entry->u.dst.path = + net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2656,18 +2653,18 @@ static int __net_init ip6_route_net_init(struct net *net) GFP_KERNEL); if (!net->ipv6.ip6_prohibit_entry) goto out_ip6_null_entry; - net->ipv6.ip6_prohibit_entry->u.dst.path = + net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); if (!net->ipv6.ip6_blk_hole_entry) goto out_ip6_prohibit_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.path = + net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; #endif net->ipv6.sysctl.flush_delay = 0; @@ -2742,12 +2739,12 @@ int __init ip6_route_init(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif ret = fib6_init(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e51e650ea80b..4699cd3c3118 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -249,8 +249,6 @@ failed: return NULL; } -static DEFINE_SPINLOCK(ipip6_prl_lock); - #define for_each_prl_rcu(start) \ for (prl = rcu_dereference(start); \ prl; \ @@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { @@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) t->prl_count++; rcu_assign_pointer(t->prl, p); out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) struct ip_tunnel_prl_entry *x, **p; int err = 0; - spin_lock(&ipip6_prl_lock); + ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { @@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } } out: - spin_unlock(&ipip6_prl_lock); return err; } @@ -716,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, stats->tx_carrier_errors++; goto tx_error_icmp; } - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; if (tdev == dev) { ip_rt_put(rt); @@ -725,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (df) { - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { stats->collisions++; @@ -784,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags = 0; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -833,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_IPV6 }; struct rtable *rt; if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; ip_rt_put(rt); } dev->flags |= IFF_POINTOPOINT; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 34d1f0690d7e..09fd34f0dbf2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* - * This table has to be sorted and terminated with (__u16)-1. - * XXX generate a better table. - * Unresolved Issues: HIPPI with a 64k MSS is not well supported. - * - * Taken directly from ipv4 implementation. - * Should this list be modified for ipv6 use or is it close enough? - * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart - */ +/* Table must be sorted. */ static __u16 const msstab[] = { - 64 - 1, - 256 - 1, - 512 - 1, - 536 - 1, - 1024 - 1, - 1440 - 1, - 1460 - 1, - 4312 - 1, - (__u16)-1 + 64, + 512, + 536, + 1280 - 60, + 1480 - 60, + 1500 - 60, + 4460 - 60, + 9000 - 60, }; -/* The number doesn't include the -1 terminator */ -#define NUM_MSS (ARRAY_SIZE(msstab) - 1) /* * This (misnamed) value is the age of syncookie which is permitted. @@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) tcp_synq_overflow(sk); - for (mssind = 0; mss > msstab[mssind + 1]; mssind++) - ; - *mssp = msstab[mssind] + 1; + for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) + if (mss >= msstab[mssind]) + break; + + *mssp = msstab[mssind]; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); @@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) th->source, th->dest, seq, jiffies / (HZ * 60), COUNTER_TRIES); - return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; + return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) @@ -173,8 +164,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + bool ecn_ok; - if (!sysctl_tcp_syncookies || !th->ack) + if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; if (tcp_synq_no_recent_overflow(sk) || @@ -189,8 +181,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, &hash_location, 0); - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); + if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) + goto out; ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); @@ -224,9 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; - ireq->ecn_ok = 0; + ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; @@ -240,17 +231,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) * me if there is a preferred way. */ { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b7c3a100e2c..fe6d40418c0b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct inet_connection_sock *icsk = inet_csk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p = NULL, final; + struct in6_addr *saddr = NULL, *final_p, final; struct flowi fl; struct dst_entry *dst; int addr_type; @@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, np->opt, &final); security_sk_classify_flow(sk, &fl); @@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; - struct in6_addr * final_p = NULL, final; + struct in6_addr * final_p, final; struct flowi fl; struct dst_entry *dst; int err = -1; @@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, security_req_classify_flow(req, &fl); opt = np->opt; - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -1167,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) } #ifdef CONFIG_SYN_COOKIES - if (!th->rst && !th->syn && th->ack) + if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; @@ -1279,13 +1269,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - if (!want_cookie) + if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); - if (want_cookie) { - isn = cookie_v6_init_sequence(sk, skb, &req->mss); - req->cookie_ts = tmp_opt.tstamp_ok; - } else if (!isn) { + if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1298,8 +1285,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) treq->iif = inet6_iif(skb); - - isn = tcp_v6_init_sequence(skb); + if (!want_cookie) { + isn = tcp_v6_init_sequence(skb); + } else { + isn = cookie_v6_init_sequence(sk, skb, &req->mss); + req->cookie_ts = tmp_opt.tstamp_ok; + } } tcp_rsk(req)->snt_isn = isn; @@ -1392,18 +1383,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (dst == NULL) { - struct in6_addr *final_p = NULL, final; + struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; - } + final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.mark = sk->sk_mark; @@ -2156,6 +2142,8 @@ struct proto tcpv6_prot = { .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .recvmsg = tcp_recvmsg, + .sendmsg = tcp_sendmsg, + .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .hash = tcp_v6_hash, .unhash = inet_unhash, @@ -2174,6 +2162,7 @@ struct proto tcpv6_prot = { .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, + .no_autobind = true, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 87be58673b55..1dd1affdead2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -927,7 +927,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; - struct in6_addr *daddr, *final_p = NULL, final; + struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi fl; @@ -1097,14 +1097,9 @@ do_udp_sendmsg: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl_ip_sport = inet->inet_sport; - /* merge ip6_build_xmit from ip6_output */ - if (opt && opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl.fl6_dst); - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); - final_p = &final; + final_p = fl6_update_dst(&fl, opt, &final); + if (final_p) connected = 0; - } if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { fl.oif = np->mcast_oif; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a0e77e14468..6baeabbbca82 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -124,6 +124,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); + fl->mark = skb->mark; + ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6a1a202710c5..800bc53b7f63 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -527,7 +527,7 @@ static int dev_irnet_close(struct inode * inode, struct file * file) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", file, ap); @@ -564,7 +564,7 @@ dev_irnet_write(struct file * file, size_t count, loff_t * ppos) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", file, ap, count); @@ -588,7 +588,7 @@ dev_irnet_read(struct file * file, size_t count, loff_t * ppos) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", file, ap, count); @@ -609,7 +609,7 @@ static unsigned int dev_irnet_poll(struct file * file, poll_table * wait) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; unsigned int mask; DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", @@ -638,7 +638,7 @@ dev_irnet_ioctl( unsigned int cmd, unsigned long arg) { - irnet_socket * ap = (struct irnet_socket *) file->private_data; + irnet_socket * ap = file->private_data; int err; int val; void __user *argp = (void __user *)arg; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 47db1d8a0d92..285761e77d90 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v) self->remote_credit); seq_printf(seq, "send credit: %d\n", self->send_credit); - seq_printf(seq, " tx packets: %ld, ", + seq_printf(seq, " tx packets: %lu, ", self->stats.tx_packets); - seq_printf(seq, "rx packets: %ld, ", + seq_printf(seq, "rx packets: %lu, ", self->stats.rx_packets); - seq_printf(seq, "tx_queue len: %d ", + seq_printf(seq, "tx_queue len: %u ", skb_queue_len(&self->tx_queue)); - seq_printf(seq, "rx_queue len: %d\n", + seq_printf(seq, "rx_queue len: %u\n", skb_queue_len(&self->rx_queue)); seq_printf(seq, " tx_sdu_busy: %s, ", self->tx_sdu_busy? "TRUE":"FALSE"); seq_printf(seq, "rx_sdu_busy: %s\n", self->rx_sdu_busy? "TRUE":"FALSE"); - seq_printf(seq, " max_seg_size: %d, ", + seq_printf(seq, " max_seg_size: %u, ", self->max_seg_size); - seq_printf(seq, "tx_max_sdu_size: %d, ", + seq_printf(seq, "tx_max_sdu_size: %u, ", self->tx_max_sdu_size); - seq_printf(seq, "rx_max_sdu_size: %d\n", + seq_printf(seq, "rx_max_sdu_size: %u\n", self->rx_max_sdu_size); seq_printf(seq, " Used by (%s)\n\n", diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index f28ad2cc8428..499c045d6910 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1463,7 +1463,7 @@ struct iucv_path_pending { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_pending(struct iucv_irq_data *data) { @@ -1524,7 +1524,7 @@ struct iucv_path_complete { u32 res3; u8 ippollfg; u8 res4[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_complete(struct iucv_irq_data *data) { @@ -1554,7 +1554,7 @@ struct iucv_path_severed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_severed(struct iucv_irq_data *data) { @@ -1590,7 +1590,7 @@ struct iucv_path_quiesced { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_quiesced(struct iucv_irq_data *data) { @@ -1618,7 +1618,7 @@ struct iucv_path_resumed { u32 res4; u8 ippollfg; u8 res5[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_path_resumed(struct iucv_irq_data *data) { @@ -1649,7 +1649,7 @@ struct iucv_message_complete { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_complete(struct iucv_irq_data *data) { @@ -1694,7 +1694,7 @@ struct iucv_message_pending { u32 ipbfln2f; u8 ippollfg; u8 res2[3]; -} __attribute__ ((packed)); +} __packed; static void iucv_message_pending(struct iucv_irq_data *data) { diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0852512d392c..226a0ae3bcfd 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len sk->sk_state = TCP_ESTABLISHED; inet->inet_id = jiffies; - sk_dst_set(sk, &rt->u.dst); + sk_dst_set(sk, &rt->dst); write_lock_bh(&l2tp_ip_lock); hlist_del_init(&sk->sk_bind_node); @@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) goto no_route; } - sk_setup_caps(sk, &rt->u.dst); + sk_setup_caps(sk, &rt->dst); } - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb); diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 8a91f6c0bb18..4d6f8653ec88 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,6 +33,13 @@ config MAC80211_RC_MINSTREL ---help--- This option enables the 'minstrel' TX rate control algorithm +config MAC80211_RC_MINSTREL_HT + bool "Minstrel 802.11n support" if EMBEDDED + depends on MAC80211_RC_MINSTREL + default y + ---help--- + This option enables the 'minstrel_ht' TX rate control algorithm + choice prompt "Default rate control algorithm" depends on MAC80211_HAS_RC @@ -62,6 +69,7 @@ endchoice config MAC80211_RC_DEFAULT string + default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL default "pid" if MAC80211_RC_DEFAULT_PID default "" diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 84b48ba8a77e..fdb54e61d637 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -51,7 +51,11 @@ rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o rc80211_minstrel-y := rc80211_minstrel.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o +rc80211_minstrel_ht-y := rc80211_minstrel_ht.o +rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o + mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 6bb9a9a94960..965b272499fd 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -6,39 +6,70 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +/** + * DOC: RX A-MPDU aggregation + * + * Aggregation on the RX side requires only implementing the + * @ampdu_action callback that is invoked to start/stop any + * block-ack sessions for RX aggregation. + * + * When RX aggregation is started by the peer, the driver is + * notified via @ampdu_action function, with the + * %IEEE80211_AMPDU_RX_START action, and may reject the request + * in which case a negative response is sent to the peer, if it + * accepts it a positive response is sent. + * + * While the session is active, the device/driver are required + * to de-aggregate frames and pass them up one by one to mac80211, + * which will handle the reorder buffer. + * + * When the aggregation session is stopped again by the peer or + * ourselves, the driver's @ampdu_action function will be called + * with the action %IEEE80211_AMPDU_RX_STOP. In this case, the + * call must not fail. + */ + #include <linux/ieee80211.h> #include <linux/slab.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" -static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, - bool from_timer) +static void ieee80211_free_tid_rx(struct rcu_head *h) { - struct ieee80211_local *local = sta->local; - struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_rx *tid_rx = + container_of(h, struct tid_ampdu_rx, rcu_head); int i; - spin_lock_bh(&sta->lock); + for (i = 0; i < tid_rx->buf_size; i++) + dev_kfree_skb(tid_rx->reorder_buf[i]); + kfree(tid_rx->reorder_buf); + kfree(tid_rx->reorder_time); + kfree(tid_rx); +} - /* check if TID is in operational state */ - if (!sta->ampdu_mlme.tid_active_rx[tid]) { - spin_unlock_bh(&sta->lock); - return; - } +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sta->local; + struct tid_ampdu_rx *tid_rx; - sta->ampdu_mlme.tid_active_rx[tid] = false; + lockdep_assert_held(&sta->ampdu_mlme.mtx); tid_rx = sta->ampdu_mlme.tid_rx[tid]; + if (!tid_rx) + return; + + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); @@ -54,32 +85,17 @@ static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ieee80211_send_delba(sta->sdata, sta->sta.addr, tid, 0, reason); - /* free the reordering buffer */ - for (i = 0; i < tid_rx->buf_size; i++) { - if (tid_rx->reorder_buf[i]) { - /* release the reordered frames */ - dev_kfree_skb(tid_rx->reorder_buf[i]); - tid_rx->stored_mpdu_num--; - tid_rx->reorder_buf[i] = NULL; - } - } - - /* free resources */ - kfree(tid_rx->reorder_buf); - kfree(tid_rx->reorder_time); - sta->ampdu_mlme.tid_rx[tid] = NULL; - - spin_unlock_bh(&sta->lock); + del_timer_sync(&tid_rx->session_timer); - if (!from_timer) - del_timer_sync(&tid_rx->session_timer); - kfree(tid_rx); + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason) { - ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false); + mutex_lock(&sta->ampdu_mlme.mtx); + ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); + mutex_unlock(&sta->ampdu_mlme.mtx); } /* @@ -100,8 +116,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); #endif - ___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_TIMEOUT, true); + set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, @@ -212,9 +228,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* examine state machine */ - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); - if (sta->ampdu_mlme.tid_active_rx[tid]) { + if (sta->ampdu_mlme.tid_rx[tid]) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "unexpected AddBA Req from " @@ -225,9 +241,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { + tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!tid_agg_rx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate rx mlme to tid %d failed\n", @@ -235,14 +250,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif goto end; } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + /* rx timer */ + tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; + tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_agg_rx->session_timer); /* prepare reordering buffer */ tid_agg_rx->reorder_buf = @@ -257,8 +269,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, #endif kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; + kfree(tid_agg_rx); goto end; } @@ -270,13 +281,12 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, if (ret) { kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx->reorder_time); kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; goto end; } - /* change state and send addba resp */ - sta->ampdu_mlme.tid_active_rx[tid] = true; + /* update data */ tid_agg_rx->dialog_token = dialog_token; tid_agg_rx->ssn = start_seq_num; tid_agg_rx->head_seq_num = start_seq_num; @@ -284,8 +294,15 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; status = WLAN_STATUS_SUCCESS; + + /* activate it for RX */ + rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx); + + if (timeout) + mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout)); + end: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 98258b7341e3..c893f236acea 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2009, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -21,28 +21,39 @@ #include "wme.h" /** - * DOC: TX aggregation + * DOC: TX A-MPDU aggregation * * Aggregation on the TX side requires setting the hardware flag - * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues - * hardware parameter to the number of hardware AMPDU queues. If there are no - * hardware queues then the driver will (currently) have to do all frame - * buffering. + * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed + * packets with a flag indicating A-MPDU aggregation. The driver + * or device is responsible for actually aggregating the frames, + * as well as deciding how many and which to aggregate. * - * When TX aggregation is started by some subsystem (usually the rate control - * algorithm would be appropriate) by calling the - * ieee80211_start_tx_ba_session() function, the driver will be notified via - * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action. + * When TX aggregation is started by some subsystem (usually the rate + * control algorithm would be appropriate) by calling the + * ieee80211_start_tx_ba_session() function, the driver will be + * notified via its @ampdu_action function, with the + * %IEEE80211_AMPDU_TX_START action. * * In response to that, the driver is later required to call the - * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe()) - * function, which will start the aggregation session. + * ieee80211_start_tx_ba_cb_irqsafe() function, which will really + * start the aggregation session after the peer has also responded. + * If the peer responds negatively, the session will be stopped + * again right away. Note that it is possible for the aggregation + * session to be stopped before the driver has indicated that it + * is done setting it up, in which case it must not indicate the + * setup completion. * - * Similarly, when the aggregation session is stopped by - * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will - * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the - * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb() - * (or ieee80211_stop_tx_ba_cb_irqsafe()). + * Also note that, since we also need to wait for a response from + * the peer, the driver is notified of the completion of the + * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the + * @ampdu_action callback. + * + * Similarly, when the aggregation session is stopped by the peer + * or something calling ieee80211_stop_tx_ba_session(), the driver's + * @ampdu_action function will be called with the action + * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail, + * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe(). */ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, @@ -125,25 +136,53 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb); } +static void kfree_tid_tx(struct rcu_head *rcu_head) +{ + struct tid_ampdu_tx *tid_tx = + container_of(rcu_head, struct tid_ampdu_tx, rcu_head); + + kfree(tid_tx); +} + int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; int ret; - u8 *state; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + + if (!tid_tx) + return -ENOENT; + + spin_lock_bh(&sta->lock); + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* not even started yet! */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return 0; + } + + spin_unlock_bh(&sta->lock); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; + set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); - if (*state == HT_AGG_STATE_OPERATIONAL) - sta->ampdu_mlme.addba_req_num[tid] = 0; + /* + * After this packets are no longer handed right through + * to the driver but are put onto tid_tx->pending instead, + * with locking to ensure proper access. + */ + clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + tid_tx->stop_initiator = initiator; ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, @@ -174,16 +213,14 @@ static void sta_addba_resp_timer_expired(unsigned long data) u16 tid = *(u8 *)data; struct sta_info *sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); - u8 *state; - - state = &sta->ampdu_mlme.tid_state_tx[tid]; + struct tid_ampdu_tx *tid_tx; /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->lock); - if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK | - HT_AGG_STATE_REQ_STOP_BA_MSK)) != - HT_ADDBA_REQUESTED_MSK) { - spin_unlock_bh(&sta->lock); + rcu_read_lock(); + tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx || + test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { + rcu_read_unlock(); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " "(or no longer) expecting addBA response there\n", @@ -196,8 +233,8 @@ static void sta_addba_resp_timer_expired(unsigned long data) printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); #endif - ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); - spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(&sta->sta, tid); + rcu_read_unlock(); } static inline int ieee80211_ac_from_tid(int tid) @@ -205,14 +242,112 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } +/* + * When multiple aggregation sessions on multiple stations + * are being created/destroyed simultaneously, we need to + * refcount the global queue stop caused by that in order + * to not get into a situation where one of the aggregation + * setup or teardown re-enables queues before the other is + * ready to handle that. + * + * These two functions take care of this issue by keeping + * a global "agg_queue_stop" refcount. + */ +static void __acquires(agg_queue) +ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1) + ieee80211_stop_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __acquire(agg_queue); +} + +static void __releases(agg_queue) +ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid) +{ + int queue = ieee80211_ac_from_tid(tid); + + if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0) + ieee80211_wake_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + __release(agg_queue); +} + +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) +{ + struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; + struct ieee80211_local *local = sta->local; + struct ieee80211_sub_if_data *sdata = sta->sdata; + u16 start_seq_num; + int ret; + + lockdep_assert_held(&sta->ampdu_mlme.mtx); + + /* + * While we're asking the driver about the aggregation, + * stop the AC queue so that we don't have to worry + * about frames that came in while we were doing that, + * which would require us to put them to the AC pending + * afterwards which just makes the code more complex. + */ + ieee80211_stop_queue_agg(local, tid); + + clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + + /* + * make sure no packets are being processed to get + * valid starting sequence number + */ + synchronize_net(); + + start_seq_num = sta->tid_seq[tid] >> 4; + + ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif + spin_lock_bh(&sta->lock); + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); + spin_unlock_bh(&sta->lock); + + ieee80211_wake_queue_agg(local, tid); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); + return; + } + + /* we can take packets again now */ + ieee80211_wake_queue_agg(local, tid); + + /* activate the timer for the recipient's addBA response */ + mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.addba_req_num[tid]++; + spin_unlock_bh(&sta->lock); + + /* send AddBA request */ + ieee80211_send_addba_request(sdata, sta->sta.addr, tid, + tid_tx->dialog_token, start_seq_num, + 0x40, 5000); +} + int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - u8 *state; + struct tid_ampdu_tx *tid_tx; int ret = 0; - u16 start_seq_num; trace_api_start_tx_ba_session(pubsta, tid); @@ -239,24 +374,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; - if (test_sta_flags(sta, WLAN_STA_DISASSOC)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Disassociation is in progress. " - "Denying BA session request\n"); -#endif - return -EINVAL; - } - if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Suspend in progress. " + printk(KERN_DEBUG "BA sessions blocked. " "Denying BA session request\n"); #endif return -EINVAL; } spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { @@ -264,9 +390,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; + tid_tx = sta->ampdu_mlme.tid_tx[tid]; /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { + if (tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - session is not " "idle on tid %u\n", tid); @@ -275,96 +401,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) goto err_unlock_sta; } - /* - * While we're asking the driver about the aggregation, - * stop the AC queue so that we don't have to worry - * about frames that came in while we were doing that, - * which would require us to put them to the AC pending - * afterwards which just makes the code more complex. - */ - ieee80211_stop_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { + tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!tid_tx) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_ERR "allocate tx mlme to tid %d failed\n", tid); #endif ret = -ENOMEM; - goto err_wake_queue; + goto err_unlock_sta; } - skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); + skb_queue_head_init(&tid_tx->pending); + __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; - - start_seq_num = sta->tid_seq[tid] >> 4; - - ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - pubsta, tid, &start_seq_num); + tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; + tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&tid_tx->addba_resp_timer); - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; - goto err_free; - } - - /* Driver vetoed or OKed, but we can take packets again now */ - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - - spin_unlock(&local->ampdu_lock); - - /* prepare tid data */ + /* assign a dialog token */ sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator; - spin_unlock_bh(&sta->lock); + /* finally, assign it to the array */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); - /* send AddBA request */ - ieee80211_send_addba_request(sdata, pubsta->addr, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, - 0x40, 5000); - sta->ampdu_mlme.addba_req_num[tid]++; - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif - return 0; - - err_free: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - err_wake_queue: - ieee80211_wake_queue_by_reason( - &local->hw, ieee80211_ac_from_tid(tid), - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + /* this flow continues off the work */ err_unlock_sta: - spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); return ret; } @@ -372,69 +439,65 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session); /* * splice packets from the STA's pending to the local pending, - * requires a call to ieee80211_agg_splice_finish and holding - * local->ampdu_lock across both calls. + * requires a call to ieee80211_agg_splice_finish later */ -static void ieee80211_agg_splice_packets(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) +static void __acquires(agg_queue) +ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct tid_ampdu_tx *tid_tx, u16 tid) { + int queue = ieee80211_ac_from_tid(tid); unsigned long flags; - u16 queue = ieee80211_ac_from_tid(tid); - - ieee80211_stop_queue_by_reason( - &local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) - return; + ieee80211_stop_queue_agg(local, tid); - if (WARN(!sta->ampdu_mlme.tid_tx[tid], - "TID %d gone but expected when splicing aggregates from" - "the pending queue\n", tid)) + if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" + " from the pending queue\n", tid)) return; - if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { + if (!skb_queue_empty(&tid_tx->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); /* copy over remaining packets */ - skb_queue_splice_tail_init( - &sta->ampdu_mlme.tid_tx[tid]->pending, - &local->pending[queue]); + skb_queue_splice_tail_init(&tid_tx->pending, + &local->pending[queue]); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } } -static void ieee80211_agg_splice_finish(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) +static void __releases(agg_queue) +ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid) { - u16 queue = ieee80211_ac_from_tid(tid); - - ieee80211_wake_queue_by_reason( - &local->hw, queue, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + ieee80211_wake_queue_agg(local, tid); } -/* caller must hold sta->lock */ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { + lockdep_assert_held(&sta->ampdu_mlme.mtx); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid); #endif - spin_lock(&local->ampdu_lock); - ieee80211_agg_splice_packets(local, sta, tid); - /* - * NB: we rely on sta->lock being taken in the TX - * processing here when adding to the pending queue, - * otherwise we could only change the state of the - * session to OPERATIONAL _here_. - */ - ieee80211_agg_splice_finish(local, sta, tid); - spin_unlock(&local->ampdu_lock); - drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); + + /* + * synchronize with TX path, while splicing the TX path + * should block so it won't put more packets onto pending. + */ + spin_lock_bh(&sta->lock); + + ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid); + /* + * Now mark as operational. This will be visible + * in the TX path, and lets it go lock-free in + * the common case. + */ + set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state); + ieee80211_agg_splice_finish(local, tid); + + spin_unlock_bh(&sta->lock); } void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) @@ -442,7 +505,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_start_tx_ba_cb(sdata, ra, tid); @@ -454,42 +517,36 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) return; } - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, ra); if (!sta) { - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif return; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; - if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { + if (WARN_ON(!tid_tx)) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", - *state); + printk(KERN_DEBUG "addBA was not requested!\n"); #endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; + goto unlock; } - if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) - goto out; - - *state |= HT_ADDBA_DRV_READY_MSK; + if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) + goto unlock; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); - out: - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); + unlock: + mutex_unlock(&sta->ampdu_mlme.mtx); + mutex_unlock(&local->sta_mtx); } -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) @@ -510,44 +567,36 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator) { - u8 *state; int ret; - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto unlock; - } + mutex_lock(&sta->ampdu_mlme.mtx); ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); - unlock: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); + return ret; } -int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, - enum ieee80211_back_parties initiator) +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; + struct tid_ampdu_tx *tid_tx; + int ret = 0; - trace_api_stop_tx_ba_session(pubsta, tid, initiator); + trace_api_stop_tx_ba_session(pubsta, tid); if (!local->ops->ampdu_action) return -EINVAL; @@ -555,7 +604,26 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (tid >= STA_TID_NUM) return -EINVAL; - return __ieee80211_stop_tx_ba_session(sta, tid, initiator); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + ret = -ENOENT; + goto unlock; + } + + if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { + /* already in progress stopping it */ + ret = 0; + goto unlock; + } + + set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + spin_unlock_bh(&sta->lock); + return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); @@ -564,7 +632,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u8 *state; + struct tid_ampdu_tx *tid_tx; trace_api_stop_tx_ba_cb(sdata, ra, tid); @@ -581,51 +649,56 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ra); if (!sta) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Could not find station: %pM\n", ra); #endif - rcu_read_unlock(); - return; + goto unlock; } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* NOTE: no need to use sta->lock in this state check, as - * ieee80211_stop_tx_ba_session will let only one stop call to - * pass through per sta/tid - */ - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { + mutex_lock(&sta->ampdu_mlme.mtx); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); #endif - rcu_read_unlock(); - return; + goto unlock_sta; } - if (*state & HT_AGG_STATE_INITIATOR_MSK) + if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - spin_lock_bh(&sta->lock); - spin_lock(&local->ampdu_lock); + /* + * When we get here, the TX path will not be lockless any more wrt. + * aggregation, since the OPERATIONAL bit has long been cleared. + * Thus it will block on getting the lock, if it occurs. So if we + * stop the queue now, we will not get any more packets, and any + * that might be being processed will wait for us here, thereby + * guaranteeing that no packets go to the tid_tx pending queue any + * more. + */ - ieee80211_agg_splice_packets(local, sta, tid); + ieee80211_agg_splice_packets(local, tid_tx, tid); - *state = HT_AGG_STATE_IDLE; - /* from now on packets are no longer put onto sta->pending */ - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; + /* future packets must not find the tid_tx struct any more */ + rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL); - ieee80211_agg_splice_finish(local, sta, tid); + ieee80211_agg_splice_finish(local, tid); - spin_unlock(&local->ampdu_lock); - spin_unlock_bh(&sta->lock); + call_rcu(&tid_tx->rcu_head, kfree_tid_tx); - rcu_read_unlock(); + unlock_sta: + spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); + unlock: + mutex_unlock(&local->sta_mtx); } -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) @@ -646,11 +719,10 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; - ra_tid->vif = vif; - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &sdata->work); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); @@ -660,40 +732,40 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { + struct tid_ampdu_tx *tid_tx; u16 capab, tid; - u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - spin_lock_bh(&sta->lock); + mutex_lock(&sta->ampdu_mlme.mtx); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) goto out; - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif goto out; } - del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + del_timer(&tid_tx->addba_resp_timer); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ +#endif if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { - u8 curstate = *state; - - *state |= HT_ADDBA_RECEIVED_MSK; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, + &tid_tx->state)) { + /* ignore duplicate response */ + goto out; + } - if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL) + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); sta->ampdu_mlme.addba_req_num[tid] = 0; @@ -702,5 +774,5 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } out: - spin_unlock_bh(&sta->lock); + mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c7000a6ca379..29ac8e1a509e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -120,6 +120,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int err; + if (!netif_running(dev)) + return -ENETDOWN; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -140,17 +143,22 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } + /* reject WEP and TKIP keys if WEP failed to initialize */ + if ((alg == ALG_WEP || alg == ALG_TKIP) && + IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; + key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, params->seq_len, params->seq); if (!key) return -ENOMEM; - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); if (!sta) { - ieee80211_key_free(key); + ieee80211_key_free(sdata->local, key); err = -ENOENT; goto out_unlock; } @@ -160,7 +168,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, err = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return err; } @@ -174,7 +182,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - rcu_read_lock(); + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { ret = -ENOENT; @@ -184,7 +192,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; if (sta->key) { - ieee80211_key_free(sta->key); + ieee80211_key_free(sdata->local, sta->key); WARN_ON(sta->key); ret = 0; } @@ -197,12 +205,12 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - ieee80211_key_free(sdata->keys[key_idx]); + ieee80211_key_free(sdata->local, sdata->keys[key_idx]); WARN_ON(sdata->keys[key_idx]); ret = 0; out_unlock: - rcu_read_unlock(); + mutex_unlock(&sdata->local->sta_mtx); return ret; } @@ -305,15 +313,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } @@ -321,15 +324,10 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ieee80211_set_default_mgmt_key(sdata, key_idx); - rcu_read_unlock(); - return 0; } @@ -415,9 +413,6 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (!local->ops->get_survey) - return -EOPNOTSUPP; - return drv_get_survey(local, idx, survey); } @@ -600,7 +595,7 @@ struct iapp_layer2_update { u8 ssap; /* 0 */ u8 control; u8 xid_info[3]; -} __attribute__ ((packed)); +} __packed; static void ieee80211_send_layer2_update(struct sta_info *sta) { @@ -632,7 +627,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) skb->dev = sta->sdata->dev; skb->protocol = eth_type_trans(skb, sta->sdata->dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + netif_rx_ni(skb); } static void sta_apply_parameters(struct ieee80211_local *local, @@ -1154,10 +1149,6 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, return -EINVAL; } - /* enable WMM or activate new settings */ - local->hw.conf.flags |= IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); - return 0; } @@ -1331,28 +1322,28 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) } static int ieee80211_set_tx_power(struct wiphy *wiphy, - enum tx_power_setting type, int dbm) + enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_channel *chan = local->hw.conf.channel; u32 changes = 0; switch (type) { - case TX_POWER_AUTOMATIC: + case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = -1; break; - case TX_POWER_LIMITED: - if (dbm < 0) - return -EINVAL; - local->user_power_level = dbm; + case NL80211_TX_POWER_LIMITED: + if (mbm < 0 || (mbm % 100)) + return -EOPNOTSUPP; + local->user_power_level = MBM_TO_DBM(mbm); break; - case TX_POWER_FIXED: - if (dbm < 0) - return -EINVAL; + case NL80211_TX_POWER_FIXED: + if (mbm < 0 || (mbm % 100)) + return -EOPNOTSUPP; /* TODO: move to cfg80211 when it knows the channel */ - if (dbm > chan->max_power) + if (MBM_TO_DBM(mbm) > chan->max_power) return -EINVAL; - local->user_power_level = dbm; + local->user_power_level = MBM_TO_DBM(mbm); break; } @@ -1448,7 +1439,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; @@ -1457,11 +1447,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; if (enabled == sdata->u.mgd.powersave && - timeout == conf->dynamic_ps_forced_timeout) + timeout == local->dynamic_ps_forced_timeout) return 0; sdata->u.mgd.powersave = enabled; - conf->dynamic_ps_forced_timeout = timeout; + local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ mutex_lock(&sdata->u.mgd.mtx); @@ -1554,10 +1544,58 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { - return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan, - channel_type, buf, len, cookie); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct sta_info *sta; + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + + /* Check that we are on the requested channel for transmission */ + if (chan != local->tmp_channel && + chan != local->oper_channel) + return -EBUSY; + if (channel_type_valid && + (channel_type != local->tmp_channel_type && + channel_type != local->_oper_channel_type)) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_ADHOC: + if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + break; + rcu_read_lock(); + sta = sta_info_get(sdata, mgmt->da); + rcu_read_unlock(); + if (!sta) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) + flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + break; + default: + return -EOPNOTSUPP; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + memcpy(skb_put(skb, len), buf, len); + + IEEE80211_SKB_CB(skb)->flags = flags; + + skb->dev = sdata->dev; + ieee80211_tx_skb(sdata, skb); + + *cookie = (unsigned long) skb; + return 0; } struct cfg80211_ops mac80211_config_ops = { diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 637929b65ccc..a694c593ff6a 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -307,9 +307,6 @@ static const struct file_operations queues_ops = { /* statistics stuff */ -#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ - DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value) - static ssize_t format_devstat_counter(struct ieee80211_local *local, char __user *userbuf, size_t count, loff_t *ppos, @@ -351,75 +348,16 @@ static const struct file_operations stats_ ##name## _ops = { \ .open = mac80211_open_file_generic, \ }; -#define DEBUGFS_STATS_ADD(name) \ +#define DEBUGFS_STATS_ADD(name, field) \ + debugfs_create_u32(#name, 0400, statsd, (u32 *) &field); +#define DEBUGFS_DEVSTATS_ADD(name) \ debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); -DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u", - local->dot11TransmittedFragmentCount); -DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u", - local->dot11MulticastTransmittedFrameCount); -DEBUGFS_STATS_FILE(failed_count, 20, "%u", - local->dot11FailedCount); -DEBUGFS_STATS_FILE(retry_count, 20, "%u", - local->dot11RetryCount); -DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u", - local->dot11MultipleRetryCount); -DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u", - local->dot11FrameDuplicateCount); -DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u", - local->dot11ReceivedFragmentCount); -DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u", - local->dot11MulticastReceivedFrameCount); -DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", - local->dot11TransmittedFrameCount); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS -DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", - local->tx_handlers_drop); -DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u", - local->tx_handlers_queued); -DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u", - local->tx_handlers_drop_unencrypted); -DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u", - local->tx_handlers_drop_fragment); -DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u", - local->tx_handlers_drop_wep); -DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u", - local->tx_handlers_drop_not_assoc); -DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, "%u", - local->tx_handlers_drop_unauth_port); -DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u", - local->rx_handlers_drop); -DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u", - local->rx_handlers_queued); -DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u", - local->rx_handlers_drop_nullfunc); -DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u", - local->rx_handlers_drop_defrag); -DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u", - local->rx_handlers_drop_short); -DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u", - local->rx_handlers_drop_passive_scan); -DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u", - local->tx_expand_skb_head); -DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u", - local->tx_expand_skb_head_cloned); -DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u", - local->rx_expand_skb_head); -DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u", - local->rx_expand_skb_head2); -DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u", - local->rx_handlers_fragments); -DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u", - local->tx_status_drop); - -#endif - DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount); DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount); DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount); DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount); - void debugfs_hw_add(struct ieee80211_local *local) { struct dentry *phyd = local->hw.wiphy->debugfsdir; @@ -448,38 +386,60 @@ void debugfs_hw_add(struct ieee80211_local *local) if (!statsd) return; - DEBUGFS_STATS_ADD(transmitted_fragment_count); - DEBUGFS_STATS_ADD(multicast_transmitted_frame_count); - DEBUGFS_STATS_ADD(failed_count); - DEBUGFS_STATS_ADD(retry_count); - DEBUGFS_STATS_ADD(multiple_retry_count); - DEBUGFS_STATS_ADD(frame_duplicate_count); - DEBUGFS_STATS_ADD(received_fragment_count); - DEBUGFS_STATS_ADD(multicast_received_frame_count); - DEBUGFS_STATS_ADD(transmitted_frame_count); + DEBUGFS_STATS_ADD(transmitted_fragment_count, + local->dot11TransmittedFragmentCount); + DEBUGFS_STATS_ADD(multicast_transmitted_frame_count, + local->dot11MulticastTransmittedFrameCount); + DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount); + DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount); + DEBUGFS_STATS_ADD(multiple_retry_count, + local->dot11MultipleRetryCount); + DEBUGFS_STATS_ADD(frame_duplicate_count, + local->dot11FrameDuplicateCount); + DEBUGFS_STATS_ADD(received_fragment_count, + local->dot11ReceivedFragmentCount); + DEBUGFS_STATS_ADD(multicast_received_frame_count, + local->dot11MulticastReceivedFrameCount); + DEBUGFS_STATS_ADD(transmitted_frame_count, + local->dot11TransmittedFrameCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_ADD(tx_handlers_drop); - DEBUGFS_STATS_ADD(tx_handlers_queued); - DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted); - DEBUGFS_STATS_ADD(tx_handlers_drop_fragment); - DEBUGFS_STATS_ADD(tx_handlers_drop_wep); - DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc); - DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port); - DEBUGFS_STATS_ADD(rx_handlers_drop); - DEBUGFS_STATS_ADD(rx_handlers_queued); - DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc); - DEBUGFS_STATS_ADD(rx_handlers_drop_defrag); - DEBUGFS_STATS_ADD(rx_handlers_drop_short); - DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan); - DEBUGFS_STATS_ADD(tx_expand_skb_head); - DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned); - DEBUGFS_STATS_ADD(rx_expand_skb_head); - DEBUGFS_STATS_ADD(rx_expand_skb_head2); - DEBUGFS_STATS_ADD(rx_handlers_fragments); - DEBUGFS_STATS_ADD(tx_status_drop); + DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop); + DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued); + DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted, + local->tx_handlers_drop_unencrypted); + DEBUGFS_STATS_ADD(tx_handlers_drop_fragment, + local->tx_handlers_drop_fragment); + DEBUGFS_STATS_ADD(tx_handlers_drop_wep, + local->tx_handlers_drop_wep); + DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc, + local->tx_handlers_drop_not_assoc); + DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port, + local->tx_handlers_drop_unauth_port); + DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop); + DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued); + DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc, + local->rx_handlers_drop_nullfunc); + DEBUGFS_STATS_ADD(rx_handlers_drop_defrag, + local->rx_handlers_drop_defrag); + DEBUGFS_STATS_ADD(rx_handlers_drop_short, + local->rx_handlers_drop_short); + DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan, + local->rx_handlers_drop_passive_scan); + DEBUGFS_STATS_ADD(tx_expand_skb_head, + local->tx_expand_skb_head); + DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned, + local->tx_expand_skb_head_cloned); + DEBUGFS_STATS_ADD(rx_expand_skb_head, + local->rx_expand_skb_head); + DEBUGFS_STATS_ADD(rx_expand_skb_head2, + local->rx_expand_skb_head2); + DEBUGFS_STATS_ADD(rx_handlers_fragments, + local->rx_handlers_fragments); + DEBUGFS_STATS_ADD(tx_status_drop, + local->tx_status_drop); #endif - DEBUGFS_STATS_ADD(dot11ACKFailureCount); - DEBUGFS_STATS_ADD(dot11RTSFailureCount); - DEBUGFS_STATS_ADD(dot11FCSErrorCount); - DEBUGFS_STATS_ADD(dot11RTSSuccessCount); + DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount); + DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount); + DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount); } diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 97c9e46e859e..fa5e76e658ef 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -143,7 +143,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case ALG_CCMP: - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { + for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index e763f1529ddb..76839d4dfaac 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -30,7 +30,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \ } #define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n") #define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n") -#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n") #define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n") #define STA_OPS(name) \ @@ -52,19 +51,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->name, S); -STA_FILE(rx_packets, rx_packets, LU); -STA_FILE(tx_packets, tx_packets, LU); -STA_FILE(rx_bytes, rx_bytes, LU); -STA_FILE(tx_bytes, tx_bytes, LU); -STA_FILE(rx_duplicates, num_duplicates, LU); -STA_FILE(rx_fragments, rx_fragments, LU); -STA_FILE(rx_dropped, rx_dropped, LU); -STA_FILE(tx_fragments, tx_fragments, LU); -STA_FILE(tx_filtered, tx_filtered_count, LU); -STA_FILE(tx_retry_failed, tx_retry_failed, LU); -STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -134,28 +121,25 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); p += scnprintf(p, sizeof(buf) + buf - p, - "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n"); + "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n"); for (i = 0; i < STA_TID_NUM; i++) { p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_active_rx[i]); + !!sta->ampdu_mlme.tid_rx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? sta->ampdu_mlme.tid_rx[i]->dialog_token : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_active_rx[i] ? + sta->ampdu_mlme.tid_rx[i] ? sta->ampdu_mlme.tid_rx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", - sta->ampdu_mlme.tid_state_tx[i]); + !!sta->ampdu_mlme.tid_tx[i]); p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? sta->ampdu_mlme.tid_tx[i]->dialog_token : 0); - p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x", - sta->ampdu_mlme.tid_state_tx[i] ? - sta->ampdu_mlme.tid_tx[i]->ssn : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d", - sta->ampdu_mlme.tid_state_tx[i] ? + sta->ampdu_mlme.tid_tx[i] ? skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0); p += scnprintf(p, sizeof(buf) + buf - p, "\n"); } @@ -210,8 +194,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (start) ret = ieee80211_start_tx_ba_session(&sta->sta, tid); else - ret = ieee80211_stop_tx_ba_session(&sta->sta, tid, - WLAN_BACK_RECIPIENT); + ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); ret = 0; @@ -307,6 +290,13 @@ STA_OPS(ht_capa); debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); +#define DEBUGFS_ADD_COUNTER(name, field) \ + if (sizeof(sta->field) == sizeof(u32)) \ + debugfs_create_u32(#name, 0400, sta->debugfs.dir, \ + (u32 *) &sta->field); \ + else \ + debugfs_create_u64(#name, 0400, sta->debugfs.dir, \ + (u64 *) &sta->field); void ieee80211_sta_debugfs_add(struct sta_info *sta) { @@ -339,20 +329,21 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(last_seq_ctrl); DEBUGFS_ADD(agg_status); DEBUGFS_ADD(dev); - DEBUGFS_ADD(rx_packets); - DEBUGFS_ADD(tx_packets); - DEBUGFS_ADD(rx_bytes); - DEBUGFS_ADD(tx_bytes); - DEBUGFS_ADD(rx_duplicates); - DEBUGFS_ADD(rx_fragments); - DEBUGFS_ADD(rx_dropped); - DEBUGFS_ADD(tx_fragments); - DEBUGFS_ADD(tx_filtered); - DEBUGFS_ADD(tx_retry_failed); - DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(wep_weak_iv_count); DEBUGFS_ADD(ht_capa); + + DEBUGFS_ADD_COUNTER(rx_packets, rx_packets); + DEBUGFS_ADD_COUNTER(tx_packets, tx_packets); + DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes); + DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes); + DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); + DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped); + DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed); + DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count); + DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 9c1da0809160..14123dce544b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -16,10 +16,11 @@ static inline int drv_start(struct ieee80211_local *local) might_sleep(); + trace_drv_start(local); local->started = true; smp_mb(); ret = local->ops->start(&local->hw); - trace_drv_start(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -27,8 +28,9 @@ static inline void drv_stop(struct ieee80211_local *local) { might_sleep(); - local->ops->stop(&local->hw); trace_drv_stop(local); + local->ops->stop(&local->hw); + trace_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ tasklet_disable(&local->tasklet); @@ -46,8 +48,9 @@ static inline int drv_add_interface(struct ieee80211_local *local, might_sleep(); + trace_drv_add_interface(local, vif_to_sdata(vif)); ret = local->ops->add_interface(&local->hw, vif); - trace_drv_add_interface(local, vif_to_sdata(vif), ret); + trace_drv_return_int(local, ret); return ret; } @@ -56,8 +59,9 @@ static inline void drv_remove_interface(struct ieee80211_local *local, { might_sleep(); - local->ops->remove_interface(&local->hw, vif); trace_drv_remove_interface(local, vif_to_sdata(vif)); + local->ops->remove_interface(&local->hw, vif); + trace_drv_return_void(local); } static inline int drv_config(struct ieee80211_local *local, u32 changed) @@ -66,8 +70,9 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) might_sleep(); + trace_drv_config(local, changed); ret = local->ops->config(&local->hw, changed); - trace_drv_config(local, changed, ret); + trace_drv_return_int(local, ret); return ret; } @@ -78,9 +83,10 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, { might_sleep(); + trace_drv_bss_info_changed(local, sdata, info, changed); if (local->ops->bss_info_changed) local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); - trace_drv_bss_info_changed(local, sdata, info, changed); + trace_drv_return_void(local); } static inline u64 drv_prepare_multicast(struct ieee80211_local *local, @@ -88,10 +94,12 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local, { u64 ret = 0; + trace_drv_prepare_multicast(local, mc_list->count); + if (local->ops->prepare_multicast) ret = local->ops->prepare_multicast(&local->hw, mc_list); - trace_drv_prepare_multicast(local, mc_list->count, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -103,19 +111,21 @@ static inline void drv_configure_filter(struct ieee80211_local *local, { might_sleep(); - local->ops->configure_filter(&local->hw, changed_flags, total_flags, - multicast); trace_drv_configure_filter(local, changed_flags, total_flags, multicast); + local->ops->configure_filter(&local->hw, changed_flags, total_flags, + multicast); + trace_drv_return_void(local); } static inline int drv_set_tim(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set) { int ret = 0; + trace_drv_set_tim(local, sta, set); if (local->ops->set_tim) ret = local->ops->set_tim(&local->hw, sta, set); - trace_drv_set_tim(local, sta, set, ret); + trace_drv_return_int(local, ret); return ret; } @@ -129,8 +139,9 @@ static inline int drv_set_key(struct ieee80211_local *local, might_sleep(); + trace_drv_set_key(local, cmd, sdata, sta, key); ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); - trace_drv_set_key(local, cmd, sdata, sta, key, ret); + trace_drv_return_int(local, ret); return ret; } @@ -145,10 +156,11 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, if (sta) ista = &sta->sta; + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); if (local->ops->update_tkip_key) local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, ista, iv32, phase1key); - trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); + trace_drv_return_void(local); } static inline int drv_hw_scan(struct ieee80211_local *local, @@ -159,8 +171,9 @@ static inline int drv_hw_scan(struct ieee80211_local *local, might_sleep(); + trace_drv_hw_scan(local, sdata, req); ret = local->ops->hw_scan(&local->hw, &sdata->vif, req); - trace_drv_hw_scan(local, sdata, req, ret); + trace_drv_return_int(local, ret); return ret; } @@ -168,18 +181,20 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_start(local); if (local->ops->sw_scan_start) local->ops->sw_scan_start(&local->hw); - trace_drv_sw_scan_start(local); + trace_drv_return_void(local); } static inline void drv_sw_scan_complete(struct ieee80211_local *local) { might_sleep(); + trace_drv_sw_scan_complete(local); if (local->ops->sw_scan_complete) local->ops->sw_scan_complete(&local->hw); - trace_drv_sw_scan_complete(local); + trace_drv_return_void(local); } static inline int drv_get_stats(struct ieee80211_local *local, @@ -211,9 +226,10 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, might_sleep(); + trace_drv_set_rts_threshold(local, value); if (local->ops->set_rts_threshold) ret = local->ops->set_rts_threshold(&local->hw, value); - trace_drv_set_rts_threshold(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -223,12 +239,13 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local, int ret = 0; might_sleep(); + trace_drv_set_coverage_class(local, value); if (local->ops->set_coverage_class) local->ops->set_coverage_class(&local->hw, value); else ret = -EOPNOTSUPP; - trace_drv_set_coverage_class(local, value, ret); + trace_drv_return_int(local, ret); return ret; } @@ -237,9 +254,10 @@ static inline void drv_sta_notify(struct ieee80211_local *local, enum sta_notify_cmd cmd, struct ieee80211_sta *sta) { + trace_drv_sta_notify(local, sdata, cmd, sta); if (local->ops->sta_notify) local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); - trace_drv_sta_notify(local, sdata, cmd, sta); + trace_drv_return_void(local); } static inline int drv_sta_add(struct ieee80211_local *local, @@ -250,13 +268,11 @@ static inline int drv_sta_add(struct ieee80211_local *local, might_sleep(); + trace_drv_sta_add(local, sdata, sta); if (local->ops->sta_add) ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_ADD, sta); - trace_drv_sta_add(local, sdata, sta, ret); + trace_drv_return_int(local, ret); return ret; } @@ -267,13 +283,11 @@ static inline void drv_sta_remove(struct ieee80211_local *local, { might_sleep(); + trace_drv_sta_remove(local, sdata, sta); if (local->ops->sta_remove) local->ops->sta_remove(&local->hw, &sdata->vif, sta); - else if (local->ops->sta_notify) - local->ops->sta_notify(&local->hw, &sdata->vif, - STA_NOTIFY_REMOVE, sta); - trace_drv_sta_remove(local, sdata, sta); + trace_drv_return_void(local); } static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, @@ -283,9 +297,10 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, might_sleep(); + trace_drv_conf_tx(local, queue, params); if (local->ops->conf_tx) ret = local->ops->conf_tx(&local->hw, queue, params); - trace_drv_conf_tx(local, queue, params, ret); + trace_drv_return_int(local, ret); return ret; } @@ -295,9 +310,10 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local) might_sleep(); + trace_drv_get_tsf(local); if (local->ops->get_tsf) ret = local->ops->get_tsf(&local->hw); - trace_drv_get_tsf(local, ret); + trace_drv_return_u64(local, ret); return ret; } @@ -305,18 +321,20 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { might_sleep(); + trace_drv_set_tsf(local, tsf); if (local->ops->set_tsf) local->ops->set_tsf(&local->hw, tsf); - trace_drv_set_tsf(local, tsf); + trace_drv_return_void(local); } static inline void drv_reset_tsf(struct ieee80211_local *local) { might_sleep(); + trace_drv_reset_tsf(local); if (local->ops->reset_tsf) local->ops->reset_tsf(&local->hw); - trace_drv_reset_tsf(local); + trace_drv_return_void(local); } static inline int drv_tx_last_beacon(struct ieee80211_local *local) @@ -325,9 +343,10 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) might_sleep(); + trace_drv_tx_last_beacon(local); if (local->ops->tx_last_beacon) ret = local->ops->tx_last_beacon(&local->hw); - trace_drv_tx_last_beacon(local, ret); + trace_drv_return_int(local, ret); return ret; } @@ -338,10 +357,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, u16 *ssn) { int ret = -EOPNOTSUPP; + + might_sleep(); + + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); + + trace_drv_return_int(local, ret); + return ret; } @@ -349,9 +375,14 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) { int ret = -EOPNOTSUPP; + + trace_drv_get_survey(local, idx, survey); + if (local->ops->get_survey) ret = local->ops->get_survey(&local->hw, idx, survey); - /* trace_drv_get_survey(local, idx, survey, ret); */ + + trace_drv_return_int(local, ret); + return ret; } @@ -370,6 +401,7 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop) trace_drv_flush(local, drop); if (local->ops->flush) local->ops->flush(&local->hw, drop); + trace_drv_return_void(local); } static inline void drv_channel_switch(struct ieee80211_local *local, @@ -377,9 +409,9 @@ static inline void drv_channel_switch(struct ieee80211_local *local, { might_sleep(); - local->ops->channel_switch(&local->hw, ch_switch); - trace_drv_channel_switch(local, ch_switch); + local->ops->channel_switch(&local->hw, ch_switch); + trace_drv_return_void(local); } #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6a9b2342a9c2..5d5d2a974668 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -36,20 +36,58 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. */ -TRACE_EVENT(drv_start, - TP_PROTO(struct ieee80211_local *local, int ret), +TRACE_EVENT(drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local), + TP_STRUCT__entry( + LOCAL_ENTRY + ), + TP_fast_assign( + LOCAL_ASSIGN; + ), + TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) +); +TRACE_EVENT(drv_return_int, + TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), - TP_STRUCT__entry( LOCAL_ENTRY __field(int, ret) ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + ), + TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret) +); +TRACE_EVENT(drv_return_u64, + TP_PROTO(struct ieee80211_local *local, u64 ret), + TP_ARGS(local, ret), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u64, ret) + ), TP_fast_assign( LOCAL_ASSIGN; __entry->ret = ret; ), + TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) +); + +TRACE_EVENT(drv_start, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG @@ -76,28 +114,25 @@ TRACE_EVENT(drv_stop, TRACE_EVENT(drv_add_interface, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int ret), + struct ieee80211_sub_if_data *sdata), - TP_ARGS(local, sdata, ret), + TP_ARGS(local, sdata), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __array(char, addr, 6) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; memcpy(__entry->addr, sdata->vif.addr, 6); - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " addr:%pM ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " addr:%pM", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr ) ); @@ -126,15 +161,13 @@ TRACE_EVENT(drv_remove_interface, TRACE_EVENT(drv_config, TP_PROTO(struct ieee80211_local *local, - u32 changed, - int ret), + u32 changed), - TP_ARGS(local, changed, ret), + TP_ARGS(local, changed), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, changed) - __field(int, ret) __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) @@ -150,7 +183,6 @@ TRACE_EVENT(drv_config, TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed; - __entry->ret = ret; __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; @@ -164,8 +196,8 @@ TRACE_EVENT(drv_config, ), TP_printk( - LOCAL_PR_FMT " ch:%#x freq:%d ret:%d", - LOCAL_PR_ARG, __entry->changed, __entry->center_freq, __entry->ret + LOCAL_PR_FMT " ch:%#x freq:%d", + LOCAL_PR_ARG, __entry->changed, __entry->center_freq ) ); @@ -220,26 +252,23 @@ TRACE_EVENT(drv_bss_info_changed, ); TRACE_EVENT(drv_prepare_multicast, - TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), + TP_PROTO(struct ieee80211_local *local, int mc_count), - TP_ARGS(local, mc_count, ret), + TP_ARGS(local, mc_count), TP_STRUCT__entry( LOCAL_ENTRY __field(int, mc_count) - __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->mc_count = mc_count; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " prepare mc (%d): %llx", - LOCAL_PR_ARG, __entry->mc_count, - (unsigned long long) __entry->ret + LOCAL_PR_FMT " prepare mc (%d)", + LOCAL_PR_ARG, __entry->mc_count ) ); @@ -273,27 +302,25 @@ TRACE_EVENT(drv_configure_filter, TRACE_EVENT(drv_set_tim, TP_PROTO(struct ieee80211_local *local, - struct ieee80211_sta *sta, bool set, int ret), + struct ieee80211_sta *sta, bool set), - TP_ARGS(local, sta, set, ret), + TP_ARGS(local, sta, set), TP_STRUCT__entry( LOCAL_ENTRY STA_ENTRY __field(bool, set) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; STA_ASSIGN; __entry->set = set; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT STA_PR_FMT " set:%d ret:%d", - LOCAL_PR_ARG, STA_PR_FMT, __entry->set, __entry->ret + LOCAL_PR_FMT STA_PR_FMT " set:%d", + LOCAL_PR_ARG, STA_PR_FMT, __entry->set ) ); @@ -301,9 +328,9 @@ TRACE_EVENT(drv_set_key, TP_PROTO(struct ieee80211_local *local, enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, int ret), + struct ieee80211_key_conf *key), - TP_ARGS(local, cmd, sdata, sta, key, ret), + TP_ARGS(local, cmd, sdata, sta, key), TP_STRUCT__entry( LOCAL_ENTRY @@ -313,7 +340,6 @@ TRACE_EVENT(drv_set_key, __field(u8, hw_key_idx) __field(u8, flags) __field(s8, keyidx) - __field(int, ret) ), TP_fast_assign( @@ -324,12 +350,11 @@ TRACE_EVENT(drv_set_key, __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -364,25 +389,23 @@ TRACE_EVENT(drv_update_tkip_key, TRACE_EVENT(drv_hw_scan, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct cfg80211_scan_request *req, int ret), + struct cfg80211_scan_request *req), - TP_ARGS(local, sdata, req, ret), + TP_ARGS(local, sdata, req), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " ret:%d", - LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG,VIF_PR_ARG ) ); @@ -479,48 +502,44 @@ TRACE_EVENT(drv_get_tkip_seq, ); TRACE_EVENT(drv_set_rts_threshold, - TP_PROTO(struct ieee80211_local *local, u32 value, int ret), + TP_PROTO(struct ieee80211_local *local, u32 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u32, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); TRACE_EVENT(drv_set_coverage_class, - TP_PROTO(struct ieee80211_local *local, u8 value, int ret), + TP_PROTO(struct ieee80211_local *local, u8 value), - TP_ARGS(local, value, ret), + TP_ARGS(local, value), TP_STRUCT__entry( LOCAL_ENTRY __field(u8, value) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; __entry->value = value; ), TP_printk( - LOCAL_PR_FMT " value:%d ret:%d", - LOCAL_PR_ARG, __entry->value, __entry->ret + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value ) ); @@ -555,27 +574,25 @@ TRACE_EVENT(drv_sta_notify, TRACE_EVENT(drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, int ret), + struct ieee80211_sta *sta), - TP_ARGS(local, sdata, sta, ret), + TP_ARGS(local, sdata, sta), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG ) ); @@ -606,10 +623,9 @@ TRACE_EVENT(drv_sta_remove, TRACE_EVENT(drv_conf_tx, TP_PROTO(struct ieee80211_local *local, u16 queue, - const struct ieee80211_tx_queue_params *params, - int ret), + const struct ieee80211_tx_queue_params *params), - TP_ARGS(local, queue, params, ret), + TP_ARGS(local, queue, params), TP_STRUCT__entry( LOCAL_ENTRY @@ -618,13 +634,11 @@ TRACE_EVENT(drv_conf_tx, __field(u16, cw_min) __field(u16, cw_max) __field(u8, aifs) - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; __entry->queue = queue; - __entry->ret = ret; __entry->txop = params->txop; __entry->cw_max = params->cw_max; __entry->cw_min = params->cw_min; @@ -632,29 +646,27 @@ TRACE_EVENT(drv_conf_tx, ), TP_printk( - LOCAL_PR_FMT " queue:%d ret:%d", - LOCAL_PR_ARG, __entry->queue, __entry->ret + LOCAL_PR_FMT " queue:%d", + LOCAL_PR_ARG, __entry->queue ) ); TRACE_EVENT(drv_get_tsf, - TP_PROTO(struct ieee80211_local *local, u64 ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(u64, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%llu", - LOCAL_PR_ARG, (unsigned long long)__entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -698,23 +710,21 @@ TRACE_EVENT(drv_reset_tsf, ); TRACE_EVENT(drv_tx_last_beacon, - TP_PROTO(struct ieee80211_local *local, int ret), + TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local, ret), + TP_ARGS(local), TP_STRUCT__entry( LOCAL_ENTRY - __field(int, ret) ), TP_fast_assign( LOCAL_ASSIGN; - __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT " ret:%d", - LOCAL_PR_ARG, __entry->ret + LOCAL_PR_FMT, + LOCAL_PR_ARG ) ); @@ -723,9 +733,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, int ret), + u16 *ssn), - TP_ARGS(local, sdata, action, sta, tid, ssn, ret), + TP_ARGS(local, sdata, action, sta, tid, ssn), TP_STRUCT__entry( LOCAL_ENTRY @@ -733,7 +743,6 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) - __field(int, ret) VIF_ENTRY ), @@ -741,15 +750,36 @@ TRACE_EVENT(drv_ampdu_action, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->ret = ret; __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? *ssn : 0; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid + ) +); + +TRACE_EVENT(drv_get_survey, + TP_PROTO(struct ieee80211_local *local, int idx, + struct survey_info *survey), + + TP_ARGS(local, idx, survey), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, idx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->idx = idx; + ), + + TP_printk( + LOCAL_PR_FMT " idx:%d", + LOCAL_PR_ARG, __entry->idx ) ); @@ -851,25 +881,23 @@ TRACE_EVENT(api_start_tx_ba_cb, ); TRACE_EVENT(api_stop_tx_ba_session, - TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator), + TP_PROTO(struct ieee80211_sta *sta, u16 tid), - TP_ARGS(sta, tid, initiator), + TP_ARGS(sta, tid), TP_STRUCT__entry( STA_ENTRY __field(u16, tid) - __field(u16, initiator) ), TP_fast_assign( STA_ASSIGN; __entry->tid = tid; - __entry->initiator = initiator; ), TP_printk( - STA_PR_FMT " tid:%d initiator:%d", - STA_PR_ARG, __entry->tid, __entry->initiator + STA_PR_FMT " tid:%d", + STA_PR_ARG, __entry->tid ) ); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 2ab106a0a491..9d101fb33861 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -6,7 +6,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> - * Copyright 2007-2008, Intel Corporation + * Copyright 2007-2010, Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -29,7 +29,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, memset(ht_cap, 0, sizeof(*ht_cap)); - if (!ht_cap_ie) + if (!ht_cap_ie || !sband->ht_cap.ht_supported) return; ht_cap->ht_supported = true; @@ -105,6 +105,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) { int i; + cancel_work_sync(&sta->ampdu_mlme.work); + for (i = 0; i < STA_TID_NUM; i++) { __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, @@ -112,6 +114,43 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) } } +void ieee80211_ba_session_work(struct work_struct *work) +{ + struct sta_info *sta = + container_of(work, struct sta_info, ampdu_mlme.work); + struct tid_ampdu_tx *tid_tx; + int tid; + + /* + * When this flag is set, new sessions should be + * blocked, and existing sessions will be torn + * down by the code that set the flag, so this + * need not run. + */ + if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) + return; + + mutex_lock(&sta->ampdu_mlme.mtx); + for (tid = 0; tid < STA_TID_NUM; tid++) { + if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) + ___ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_TIMEOUT); + + tid_tx = sta->ampdu_mlme.tid_tx[tid]; + if (!tid_tx) + continue; + + if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) + ieee80211_tx_ba_session_handle_start(sta, tid); + else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, + &tid_tx->state)) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_INITIATOR); + } + mutex_unlock(&sta->ampdu_mlme.mtx); +} + void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u16 initiator, u16 reason_code) @@ -176,13 +215,8 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); - else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK) - ___ieee80211_stop_tx_ba_session(sta, tid, - WLAN_BACK_RECIPIENT); - spin_unlock_bh(&sta->lock); - } + else + __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT); } int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index b2cc1fda6cfd..c691780725a7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -43,6 +43,8 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, { u16 auth_alg, auth_transaction, status_code; + lockdep_assert_held(&sdata->u.ibss.mtx); + if (len < 24 + 6) return; @@ -78,6 +80,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; + lockdep_assert_held(&ifibss->mtx); + /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local); @@ -172,11 +176,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rcu_assign_pointer(ifibss->presp, skb); sdata->vif.bss_conf.beacon_int = beacon_int; + sdata->vif.bss_conf.basic_rates = basic_rates; bss_change = BSS_CHANGED_BEACON_INT; bss_change |= ieee80211_reset_erp_info(sdata); bss_change |= BSS_CHANGED_BSSID; bss_change |= BSS_CHANGED_BEACON; bss_change |= BSS_CHANGED_BEACON_ENABLED; + bss_change |= BSS_CHANGED_BASIC_RATES; bss_change |= BSS_CHANGED_IBSS; sdata->vif.bss_conf.ibss_joined = true; ieee80211_bss_info_change_notify(sdata, bss_change); @@ -203,6 +209,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, int i, j; u16 beacon_int = cbss->beacon_interval; + lockdep_assert_held(&sdata->u.ibss.mtx); + if (beacon_int < 10) beacon_int = 10; @@ -447,6 +455,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; + lockdep_assert_held(&sdata->u.ibss.mtx); + rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { @@ -471,6 +481,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + lockdep_assert_held(&ifibss->mtx); + mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -503,6 +515,8 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; + lockdep_assert_held(&ifibss->mtx); + if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); } else { @@ -529,7 +543,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, 3, /* first two are basic */ + ifibss->channel, ifibss->basic_rates, capability, 0); } @@ -547,6 +561,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int active_ibss; u16 capability; + lockdep_assert_held(&ifibss->mtx); + active_ibss = ieee80211_sta_active_ibss(sdata); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", @@ -635,6 +651,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *resp; u8 *pos, *end; + lockdep_assert_held(&ifibss->mtx); + if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !ifibss->presp) return; @@ -727,8 +745,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); } -static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; @@ -738,6 +756,8 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); + mutex_lock(&sdata->u.ibss.mtx); + switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); @@ -755,35 +775,22 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } - kfree_skb(skb); + mutex_unlock(&sdata->u.ibss.mtx); } -static void ieee80211_ibss_work(struct work_struct *work) +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.ibss.work); - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_ibss *ifibss; - struct sk_buff *skb; - - if (WARN_ON(local->suspended)) - return; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC)) - return; - ifibss = &sdata->u.ibss; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - while ((skb = skb_dequeue(&ifibss->skb_queue))) - ieee80211_ibss_rx_queued_mgmt(sdata, skb); + mutex_lock(&ifibss->mtx); - if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) - return; + /* + * Work could be scheduled after scan or similar + * when we aren't even joined (or trying) with a + * network. + */ + if (!ifibss->ssid_len) + goto out; switch (ifibss->state) { case IEEE80211_IBSS_MLME_SEARCH: @@ -796,6 +803,9 @@ static void ieee80211_ibss_work(struct work_struct *work) WARN_ON(1); break; } + + out: + mutex_unlock(&ifibss->mtx); } static void ieee80211_ibss_timer(unsigned long data) @@ -810,8 +820,7 @@ static void ieee80211_ibss_timer(unsigned long data) return; } - set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - ieee80211_queue_work(&local->hw, &ifibss->work); + ieee80211_queue_work(&local->hw, &sdata->work); } #ifdef CONFIG_PM @@ -819,7 +828,6 @@ void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - cancel_work_sync(&ifibss->work); if (del_timer_sync(&ifibss->timer)) ifibss->timer_running = true; } @@ -839,10 +847,9 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - INIT_WORK(&ifibss->work, ieee80211_ibss_work); setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - skb_queue_head_init(&ifibss->skb_queue); + mutex_init(&ifibss->mtx); } /* scan finished notification */ @@ -856,45 +863,28 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) continue; if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; - if (!sdata->u.ibss.ssid_len) - continue; sdata->u.ibss.last_scan_completed = jiffies; - mod_timer(&sdata->u.ibss.timer, 0); + ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_PROBE_REQ: - case IEEE80211_STYPE_AUTH: - skb_queue_tail(&sdata->u.ibss.skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.ibss.work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params) { struct sk_buff *skb; + skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + + 36 /* bitrates */ + + 34 /* SSID */ + + 3 /* DS params */ + + 4 /* IBSS params */ + + params->ie_len); + if (!skb) + return -ENOMEM; + + mutex_lock(&sdata->u.ibss.mtx); + if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -902,6 +892,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.fixed_bssid = false; sdata->u.ibss.privacy = params->privacy; + sdata->u.ibss.basic_rates = params->basic_rates; sdata->vif.bss_conf.beacon_int = params->beacon_interval; @@ -922,34 +913,18 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ie_len = params->ie_len; } - skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + - 36 /* bitrates */ + - 34 /* SSID */ + - 3 /* DS params */ + - 4 /* IBSS params */ + - params->ie_len); - if (!skb) - return -ENOMEM; - sdata->u.ibss.skb = skb; sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; sdata->u.ibss.ibss_join_req = jiffies; memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); - - /* - * The ssid_len setting below is used to see whether - * we are active, and we need all other settings - * before that may get visible. - */ - mb(); - sdata->u.ibss.ssid_len = params->ssid_len; ieee80211_recalc_idle(sdata->local); - set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - ieee80211_queue_work(&sdata->local->hw, &sdata->u.ibss.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + + mutex_unlock(&sdata->u.ibss.mtx); return 0; } @@ -957,11 +932,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct cfg80211_bss *cbss; + u16 capability; + int active_ibss; - del_timer_sync(&sdata->u.ibss.timer); - clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - cancel_work_sync(&sdata->u.ibss.work); - clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); + mutex_lock(&sdata->u.ibss.mtx); + + active_ibss = ieee80211_sta_active_ibss(sdata); + + if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { + capability = WLAN_CAPABILITY_IBSS; + + if (ifibss->privacy) + capability |= WLAN_CAPABILITY_PRIVACY; + + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + ifibss->bssid, ifibss->ssid, + ifibss->ssid_len, WLAN_CAPABILITY_IBSS | + WLAN_CAPABILITY_PRIVACY, + capability); + + if (cbss) { + cfg80211_unlink_bss(local->hw.wiphy, cbss); + cfg80211_put_bss(cbss); + } + } sta_info_flush(sdata->local, sdata); @@ -975,10 +972,14 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) synchronize_rcu(); kfree_skb(skb); - skb_queue_purge(&sdata->u.ibss.skb_queue); + skb_queue_purge(&sdata->skb_queue); memset(sdata->u.ibss.bssid, 0, ETH_ALEN); sdata->u.ibss.ssid_len = 0; + del_timer_sync(&sdata->u.ibss.timer); + + mutex_unlock(&sdata->u.ibss.mtx); + ieee80211_recalc_idle(sdata->local); return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1a9e2da37a93..65e0ed6c2975 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -238,6 +238,7 @@ enum ieee80211_work_type { IEEE80211_WORK_ABORT, IEEE80211_WORK_DIRECT_PROBE, IEEE80211_WORK_AUTH, + IEEE80211_WORK_ASSOC_BEACON_WAIT, IEEE80211_WORK_ASSOC, IEEE80211_WORK_REMAIN_ON_CHANNEL, }; @@ -325,7 +326,6 @@ struct ieee80211_if_managed { struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; struct timer_list chswitch_timer; - struct work_struct work; struct work_struct monitor_work; struct work_struct chswitch_work; struct work_struct beacon_connection_loss_work; @@ -340,8 +340,6 @@ struct ieee80211_if_managed { u16 aid; - struct sk_buff_head skb_queue; - unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ @@ -380,19 +378,15 @@ struct ieee80211_if_managed { int last_cqm_event_signal; }; -enum ieee80211_ibss_request { - IEEE80211_IBSS_REQ_RUN = 0, -}; - struct ieee80211_if_ibss { struct timer_list timer; - struct work_struct work; - struct sk_buff_head skb_queue; + struct mutex mtx; - unsigned long request; unsigned long last_scan_completed; + u32 basic_rates; + bool timer_running; bool fixed_bssid; @@ -416,11 +410,9 @@ struct ieee80211_if_ibss { }; struct ieee80211_if_mesh { - struct work_struct work; struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; - struct sk_buff_head skb_queue; unsigned long timers_running; @@ -517,6 +509,11 @@ struct ieee80211_sub_if_data { u16 sequence_number; + struct work_struct work; + struct sk_buff_head skb_queue; + + bool arp_filter_state; + /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for @@ -569,11 +566,15 @@ ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, #endif } +enum sdata_queue_type { + IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, + IEEE80211_SDATA_QUEUE_AGG_START = 1, + IEEE80211_SDATA_QUEUE_AGG_STOP = 2, +}; + enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, - IEEE80211_DELBA_MSG = 3, - IEEE80211_ADDBA_MSG = 4, }; enum queue_stop_reason { @@ -724,13 +725,7 @@ struct ieee80211_local { struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; - /* - * This lock is used to prevent concurrent A-MPDU - * session start/stop processing, this thus also - * synchronises the ->ampdu_action() callback to - * drivers and limits it to one at a time. - */ - spinlock_t ampdu_lock; + atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES]; /* number of interfaces with corresponding IFF_ flags */ atomic_t iff_allmultis, iff_promiscs; @@ -746,10 +741,10 @@ struct ieee80211_local { struct mutex iflist_mtx; /* - * Key lock, protects sdata's key_list and sta_info's + * Key mutex, protects sdata's key_list and sta_info's * key pointers (write access, they're RCU.) */ - spinlock_t key_lock; + struct mutex key_mtx; /* Scanning and BSS list */ @@ -851,6 +846,15 @@ struct ieee80211_local { struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block network_latency_notifier; + struct notifier_block ifa_notifier; + + /* + * The dynamic ps timeout configured from user space via WEXT - + * this will override whatever chosen by mac80211 internally. + */ + int dynamic_ps_forced_timeout; + int dynamic_ps_user_timeout; + bool disable_dynamic_ps; int user_power_level; /* in dBm */ int power_constr_level; /* in dBm */ @@ -874,9 +878,8 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } -/* this struct represents 802.11n's RA/TID combination along with our vif */ +/* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { - struct ieee80211_vif *vif; u8 ra[ETH_ALEN]; u16 tid; }; @@ -985,29 +988,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req, void *cookie); -int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - const u8 *buf, size_t len, u64 *cookie); -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); int ieee80211_max_network_latency(struct notifier_block *nb, unsigned long data, void *dummy); +int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss, u64 timestamp); void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* IBSS code */ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 *addr, u32 supp_rates, gfp_t gfp); @@ -1016,6 +1015,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); + +/* mesh code */ +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* scan/BSS handling */ void ieee80211_scan_work(struct work_struct *work); @@ -1084,7 +1091,7 @@ struct ieee80211_tx_status_rtap_hdr { u8 padding_for_rate; __le16 tx_flags; u8 data_retries; -} __attribute__ ((packed)); +} __packed; /* HT */ @@ -1099,6 +1106,8 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); @@ -1118,6 +1127,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); +void ieee80211_ba_session_work(struct work_struct *work); +void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 50deb017fd6e..ebbe264e2b0b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -249,6 +249,8 @@ static int ieee80211_open(struct net_device *dev) local->fif_other_bss++; ieee80211_configure_filter(local); + + netif_carrier_on(dev); break; default: res = drv_add_interface(local, &sdata->vif); @@ -268,7 +270,6 @@ static int ieee80211_open(struct net_device *dev) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_enable_keys(sdata); if (sdata->vif.type == NL80211_IFTYPE_STATION) netif_carrier_off(dev); @@ -321,15 +322,6 @@ static int ieee80211_open(struct net_device *dev) ieee80211_recalc_ps(local, -1); - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_tx_start_all_queues(dev); return 0; @@ -349,7 +341,6 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct sta_info *sta; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; @@ -366,18 +357,6 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_work_purge(sdata); /* - * Now delete all active aggregation sessions. - */ - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(sta); - } - - rcu_read_unlock(); - - /* * Remove all stations associated with this interface. * * This must be done before calling ops->remove_interface() @@ -483,27 +462,14 @@ static int ieee80211_stop(struct net_device *dev) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.mgd.work); cancel_work_sync(&sdata->u.mgd.chswitch_work); cancel_work_sync(&sdata->u.mgd.monitor_work); cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work); - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - synchronize_rcu(); - skb_queue_purge(&sdata->u.mgd.skb_queue); /* fall through */ case NL80211_IFTYPE_ADHOC: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) del_timer_sync(&sdata->u.ibss.timer); - cancel_work_sync(&sdata->u.ibss.work); - synchronize_rcu(); - skb_queue_purge(&sdata->u.ibss.skb_queue); - } /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -518,6 +484,16 @@ static int ieee80211_stop(struct net_device *dev) } /* fall through */ default: + flush_work(&sdata->work); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->skb_queue); + if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -531,8 +507,8 @@ static int ieee80211_stop(struct net_device *dev) BSS_CHANGED_BEACON_ENABLED); } - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); + /* free all remaining keys, there shouldn't be any */ + ieee80211_free_keys(sdata); drv_remove_interface(local, &sdata->vif); } @@ -727,6 +703,136 @@ static void ieee80211_if_setup(struct net_device *dev) dev->destructor = free_netdev; } +static void ieee80211_iface_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, work); + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct sta_info *sta; + struct ieee80211_ra_tid *ra_tid; + + if (!ieee80211_sdata_running(sdata)) + return; + + if (local->scanning) + return; + + /* + * ieee80211_queue_work() should have picked up most cases, + * here we'll pick the rest. + */ + if (WARN(local->suspended, + "interface work scheduled while going to suspend\n")) + return; + + /* first process frames */ + while ((skb = skb_dequeue(&sdata->skb_queue))) { + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) { + ra_tid = (void *)&skb->cb; + ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) { + ra_tid = (void *)&skb->cb; + ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra, + ra_tid->tid); + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_BACK) { + int len = skb->len; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + ieee80211_process_addba_request( + local, sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + ieee80211_process_addba_resp(local, sta, + mgmt, len); + break; + case WLAN_ACTION_DELBA: + ieee80211_process_delba(sdata, sta, + mgmt, len); + break; + default: + WARN_ON(1); + break; + } + } + mutex_unlock(&local->sta_mtx); + } else if (ieee80211_is_data_qos(mgmt->frame_control)) { + struct ieee80211_hdr *hdr = (void *)mgmt; + /* + * So the frame isn't mgmt, but frame_control + * is at the right place anyway, of course, so + * the if statement is correct. + * + * Warn if we have other data frame types here, + * they must not get here. + */ + WARN_ON(hdr->frame_control & + cpu_to_le16(IEEE80211_STYPE_NULLFUNC)); + WARN_ON(!(hdr->seq_ctrl & + cpu_to_le16(IEEE80211_SCTL_FRAG))); + /* + * This was a fragment of a frame, received while + * a block-ack session was active. That cannot be + * right, so terminate the session. + */ + mutex_lock(&local->sta_mtx); + sta = sta_info_get_bss(sdata, mgmt->sa); + if (sta) { + u16 tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + + __ieee80211_stop_rx_ba_session( + sta, tid, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + } + mutex_unlock(&local->sta_mtx); + } else switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_rx_queued_mgmt(sdata, skb); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + break; + default: + WARN(1, "frame for unexpected interface type"); + break; + } + + kfree_skb(skb); + } + + /* then other type-dependent work */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + ieee80211_sta_work(sdata); + break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_work(sdata); + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + ieee80211_mesh_work(sdata); + break; + default: + break; + } +} + + /* * Helper function to initialise an interface to a specific type. */ @@ -744,6 +850,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; + skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); + switch (type) { case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); @@ -969,6 +1078,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; sdata->dev = ndev; +#ifdef CONFIG_INET + sdata->arp_filter_state = true; +#endif for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index e8f6e3b252d8..1b9d87ed143a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -36,80 +36,20 @@ * There is currently no way of knowing this except by looking into * debugfs. * - * All key operations are protected internally so you can call them at - * any time. + * All key operations are protected internally. * * Within mac80211, key references are, just as STA structure references, * protected by RCU. Note, however, that some things are unprotected, * namely the key->sta dereferences within the hardware acceleration - * functions. This means that sta_info_destroy() must flush the key todo - * list. - * - * All the direct key list manipulation functions must not sleep because - * they can operate on STA info structs that are protected by RCU. + * functions. This means that sta_info_destroy() must remove the key + * which waits for an RCU grace period. */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -/* key mutex: used to synchronise todo runners */ -static DEFINE_MUTEX(key_mutex); -static DEFINE_SPINLOCK(todo_lock); -static LIST_HEAD(todo_list); - -static void key_todo(struct work_struct *work) -{ - ieee80211_key_todo(); -} - -static DECLARE_WORK(todo_work, key_todo); - -/** - * add_todo - add todo item for a key - * - * @key: key to add to do item for - * @flag: todo flag(s) - * - * Must be called with IRQs or softirqs disabled. - */ -static void add_todo(struct ieee80211_key *key, u32 flag) -{ - if (!key) - return; - - spin_lock(&todo_lock); - key->flags |= flag; - /* - * Remove again if already on the list so that we move it to the end. - */ - if (!list_empty(&key->todo)) - list_del(&key->todo); - list_add_tail(&key->todo, &todo_list); - schedule_work(&todo_work); - spin_unlock(&todo_lock); -} - -/** - * ieee80211_key_lock - lock the mac80211 key operation lock - * - * This locks the (global) mac80211 key operation lock, all - * key operations must be done under this lock. - */ -static void ieee80211_key_lock(void) -{ - mutex_lock(&key_mutex); -} - -/** - * ieee80211_key_unlock - unlock the mac80211 key operation lock - */ -static void ieee80211_key_unlock(void) -{ - mutex_unlock(&key_mutex); -} - -static void assert_key_lock(void) +static void assert_key_lock(struct ieee80211_local *local) { - WARN_ON(!mutex_is_locked(&key_mutex)); + WARN_ON(!mutex_is_locked(&local->key_mtx)); } static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) @@ -126,12 +66,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key->local->ops->set_key) return; + assert_key_lock(key->local); + sta = get_sta_for_key(key); sdata = key->sdata; @@ -142,11 +83,8 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); - if (!ret) { - spin_lock_bh(&todo_lock); + if (!ret) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); - } if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) printk(KERN_ERR "mac80211-%s: failed to set key " @@ -161,18 +99,15 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) struct ieee80211_sta *sta; int ret; - assert_key_lock(); might_sleep(); if (!key || !key->local->ops->set_key) return; - spin_lock_bh(&todo_lock); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { - spin_unlock_bh(&todo_lock); + assert_key_lock(key->local); + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; - } - spin_unlock_bh(&todo_lock); sta = get_sta_for_key(key); sdata = key->sdata; @@ -191,9 +126,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) wiphy_name(key->local->hw.wiphy), key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); - spin_lock_bh(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - spin_unlock_bh(&todo_lock); } static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, @@ -201,22 +134,24 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFKEY); + if (key) { + ieee80211_debugfs_key_remove_default(key->sdata); + ieee80211_debugfs_key_add_default(key->sdata); + } } void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void @@ -224,24 +159,26 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_key *key = NULL; + assert_key_lock(sdata->local); + if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) key = sdata->keys[idx]; rcu_assign_pointer(sdata->default_mgmt_key, key); - if (key) - add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); + if (key) { + ieee80211_debugfs_key_remove_mgmt_default(key->sdata); + ieee80211_debugfs_key_add_mgmt_default(key->sdata); + } } void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) { - unsigned long flags; - - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); __ieee80211_set_default_mgmt_key(sdata, idx); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } @@ -336,7 +273,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; if (seq) { - for (i = 0; i < NUM_RX_DATA_QUEUES; i++) + for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) for (j = 0; j < CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = seq[CCMP_PN_LEN - j - 1]; @@ -352,7 +289,6 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - INIT_LIST_HEAD(&key->todo); if (alg == ALG_CCMP) { /* @@ -382,12 +318,29 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, return key; } +static void __ieee80211_key_destroy(struct ieee80211_key *key) +{ + if (!key) + return; + + if (key->local) + ieee80211_key_disable_hw_accel(key); + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.alg == ALG_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + if (key->local) + ieee80211_debugfs_key_remove(key); + + kfree(key); +} + void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_key *old_key; - unsigned long flags; int idx; BUG_ON(!sdata); @@ -431,7 +384,7 @@ void ieee80211_key_link(struct ieee80211_key *key, } } - spin_lock_irqsave(&sdata->local->key_lock, flags); + mutex_lock(&sdata->local->key_mtx); if (sta) old_key = sta->key; @@ -439,15 +392,13 @@ void ieee80211_key_link(struct ieee80211_key *key, old_key = sdata->keys[idx]; __ieee80211_key_replace(sdata, sta, old_key, key); + __ieee80211_key_destroy(old_key); - /* free old key later */ - add_todo(old_key, KEY_FLAG_TODO_DELETE); + ieee80211_debugfs_key_add(key); - add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); - if (ieee80211_sdata_running(sdata)) - add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD); + ieee80211_key_enable_hw_accel(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); + mutex_unlock(&sdata->local->key_mtx); } static void __ieee80211_key_free(struct ieee80211_key *key) @@ -458,170 +409,62 @@ static void __ieee80211_key_free(struct ieee80211_key *key) if (key->sdata) __ieee80211_key_replace(key->sdata, key->sta, key, NULL); - - add_todo(key, KEY_FLAG_TODO_DELETE); + __ieee80211_key_destroy(key); } -void ieee80211_key_free(struct ieee80211_key *key) +void ieee80211_key_free(struct ieee80211_local *local, + struct ieee80211_key *key) { - unsigned long flags; - if (!key) return; - if (!key->sdata) { - /* The key has not been linked yet, simply free it - * and don't Oops */ - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - kfree(key); - return; - } - - spin_lock_irqsave(&key->sdata->local->key_lock, flags); + mutex_lock(&local->key_mtx); __ieee80211_key_free(key); - spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); + mutex_unlock(&local->key_mtx); } -/* - * To be safe against concurrent manipulations of the list (which shouldn't - * actually happen) we need to hold the spinlock. But under the spinlock we - * can't actually do much, so we defer processing to the todo list. Then run - * the todo list to be sure the operation and possibly previously pending - * operations are completed. - */ -static void ieee80211_todo_for_each_key(struct ieee80211_sub_if_data *sdata, - u32 todo_flags) +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - unsigned long flags; - - might_sleep(); - - spin_lock_irqsave(&sdata->local->key_lock, flags); - list_for_each_entry(key, &sdata->key_list, list) - add_todo(key, todo_flags); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - ieee80211_key_todo(); -} - -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) -{ ASSERT_RTNL(); if (WARN_ON(!ieee80211_sdata_running(sdata))) return; - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); -} + mutex_lock(&sdata->local->key_mtx); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) -{ - ASSERT_RTNL(); - - ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_REMOVE); -} - -static void __ieee80211_key_destroy(struct ieee80211_key *key) -{ - if (!key) - return; - - ieee80211_key_disable_hw_accel(key); - - if (key->conf.alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) - ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); - ieee80211_debugfs_key_remove(key); + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); - kfree(key); + mutex_unlock(&sdata->local->key_mtx); } -static void __ieee80211_key_todo(void) +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; - bool work_done; - u32 todoflags; - /* - * NB: sta_info_destroy relies on this! - */ - synchronize_rcu(); - - spin_lock_bh(&todo_lock); - while (!list_empty(&todo_list)) { - key = list_first_entry(&todo_list, struct ieee80211_key, todo); - list_del_init(&key->todo); - todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | - KEY_FLAG_TODO_DEFKEY | - KEY_FLAG_TODO_DEFMGMTKEY | - KEY_FLAG_TODO_HWACCEL_ADD | - KEY_FLAG_TODO_HWACCEL_REMOVE | - KEY_FLAG_TODO_DELETE); - key->flags &= ~todoflags; - spin_unlock_bh(&todo_lock); - - work_done = false; - - if (todoflags & KEY_FLAG_TODO_ADD_DEBUGFS) { - ieee80211_debugfs_key_add(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFKEY) { - ieee80211_debugfs_key_remove_default(key->sdata); - ieee80211_debugfs_key_add_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) { - ieee80211_debugfs_key_remove_mgmt_default(key->sdata); - ieee80211_debugfs_key_add_mgmt_default(key->sdata); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { - ieee80211_key_enable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_HWACCEL_REMOVE) { - ieee80211_key_disable_hw_accel(key); - work_done = true; - } - if (todoflags & KEY_FLAG_TODO_DELETE) { - __ieee80211_key_destroy(key); - work_done = true; - } + ASSERT_RTNL(); - WARN_ON(!work_done); + mutex_lock(&sdata->local->key_mtx); - spin_lock_bh(&todo_lock); - } - spin_unlock_bh(&todo_lock); -} + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); -void ieee80211_key_todo(void) -{ - ieee80211_key_lock(); - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; - unsigned long flags; - ieee80211_key_lock(); + mutex_lock(&sdata->local->key_mtx); ieee80211_debugfs_key_remove_default(sdata); ieee80211_debugfs_key_remove_mgmt_default(sdata); - spin_lock_irqsave(&sdata->local->key_lock, flags); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) __ieee80211_key_free(key); - spin_unlock_irqrestore(&sdata->local->key_lock, flags); - - __ieee80211_key_todo(); - ieee80211_key_unlock(); + mutex_unlock(&sdata->local->key_mtx); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index bdc2968c2bbe..b665bbb7a471 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -38,25 +38,9 @@ struct sta_info; * * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present * in the hardware for TX crypto hardware acceleration. - * @KEY_FLAG_TODO_DELETE: Key is marked for deletion and will, after an - * RCU grace period, no longer be reachable other than from the - * todo list. - * @KEY_FLAG_TODO_HWACCEL_ADD: Key needs to be added to hardware acceleration. - * @KEY_FLAG_TODO_HWACCEL_REMOVE: Key needs to be removed from hardware - * acceleration. - * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. - * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. - * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs - * to be updated. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), - KEY_FLAG_TODO_DELETE = BIT(1), - KEY_FLAG_TODO_HWACCEL_ADD = BIT(2), - KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), - KEY_FLAG_TODO_DEFKEY = BIT(4), - KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), - KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; enum ieee80211_internal_tkip_state { @@ -79,10 +63,8 @@ struct ieee80211_key { /* for sdata list */ struct list_head list; - /* for todo list */ - struct list_head todo; - /* protected by todo lock! */ + /* protected by key mutex */ unsigned int flags; union { @@ -95,7 +77,13 @@ struct ieee80211_key { } tkip; struct { u8 tx_pn[6]; - u8 rx_pn[NUM_RX_DATA_QUEUES][6]; + /* + * Last received packet number. The first + * NUM_RX_DATA_QUEUES counters are used with Data + * frames and the last counter is used with Robust + * Management frames. + */ + u8 rx_pn[NUM_RX_DATA_QUEUES + 1][6]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ /* scratch buffers for virt_to_page() (crypto API) */ @@ -147,7 +135,8 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, void ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); -void ieee80211_key_free(struct ieee80211_key *key); +void ieee80211_key_free(struct ieee80211_local *local, + struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); @@ -155,6 +144,4 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_key_todo(void); - #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 22a384dfab65..7cc4f913a431 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/bitmap.h> #include <linux/pm_qos_params.h> +#include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> @@ -106,12 +107,15 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) if (scan_chan) { chan = scan_chan; channel_type = NL80211_CHAN_NO_HT; + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; + local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; } if (chan != local->hw.conf.channel || @@ -259,7 +263,6 @@ static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff *skb; - struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || (skb = skb_dequeue(&local->skb_queue_unreliable))) { @@ -274,18 +277,6 @@ static void ieee80211_tasklet_handler(unsigned long data) skb->pkt_type = 0; ieee80211_tx_status(local_to_hw(local), skb); break; - case IEEE80211_DELBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break; - case IEEE80211_ADDBA_MSG: - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra, - ra_tid->tid); - dev_kfree_skb(skb); - break ; default: WARN(1, "mac80211: Packet is of unknown type %d\n", skb->pkt_type); @@ -329,6 +320,76 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) mutex_unlock(&local->iflist_mtx); } +#ifdef CONFIG_INET +static int ieee80211_ifa_changed(struct notifier_block *nb, + unsigned long data, void *arg) +{ + struct in_ifaddr *ifa = arg; + struct ieee80211_local *local = + container_of(nb, struct ieee80211_local, + ifa_notifier); + struct net_device *ndev = ifa->ifa_dev->dev; + struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct in_device *idev; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_bss_conf *bss_conf; + struct ieee80211_if_managed *ifmgd; + int c = 0; + + if (!netif_running(ndev)) + return NOTIFY_DONE; + + /* Make sure it's our interface that got changed */ + if (!wdev) + return NOTIFY_DONE; + + if (wdev->wiphy != local->hw.wiphy) + return NOTIFY_DONE; + + sdata = IEEE80211_DEV_TO_SUB_IF(ndev); + bss_conf = &sdata->vif.bss_conf; + + /* ARP filtering is only supported in managed mode */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return NOTIFY_DONE; + + idev = sdata->dev->ip_ptr; + if (!idev) + return NOTIFY_DONE; + + ifmgd = &sdata->u.mgd; + mutex_lock(&ifmgd->mtx); + + /* Copy the addresses to the bss_conf list */ + ifa = idev->ifa_list; + while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) { + bss_conf->arp_addr_list[c] = ifa->ifa_address; + ifa = ifa->ifa_next; + c++; + } + + /* If not all addresses fit the list, disable filtering */ + if (ifa) { + sdata->arp_filter_state = false; + c = 0; + } else { + sdata->arp_filter_state = true; + } + bss_conf->arp_addr_cnt = c; + + /* Configure driver only if associated */ + if (ifmgd->associated) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + ieee80211_bss_info_change_notify(sdata, + BSS_CHANGED_ARP_FILTER); + } + + mutex_unlock(&ifmgd->mtx); + + return NOTIFY_DONE; +} +#endif + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -396,7 +457,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->iflist_mtx); mutex_init(&local->scan_mtx); - spin_lock_init(&local->key_lock); + mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); @@ -419,8 +480,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sta_info_init(local); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) + for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); + atomic_set(&local->agg_queue_stop[i], 0); + } tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); @@ -431,8 +494,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - spin_lock_init(&local->ampdu_lock); - return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -572,18 +633,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; - local->hw.conf.dynamic_ps_forced_timeout = -1; + local->dynamic_ps_forced_timeout = -1; result = sta_info_start(local); if (result < 0) goto fail_sta_info; result = ieee80211_wep_init(local); - if (result < 0) { + if (result < 0) printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } rtnl_lock(); @@ -612,21 +671,30 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_max_network_latency; result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); - if (result) { rtnl_lock(); goto fail_pm_qos; } +#ifdef CONFIG_INET + local->ifa_notifier.notifier_call = ieee80211_ifa_changed; + result = register_inetaddr_notifier(&local->ifa_notifier); + if (result) + goto fail_ifa; +#endif + return 0; + fail_ifa: + pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, + &local->network_latency_notifier); + rtnl_lock(); fail_pm_qos: ieee80211_led_exit(local); ieee80211_remove_interfaces(local); fail_rate: rtnl_unlock(); ieee80211_wep_free(local); - fail_wep: sta_info_stop(local); fail_sta_info: destroy_workqueue(local->workqueue); @@ -647,6 +715,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, &local->network_latency_notifier); +#ifdef CONFIG_INET + unregister_inetaddr_notifier(&local->ifa_notifier); +#endif rtnl_lock(); @@ -704,6 +775,10 @@ static int __init ieee80211_init(void) if (ret) return ret; + ret = rc80211_minstrel_ht_init(); + if (ret) + goto err_minstrel; + ret = rc80211_pid_init(); if (ret) goto err_pid; @@ -716,6 +791,8 @@ static int __init ieee80211_init(void) err_netdev: rc80211_pid_exit(); err_pid: + rc80211_minstrel_ht_exit(); + err_minstrel: rc80211_minstrel_exit(); return ret; @@ -724,6 +801,7 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_ht_exit(); rc80211_minstrel_exit(); /* diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index bde81031727a..c8a4f19ed13b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -54,7 +54,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } /** @@ -345,7 +345,7 @@ static void ieee80211_mesh_path_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -362,7 +362,7 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -484,9 +484,6 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - /* might restart the timer but that doesn't matter */ - cancel_work_sync(&ifmsh->work); - /* use atomic bitops in case both timers fire at the same time */ if (del_timer_sync(&ifmsh->housekeeping_timer)) @@ -518,7 +515,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED | @@ -536,16 +533,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.mesh.work); - - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - rcu_barrier(); /* Wait for RX path and call_rcu()'s */ - skb_queue_purge(&sdata->u.mesh.skb_queue); + cancel_work_sync(&sdata->work); } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, @@ -608,8 +596,8 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, } } -static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; struct ieee80211_if_mesh *ifmsh; @@ -632,26 +620,11 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } - - kfree_skb(skb); } -static void ieee80211_mesh_work(struct work_struct *work) +void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.mesh.work); - struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - while ((skb = skb_dequeue(&ifmsh->skb_queue))) - ieee80211_mesh_rx_queued_mgmt(sdata, skb); if (ifmsh->preq_queue_len && time_after(jiffies, @@ -678,7 +651,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_queue_work(&local->hw, &sdata->u.mesh.work); + ieee80211_queue_work(&local->hw, &sdata->work); rcu_read_unlock(); } @@ -686,11 +659,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - INIT_WORK(&ifmsh->work, ieee80211_mesh_work); setup_timer(&ifmsh->housekeeping_timer, ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); - skb_queue_head_init(&sdata->u.mesh.skb_queue); ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; @@ -731,29 +702,3 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) INIT_LIST_HEAD(&ifmsh->preq_queue.list); spin_lock_init(&ifmsh->mesh_preq_queue_lock); } - -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_ACTION: - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - skb_queue_tail(&ifmsh->skb_queue, skb); - ieee80211_queue_work(&local->hw, &ifmsh->work); - return RX_QUEUED; - } - - return RX_CONTINUE; -} diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c88087f1cd0f..ebd3f1d9d889 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -237,8 +237,6 @@ void ieee80211s_update_metric(struct ieee80211_local *local, struct sta_info *stainfo, struct sk_buff *skb); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); -ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 0705018d8d1e..829e08a657d0 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -805,14 +805,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - ieee80211_queue_work(&sdata->local->hw, &ifmsh->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } else mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 181ffd6efd81..349e466cf08b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -315,7 +315,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; @@ -425,7 +425,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) read_unlock(&pathtbl_resize_lock); if (grow) { set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags); - ieee80211_queue_work(&local->hw, &ifmsh->work); + ieee80211_queue_work(&local->hw, &sdata->work); } return 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 3cd5f7b5d693..ea13a80a476c 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -65,7 +65,6 @@ void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } static inline @@ -73,7 +72,6 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } /** @@ -115,7 +113,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, } /** - * mesh_plink_deactivate - deactivate mesh peer link + * __mesh_plink_deactivate - deactivate mesh peer link * * @sta: mesh peer link to deactivate * @@ -123,18 +121,23 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, * * Locking: the caller must hold sta->lock */ -static void __mesh_plink_deactivate(struct sta_info *sta) +static bool __mesh_plink_deactivate(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated = false; - if (sta->plink_state == PLINK_ESTAB) + if (sta->plink_state == PLINK_ESTAB) { mesh_plink_dec_estab_count(sdata); + deactivated = true; + } sta->plink_state = PLINK_BLOCKED; mesh_path_flush_by_nexthop(sta); + + return deactivated; } /** - * __mesh_plink_deactivate - deactivate mesh peer link + * mesh_plink_deactivate - deactivate mesh peer link * * @sta: mesh peer link to deactivate * @@ -142,9 +145,15 @@ static void __mesh_plink_deactivate(struct sta_info *sta) */ void mesh_plink_deactivate(struct sta_info *sta) { + struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated; + spin_lock_bh(&sta->lock); - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); spin_unlock_bh(&sta->lock); + + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, @@ -381,10 +390,16 @@ int mesh_plink_open(struct sta_info *sta) void mesh_plink_block(struct sta_info *sta) { + struct ieee80211_sub_if_data *sdata = sta->sdata; + bool deactivated; + spin_lock_bh(&sta->lock); - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); sta->plink_state = PLINK_BLOCKED; spin_unlock_bh(&sta->lock); + + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } @@ -397,6 +412,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m enum plink_event event; enum plink_frame_type ftype; size_t baselen; + bool deactivated; u8 ie_len; u8 *baseaddr; __le16 plid, llid, reason; @@ -651,8 +667,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CNF_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; - mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); + mesh_plink_inc_estab_count(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; @@ -684,8 +701,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case OPN_ACPT: del_timer(&sta->plink_timer); sta->plink_state = PLINK_ESTAB; - mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); + mesh_plink_inc_estab_count(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mpl_dbg("Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, @@ -702,11 +720,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m case CLS_ACPT: reason = cpu_to_le16(MESH_CLOSE_RCVD); sta->reason = reason; - __mesh_plink_deactivate(sta); + deactivated = __mesh_plink_deactivate(sta); sta->plink_state = PLINK_HOLDING; llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); + if (deactivated) + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f803f8b72a93..b6c163ac22da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -478,6 +478,39 @@ static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, } } +void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local->hw.conf; + + WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || + !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || + (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); + + local->disable_dynamic_ps = false; + conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout; +} +EXPORT_SYMBOL(ieee80211_enable_dyn_ps); + +void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local->hw.conf; + + WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || + !(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) || + (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)); + + local->disable_dynamic_ps = true; + conf->dynamic_ps_timeout = 0; + del_timer_sync(&local->dynamic_ps_timer); + ieee80211_queue_work(&local->hw, + &local->dynamic_ps_enable_work); +} +EXPORT_SYMBOL(ieee80211_disable_dyn_ps); + /* powersave */ static void ieee80211_enable_ps(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) @@ -553,6 +586,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) found->u.mgd.associated->beacon_ies && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { + struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; if (latency < 0) @@ -561,25 +595,24 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); - timeout = local->hw.conf.dynamic_ps_forced_timeout; + timeout = local->dynamic_ps_forced_timeout; if (timeout < 0) { /* + * Go to full PSM if the user configures a very low + * latency requirement. * The 2 second value is there for compatibility until * the PM_QOS_NETWORK_LATENCY is configured with real * values. */ - if (latency == 2000000000) - timeout = 100; - else if (latency <= 50000) - timeout = 300; - else if (latency <= 100000) - timeout = 100; - else if (latency <= 500000) - timeout = 50; - else + if (latency > 1900000000 && latency != 2000000000) timeout = 0; + else + timeout = 100; } - local->hw.conf.dynamic_ps_timeout = timeout; + local->dynamic_ps_user_timeout = timeout; + if (!local->disable_dynamic_ps) + conf->dynamic_ps_timeout = + local->dynamic_ps_user_timeout; if (beaconint_us > latency) { local->ps_sdata = NULL; @@ -665,10 +698,11 @@ void ieee80211_dynamic_ps_timer(unsigned long data) /* MLME */ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, - struct ieee80211_if_managed *ifmgd, + struct ieee80211_sub_if_data *sdata, u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; int count; u8 *pos, uapsd_queues = 0; @@ -757,8 +791,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } /* enable WMM or activate new settings */ - local->hw.conf.flags |= IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); + sdata->vif.bss_conf.qos = true; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS); } static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, @@ -806,11 +840,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, { struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_local *local = sdata->local; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bss_info_changed |= BSS_CHANGED_ASSOC; /* set timing information */ - sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; - sdata->vif.bss_conf.timestamp = cbss->tsf; + bss_conf->beacon_int = cbss->beacon_interval; + bss_conf->timestamp = cbss->tsf; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -835,7 +870,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - sdata->vif.bss_conf.assoc = 1; + if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) + bss_conf->dtim_period = bss->dtim_period; + else + bss_conf->dtim_period = 0; + + bss_conf->assoc = 1; /* * For now just always ask the driver to update the basic rateset * when we have associated, we aren't checking whether it actually @@ -848,9 +888,15 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* Tell the driver to monitor connection quality (if supported) */ if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) && - sdata->vif.bss_conf.cqm_rssi_thold) + bss_conf->cqm_rssi_thold) bss_info_changed |= BSS_CHANGED_CQM; + /* Enable ARP filtering */ + if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) { + bss_conf->arp_filter_enabled = sdata->arp_filter_state; + bss_info_changed |= BSS_CHANGED_ARP_FILTER; + } + ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); @@ -898,13 +944,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); if (sta) { - set_sta_flags(sta, WLAN_STA_DISASSOC); + set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); changed |= ieee80211_reset_erp_info(sdata); @@ -932,6 +978,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_hw_config(local, config_changed); + /* Disable ARP filtering */ + if (sdata->vif.bss_conf.arp_filter_enabled) { + sdata->vif.bss_conf.arp_filter_enabled = false; + changed |= BSS_CHANGED_ARP_FILTER; + } + /* The BSSID (not really interesting) and HT changed */ changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT; ieee80211_bss_info_change_notify(sdata, changed); @@ -1279,7 +1331,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk, } if (elems.wmm_param) - ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, + ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len); else ieee80211_set_wmm_default(sdata); @@ -1551,7 +1603,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); - ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, + ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, elems.wmm_param_len); } @@ -1633,35 +1685,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); } -ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_mgmt *mgmt; - u16 fc; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = le16_to_cpu(mgmt->frame_control); - - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_DEAUTH: - case IEEE80211_STYPE_DISASSOC: - case IEEE80211_STYPE_ACTION: - skb_queue_tail(&sdata->u.mgd.skb_queue, skb); - ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - return RX_QUEUED; - } - - return RX_DROP_MONITOR; -} - -static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb) +void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; @@ -1693,44 +1718,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: switch (mgmt->u.action.category) { - case WLAN_CATEGORY_BACK: { - struct ieee80211_local *local = sdata->local; - int len = skb->len; - struct sta_info *sta; - - rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - break; - } - - local_bh_disable(); - - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) - break; - ieee80211_process_addba_request(local, sta, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_process_addba_resp(local, sta, mgmt, len); - break; - case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, sta, mgmt, len); - break; - } - local_bh_enable(); - rcu_read_unlock(); - break; - } case WLAN_CATEGORY_SPECTRUM_MGMT: ieee80211_sta_process_chanswitch(sdata, &mgmt->u.action.u.chan_switch.sw_elem, @@ -1754,7 +1741,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, default: WARN(1, "unexpected: %d", rma); } - goto out; + return; } mutex_unlock(&ifmgd->mtx); @@ -1769,7 +1756,8 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (wk->sdata != sdata) continue; - if (wk->type != IEEE80211_WORK_ASSOC) + if (wk->type != IEEE80211_WORK_ASSOC && + wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN)) @@ -1799,8 +1787,6 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); } - out: - kfree_skb(skb); } static void ieee80211_sta_timer(unsigned long data) @@ -1815,39 +1801,13 @@ static void ieee80211_sta_timer(unsigned long data) return; } - ieee80211_queue_work(&local->hw, &ifmgd->work); + ieee80211_queue_work(&local->hw, &sdata->work); } -static void ieee80211_sta_work(struct work_struct *work) +void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.mgd.work); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd; - struct sk_buff *skb; - - if (!ieee80211_sdata_running(sdata)) - return; - - if (local->scanning) - return; - - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) - return; - - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. - */ - if (WARN(local->suspended, "STA MLME work scheduled while " - "going to suspend\n")) - return; - - ifmgd = &sdata->u.mgd; - - /* first process frames to avoid timing out while a frame is pending */ - while ((skb = skb_dequeue(&ifmgd->skb_queue))) - ieee80211_sta_rx_queued_mgmt(sdata, skb); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; /* then process the rest of the work */ mutex_lock(&ifmgd->mtx); @@ -1942,8 +1902,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } @@ -1958,7 +1917,6 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ - cancel_work_sync(&ifmgd->work); cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1990,7 +1948,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd; ifmgd = &sdata->u.mgd; - INIT_WORK(&ifmgd->work, ieee80211_sta_work); INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, @@ -2003,7 +1960,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) (unsigned long) sdata); setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, (unsigned long) sdata); - skb_queue_head_init(&ifmgd->skb_queue); ifmgd->flags = 0; @@ -2081,6 +2037,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, auth_alg = WLAN_AUTH_OPEN; break; case NL80211_AUTHTYPE_SHARED_KEY: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EOPNOTSUPP; auth_alg = WLAN_AUTH_SHARED_KEY; break; case NL80211_AUTHTYPE_FT: @@ -2134,6 +2092,8 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt; + struct ieee80211_rx_status *rx_status; + struct ieee802_11_elems elems; u16 status; if (!skb) { @@ -2141,6 +2101,19 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, return WORK_DONE_DESTROY; } + if (wk->type == IEEE80211_WORK_ASSOC_BEACON_WAIT) { + mutex_lock(&wk->sdata->u.mgd.mtx); + rx_status = (void *) skb->cb; + ieee802_11_parse_elems(skb->data + 24 + 12, skb->len - 24 - 12, &elems); + ieee80211_rx_bss_info(wk->sdata, (void *)skb->data, skb->len, rx_status, + &elems, true); + mutex_unlock(&wk->sdata->u.mgd.mtx); + + wk->type = IEEE80211_WORK_ASSOC; + /* not really done yet */ + return WORK_DONE_REQUEUE; + } + mgmt = (void *)skb->data; status = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -2153,6 +2126,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk, wk->filter_ta); return WORK_DONE_DESTROY; } + mutex_unlock(&wk->sdata->u.mgd.mtx); } @@ -2253,10 +2227,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (req->prev_bssid) memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); - wk->type = IEEE80211_WORK_ASSOC; wk->chan = req->bss->channel; wk->sdata = sdata; wk->done = ieee80211_assoc_done; + if (!bss->dtim_period && + sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) + wk->type = IEEE80211_WORK_ASSOC_BEACON_WAIT; + else + wk->type = IEEE80211_WORK_ASSOC; if (req->use_mfp) { ifmgd->mfp = IEEE80211_MFP_REQUIRED; @@ -2282,14 +2260,16 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_work *wk; - const u8 *bssid = req->bss->bssid; + u8 bssid[ETH_ALEN]; + bool assoc_bss = false; mutex_lock(&ifmgd->mtx); + memcpy(bssid, req->bss->bssid, ETH_ALEN); if (ifmgd->associated == req->bss) { - bssid = req->bss->bssid; - ieee80211_set_disassoc(sdata, true); + ieee80211_set_disassoc(sdata, false); mutex_unlock(&ifmgd->mtx); + assoc_bss = true; } else { bool not_auth_yet = false; @@ -2302,7 +2282,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (wk->type != IEEE80211_WORK_DIRECT_PROBE && wk->type != IEEE80211_WORK_AUTH && - wk->type != IEEE80211_WORK_ASSOC) + wk->type != IEEE80211_WORK_ASSOC && + wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) @@ -2335,6 +2316,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, cookie, !req->local_state_change); + if (assoc_bss) + sta_info_destroy_addr(sdata, bssid); ieee80211_recalc_idle(sdata->local); @@ -2379,41 +2362,6 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return 0; } -int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - const u8 *buf, size_t len, u64 *cookie) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sk_buff *skb; - - /* Check that we are on the requested channel for transmission */ - if ((chan != local->tmp_channel || - channel_type != local->tmp_channel_type) && - (chan != local->oper_channel || - channel_type != local->_oper_channel_type)) - return -EBUSY; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); - if (!skb) - return -ENOMEM; - skb_reserve(skb, local->hw.extra_tx_headroom); - - memcpy(skb_put(skb, len), buf, len); - - if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) - IEEE80211_SKB_CB(skb)->flags |= - IEEE80211_TX_INTFL_DONT_ENCRYPT; - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX | - IEEE80211_TX_CTL_REQ_TX_STATUS; - skb->dev = sdata->dev; - ieee80211_tx_skb(sdata, skb); - - *cookie = (unsigned long) skb; - return 0; -} - void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 75202b295a4e..d287fde0431d 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -40,22 +40,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) list_for_each_entry(sdata, &local->interfaces, list) ieee80211_disable_keys(sdata); - /* Tear down aggregation sessions */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + /* tear down aggregation sessions and remove STAs */ + mutex_lock(&local->sta_mtx); + list_for_each_entry(sta, &local->sta_list, list) { + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { set_sta_flags(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta); } - } - rcu_read_unlock(); - - /* remove STAs */ - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { if (sta->uploaded) { sdata = sta->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -72,6 +64,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { + cancel_work_sync(&sdata->work); + switch(sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_quiesce(sdata); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 065a96190e32..168427b0ffdc 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -147,5 +147,18 @@ static inline void rc80211_minstrel_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL_HT +extern int rc80211_minstrel_ht_init(void); +extern void rc80211_minstrel_ht_exit(void); +#else +static inline int rc80211_minstrel_ht_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_ht_exit(void) +{ +} +#endif + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index f65ce6dcc8e2..778c604d7939 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -67,7 +67,6 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) for (i = rix; i >= 0; i--) if (mi->r[i].rix == rix) break; - WARN_ON(i < 0); return i; } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c new file mode 100644 index 000000000000..c5b465904e3b --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -0,0 +1,827 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/random.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rate.h" +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +#define AVG_PKT_SIZE 1200 +#define SAMPLE_COLUMNS 10 +#define EWMA_LEVEL 75 + +/* Number of bits for an average sized packet */ +#define MCS_NBITS (AVG_PKT_SIZE << 3) + +/* Number of symbols for a packet with (bps) bits per symbol */ +#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) + +/* Transmission time for a packet containing (syms) symbols */ +#define MCS_SYMBOL_TIME(sgi, syms) \ + (sgi ? \ + ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ + (syms) << 2 /* syms * 4 us */ \ + ) + +/* Transmit duration for the raw data part of an average sized packet */ +#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps))) + +/* MCS rate information for an MCS group */ +#define MCS_GROUP(_streams, _sgi, _ht40) { \ + .streams = _streams, \ + .flags = \ + (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ + (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \ + .duration = { \ + MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \ + MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \ + } \ +} + +/* + * To enable sufficiently targeted rate sampling, MCS rates are divided into + * groups, based on the number of streams and flags (HT40, SGI) that they + * use. + */ +const struct mcs_group minstrel_mcs_groups[] = { + MCS_GROUP(1, 0, 0), + MCS_GROUP(2, 0, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 0), +#endif + + MCS_GROUP(1, 1, 0), + MCS_GROUP(2, 1, 0), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 0), +#endif + + MCS_GROUP(1, 0, 1), + MCS_GROUP(2, 0, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 0, 1), +#endif + + MCS_GROUP(1, 1, 1), + MCS_GROUP(2, 1, 1), +#if MINSTREL_MAX_STREAMS >= 3 + MCS_GROUP(3, 1, 1), +#endif +}; + +static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (100 - weight) + old * weight) / 100; +} + +/* + * Look up an MCS group index based on mac80211 rate information + */ +static int +minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate) +{ + int streams = (rate->idx / MCS_GROUP_RATES) + 1; + u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH; + int i; + + for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) { + if (minstrel_mcs_groups[i].streams != streams) + continue; + if (minstrel_mcs_groups[i].flags != (rate->flags & flags)) + continue; + + return i; + } + + WARN_ON(1); + return 0; +} + +static inline struct minstrel_rate_stats * +minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) +{ + return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; +} + + +/* + * Recalculate success probabilities and counters for a rate using EWMA + */ +static void +minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr) +{ + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); + if (!mr->att_hist) + mr->probability = mr->cur_prob; + else + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, EWMA_LEVEL); + mr->att_hist += mr->attempts; + mr->succ_hist += mr->success; + } else { + mr->sample_skipped++; + } + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; +} + +/* + * Calculate throughput based on the average A-MPDU length, taking into account + * the expected number of retransmissions and their expected length + */ +static void +minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int group, int rate) +{ + struct minstrel_rate_stats *mr; + unsigned int usecs; + + mr = &mi->groups[group].rates[rate]; + + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->cur_tp = 0; + return; + } + + usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + usecs += minstrel_mcs_groups[group].duration[rate]; + mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); +} + +/* + * Update rate statistics and select new primary rates + * + * Rules for rate selection: + * - max_prob_rate must use only one stream, as a tradeoff between delivery + * probability and throughput during strong fluctuations + * - as long as the max prob rate has a probability of more than 3/4, pick + * higher throughput rates, even if the probablity is a bit lower + */ +static void +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + struct minstrel_rate_stats *mr; + int cur_prob, cur_prob_tp, cur_tp, cur_tp2; + int group, i, index; + + if (mi->ampdu_packets > 0) { + mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, + MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); + mi->ampdu_len = 0; + mi->ampdu_packets = 0; + } + + mi->sample_slow = 0; + mi->sample_count = 0; + mi->max_tp_rate = 0; + mi->max_tp_rate2 = 0; + mi->max_prob_rate = 0; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mg->max_tp_rate = 0; + mg->max_tp_rate2 = 0; + mg->max_prob_rate = 0; + mi->sample_count++; + + for (i = 0; i < MCS_GROUP_RATES; i++) { + if (!(mg->supported & BIT(i))) + continue; + + mr = &mg->rates[i]; + mr->retry_updated = false; + index = MCS_GROUP_RATES * group + i; + minstrel_calc_rate_ewma(mp, mr); + minstrel_ht_calc_tp(mp, mi, group, i); + + if (!mr->cur_tp) + continue; + + /* ignore the lowest rate of each single-stream group */ + if (!i && minstrel_mcs_groups[group].streams == 1) + continue; + + if ((mr->cur_tp > cur_prob_tp && mr->probability > + MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) { + mg->max_prob_rate = index; + cur_prob = mr->probability; + cur_prob_tp = mr->cur_tp; + } + + if (mr->cur_tp > cur_tp) { + swap(index, mg->max_tp_rate); + cur_tp = mr->cur_tp; + mr = minstrel_get_ratestats(mi, index); + } + + if (index >= mg->max_tp_rate) + continue; + + if (mr->cur_tp > cur_tp2) { + mg->max_tp_rate2 = index; + cur_tp2 = mr->cur_tp; + } + } + } + + /* try to sample up to half of the availble rates during each interval */ + mi->sample_count *= 4; + + cur_prob = 0; + cur_prob_tp = 0; + cur_tp = 0; + cur_tp2 = 0; + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams == 1) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + cur_prob_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate); + if (cur_tp < mr->cur_tp) { + mi->max_tp_rate = mg->max_tp_rate; + cur_tp = mr->cur_tp; + } + + mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); + if (cur_tp2 < mr->cur_tp) { + mi->max_tp_rate2 = mg->max_tp_rate2; + cur_tp2 = mr->cur_tp; + } + } + + mi->stats_update = jiffies; +} + +static bool +minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate) +{ + if (!rate->count) + return false; + + if (rate->idx < 0) + return false; + + return !!(rate->flags & IEEE80211_TX_RC_MCS); +} + +static void +minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +{ + struct minstrel_mcs_group_data *mg; + + for (;;) { + mi->sample_group++; + mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups); + mg = &mi->groups[mi->sample_group]; + + if (!mg->supported) + continue; + + if (++mg->index >= MCS_GROUP_RATES) { + mg->index = 0; + if (++mg->column >= ARRAY_SIZE(sample_table)) + mg->column = 0; + } + break; + } +} + +static void +minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx, + bool primary) +{ + int group, orig_group; + + orig_group = group = *idx / MCS_GROUP_RATES; + while (group > 0) { + group--; + + if (!mi->groups[group].supported) + continue; + + if (minstrel_mcs_groups[group].streams > + minstrel_mcs_groups[orig_group].streams) + continue; + + if (primary) + *idx = mi->groups[group].max_tp_rate; + else + *idx = mi->groups[group].max_tp_rate2; + break; + } +} + +static void +minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + u16 tid; + + if (unlikely(!ieee80211_is_data_qos(hdr->frame_control))) + return; + + if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE))) + return; + + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(pubsta, tid); +} + +static void +minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_rate_stats *rate, *rate2; + struct minstrel_priv *mp = priv; + bool last = false; + int group; + int i = 0; + + if (!msp->is_ht) + return mac80211_minstrel.tx_status(priv, sband, sta, &msp->legacy, skb); + + /* This packet was aggregated but doesn't carry status info */ + if ((info->flags & IEEE80211_TX_CTL_AMPDU) && + !(info->flags & IEEE80211_TX_STAT_AMPDU)) + return; + + if (!info->status.ampdu_len) { + info->status.ampdu_ack_len = 1; + info->status.ampdu_len = 1; + } + + mi->ampdu_packets++; + mi->ampdu_len += info->status.ampdu_len; + + if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { + mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + mi->sample_tries = 3; + mi->sample_count--; + } + + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + mi->sample_packets += info->status.ampdu_len; + minstrel_next_sample_idx(mi); + } + + for (i = 0; !last; i++) { + last = (i == IEEE80211_TX_MAX_RATES - 1) || + !minstrel_ht_txstat_valid(&ar[i + 1]); + + if (!minstrel_ht_txstat_valid(&ar[i])) + break; + + group = minstrel_ht_get_group_idx(&ar[i]); + rate = &mi->groups[group].rates[ar[i].idx % 8]; + + if (last && (info->flags & IEEE80211_TX_STAT_ACK)) + rate->success += info->status.ampdu_ack_len; + + rate->attempts += ar[i].count * info->status.ampdu_len; + } + + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. + */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); + if (rate2->attempts > 30 && + MINSTREL_FRAC(rate2->success, rate2->attempts) < + MINSTREL_FRAC(20, 100)) + minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); + + if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + minstrel_ht_update_stats(mp, mi); + minstrel_aggr_check(mp, sta, skb); + } +} + +static void +minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + int index) +{ + struct minstrel_rate_stats *mr; + const struct mcs_group *group; + unsigned int tx_time, tx_time_rtscts, tx_time_data; + unsigned int cw = mp->cw_min; + unsigned int t_slot = 9; /* FIXME */ + unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + + mr = minstrel_get_ratestats(mi, index); + if (mr->probability < MINSTREL_FRAC(1, 10)) { + mr->retry_count = 1; + mr->retry_count_rtscts = 1; + return; + } + + mr->retry_count = 2; + mr->retry_count_rtscts = 2; + mr->retry_updated = true; + + group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time = 2 * (t_slot + mi->overhead + tx_time_data); + tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data); + do { + cw = (cw << 1) | 1; + cw = min(cw, mp->cw_max); + tx_time += cw + t_slot + mi->overhead; + tx_time_rtscts += cw + t_slot + mi->overhead_rtscts; + if (tx_time_rtscts < mp->segment_size) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); +} + + +static void +minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate, int index, + struct ieee80211_tx_rate_control *txrc, + bool sample, bool rtscts) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + struct minstrel_rate_stats *mr; + + mr = minstrel_get_ratestats(mi, index); + if (!mr->retry_updated) + minstrel_calc_retransmit(mp, mi, index); + + if (mr->probability < MINSTREL_FRAC(20, 100)) + rate->count = 2; + else if (rtscts) + rate->count = mr->retry_count_rtscts; + else + rate->count = mr->retry_count; + + rate->flags = IEEE80211_TX_RC_MCS | group->flags; + if (txrc->short_preamble) + rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + if (txrc->rts || rtscts) + rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; + rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; +} + +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + return group->duration[index % MCS_GROUP_RATES]; +} + +static int +minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mr; + struct minstrel_mcs_group_data *mg; + int sample_idx = 0; + + if (mi->sample_wait > 0) { + mi->sample_wait--; + return -1; + } + + if (!mi->sample_tries) + return -1; + + mi->sample_tries--; + mg = &mi->groups[mi->sample_group]; + sample_idx = sample_table[mg->column][mg->index]; + mr = &mg->rates[sample_idx]; + sample_idx += mi->sample_group * MCS_GROUP_RATES; + + /* + * When not using MRR, do not sample if the probability is already + * higher than 95% to avoid wasting airtime + */ + if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) + goto next; + + /* + * Make sure that lower rates get sampled only occasionally, + * if the link is working perfectly. + */ + if (minstrel_get_duration(sample_idx) > + minstrel_get_duration(mi->max_tp_rate)) { + if (mr->sample_skipped < 10) + goto next; + + if (mi->sample_slow++ > 2) + goto next; + } + + return sample_idx; + +next: + minstrel_next_sample_idx(mi); + return -1; +} + +static void +minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_tx_rate *ar = info->status.rates; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_priv *mp = priv; + int sample_idx; + + if (rate_control_send_low(sta, priv_sta, txrc)) + return; + + if (!msp->is_ht) + return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); + + info->flags |= mi->tx_flags; + sample_idx = minstrel_get_sample_rate(mp, mi); + if (sample_idx >= 0) { + minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, + txrc, true, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, true); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, + txrc, false, false); + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + } + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true); + + ar[3].count = 0; + ar[3].idx = -1; + + mi->total_packets++; + + /* wraparound */ + if (mi->total_packets == ~0) { + mi->total_packets = 0; + mi->sample_packets = 0; + } +} + +static void +minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + enum nl80211_channel_type oper_chan_type) +{ + struct minstrel_priv *mp = priv; + struct minstrel_ht_sta_priv *msp = priv_sta; + struct minstrel_ht_sta *mi = &msp->ht; + struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; + struct ieee80211_local *local = hw_to_local(mp->hw); + u16 sta_cap = sta->ht_cap.cap; + int ack_dur; + int stbc; + int i; + + /* fall back to the old minstrel for legacy stations */ + if (!sta->ht_cap.ht_supported) { + msp->is_ht = false; + memset(&msp->legacy, 0, sizeof(msp->legacy)); + msp->legacy.r = msp->ratelist; + msp->legacy.sample_table = msp->sample_table; + return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy); + } + + BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != + MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS); + + msp->is_ht = true; + memset(mi, 0, sizeof(*mi)); + mi->stats_update = jiffies; + + ack_dur = ieee80211_frame_duration(local, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(local, 0, 60, 1, 1) + ack_dur; + mi->overhead_rtscts = mi->overhead + 2 * ack_dur; + + mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); + + /* When using MRR, sample more on the first attempt, without delay */ + if (mp->has_mrr) { + mi->sample_count = 16; + mi->sample_wait = 0; + } else { + mi->sample_count = 8; + mi->sample_wait = 8; + } + mi->sample_tries = 4; + + stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >> + IEEE80211_HT_CAP_RX_STBC_SHIFT; + mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT; + + if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING) + mi->tx_flags |= IEEE80211_TX_CTL_LDPC; + + if (oper_chan_type != NL80211_CHAN_HT40MINUS && + oper_chan_type != NL80211_CHAN_HT40PLUS) + sta_cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + for (i = 0; i < ARRAY_SIZE(mi->groups); i++) { + u16 req = 0; + + mi->groups[i].supported = 0; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) { + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SGI_40; + else + req |= IEEE80211_HT_CAP_SGI_20; + } + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + + if ((sta_cap & req) != req) + continue; + + mi->groups[i].supported = + mcs->rx_mask[minstrel_mcs_groups[i].streams - 1]; + } +} + +static void +minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_priv *mp = priv; + + minstrel_ht_update_caps(priv, sband, sta, priv_sta, mp->hw->conf.channel_type); +} + +static void +minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + u32 changed, enum nl80211_channel_type oper_chan_type) +{ + minstrel_ht_update_caps(priv, sband, sta, priv_sta, oper_chan_type); +} + +static void * +minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_ht_sta_priv *msp; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp); + if (!msp) + return NULL; + + msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!msp->ratelist) + goto error; + + msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!msp->sample_table) + goto error1; + + return msp; + +error1: + kfree(msp->ratelist); +error: + kfree(msp); + return NULL; +} + +static void +minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + kfree(msp->sample_table); + kfree(msp->ratelist); + kfree(msp); +} + +static void * +minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + return mac80211_minstrel.alloc(hw, debugfsdir); +} + +static void +minstrel_ht_free(void *priv) +{ + mac80211_minstrel.free(priv); +} + +static struct rate_control_ops mac80211_minstrel_ht = { + .name = "minstrel_ht", + .tx_status = minstrel_ht_tx_status, + .get_rate = minstrel_ht_get_rate, + .rate_init = minstrel_ht_rate_init, + .rate_update = minstrel_ht_rate_update, + .alloc_sta = minstrel_ht_alloc_sta, + .free_sta = minstrel_ht_free_sta, + .alloc = minstrel_ht_alloc, + .free = minstrel_ht_free, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_ht_add_sta_debugfs, + .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs, +#endif +}; + + +static void +init_sample_table(void) +{ + int col, i, new_idx; + u8 rnd[MCS_GROUP_RATES]; + + memset(sample_table, 0xff, sizeof(sample_table)); + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < MCS_GROUP_RATES; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i]) % MCS_GROUP_RATES; + + while (sample_table[col][new_idx] != 0xff) + new_idx = (new_idx + 1) % MCS_GROUP_RATES; + + sample_table[col][new_idx] = i; + } + } +} + +int __init +rc80211_minstrel_ht_init(void) +{ + init_sample_table(); + return ieee80211_rate_control_register(&mac80211_minstrel_ht); +} + +void +rc80211_minstrel_ht_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel_ht); +} diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h new file mode 100644 index 000000000000..462d2b227ed5 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_HT_H +#define __RC_MINSTREL_HT_H + +/* + * The number of streams can be changed to 2 to reduce code + * size and memory footprint. + */ +#define MINSTREL_MAX_STREAMS 3 +#define MINSTREL_STREAM_GROUPS 4 + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +#define MCS_GROUP_RATES 8 + +struct mcs_group { + u32 flags; + unsigned int streams; + unsigned int duration[MCS_GROUP_RATES]; +}; + +extern const struct mcs_group minstrel_mcs_groups[]; + +struct minstrel_rate_stats { + /* current / last sampling period attempts/success counters */ + unsigned int attempts, last_attempts; + unsigned int success, last_success; + + /* total attempts/success counters */ + u64 att_hist, succ_hist; + + /* current throughput */ + unsigned int cur_tp; + + /* packet delivery probabilities */ + unsigned int cur_prob, probability; + + /* maximum retry counts */ + unsigned int retry_count; + unsigned int retry_count_rtscts; + + bool retry_updated; + u8 sample_skipped; +}; + +struct minstrel_mcs_group_data { + u8 index; + u8 column; + + /* bitfield of supported MCS rates of this group */ + u8 supported; + + /* selected primary rates */ + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + + /* MCS rate statistics */ + struct minstrel_rate_stats rates[MCS_GROUP_RATES]; +}; + +struct minstrel_ht_sta { + /* ampdu length (average, per sampling interval) */ + unsigned int ampdu_len; + unsigned int ampdu_packets; + + /* ampdu length (EWMA) */ + unsigned int avg_ampdu_len; + + /* best throughput rate */ + unsigned int max_tp_rate; + + /* second best throughput rate */ + unsigned int max_tp_rate2; + + /* best probability rate */ + unsigned int max_prob_rate; + + /* time of last status update */ + unsigned long stats_update; + + /* overhead time in usec for each frame */ + unsigned int overhead; + unsigned int overhead_rtscts; + + unsigned int total_packets; + unsigned int sample_packets; + + /* tx flags to add for frames for this sta */ + u32 tx_flags; + + u8 sample_wait; + u8 sample_tries; + u8 sample_count; + u8 sample_slow; + + /* current MCS group to be sampled */ + u8 sample_group; + + /* MCS rate group info and statistics */ + struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS]; +}; + +struct minstrel_ht_sta_priv { + union { + struct minstrel_ht_sta ht; + struct minstrel_sta_info legacy; + }; +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif + void *ratelist; + void *sample_table; + bool is_ht; +}; + +void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c new file mode 100644 index 000000000000..4a5a4b3e7799 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rc80211_minstrel.h" +#include "rc80211_minstrel_ht.h" + +static int +minstrel_ht_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i, j, tp, prob, eprob; + char *p; + int ret; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "type rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) { + char htmode = '2'; + char gimode = 'L'; + + if (!mi->groups[i].supported) + continue; + + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + p += sprintf(p, "HT%c0/%cGI ", htmode, gimode); + + *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; + p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) * + MCS_GROUP_RATES + j); + + tp = mr->cur_tp / 10; + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n", + max(0, (int) mi->total_packets - (int) mi->sample_packets), + mi->sample_packets); + p += sprintf(p, "Average A-MPDU length: %d.%d\n", + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + ms->len = p - ms->buf; + + return 0; +} + +static const struct file_operations minstrel_ht_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, + &minstrel_ht_stat_fops); +} + +void +minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_ht_sta_priv *msp = priv_sta; + + debugfs_remove(msp->dbg_stats); +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index be9abc2e6348..fa0f37e4afe4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -293,7 +293,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -304,7 +304,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); } else dev_kfree_skb(skb); @@ -719,16 +719,13 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&sta->lock); - - if (!sta->ampdu_mlme.tid_active_rx[tid]) - goto dont_reorder_unlock; - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) + goto dont_reorder; /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) - goto dont_reorder_unlock; + goto dont_reorder; /* new, potentially un-ordered, ampdu frame - process it */ @@ -740,20 +737,22 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - spin_unlock(&sta->lock); - __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); - dev_kfree_skb(skb); + skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&rx->sdata->skb_queue, skb); + ieee80211_queue_work(&local->hw, &rx->sdata->work); return; } - if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) { - spin_unlock(&sta->lock); + /* + * No locking needed -- we will only ever process one + * RX packet at a time, and thus own tid_agg_rx. All + * other code manipulating it needs to (and does) make + * sure that we cannot get to it any more before doing + * anything with it. + */ + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) return; - } - dont_reorder_unlock: - spin_unlock(&sta->lock); dont_reorder: __skb_queue_tail(frames, skb); } @@ -825,6 +824,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *stakey = NULL; int mmie_keyidx = -1; + __le16 fc; /* * Key selection 101 @@ -866,13 +866,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (rx->sta) stakey = rcu_dereference(rx->sta->key); - if (!ieee80211_has_protected(hdr->frame_control)) + fc = hdr->frame_control; + + if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); if (!is_multicast_ether_addr(hdr->addr1) && stakey) { rx->key = stakey; /* Skip decryption if the frame is not protected. */ - if (!ieee80211_has_protected(hdr->frame_control)) + if (!ieee80211_has_protected(fc)) return RX_CONTINUE; } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ @@ -884,7 +886,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); - } else if (!ieee80211_has_protected(hdr->frame_control)) { + } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we * need to set rx->key if there is a key that could have been @@ -892,7 +894,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * have been expected. */ struct ieee80211_key *key = NULL; - if (ieee80211_is_mgmt(hdr->frame_control) && + if (ieee80211_is_mgmt(fc) && is_multicast_ether_addr(hdr->addr1) && (key = rcu_dereference(rx->sdata->default_mgmt_key))) rx->key = key; @@ -914,7 +916,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; - hdrlen = ieee80211_hdrlen(hdr->frame_control); + hdrlen = ieee80211_hdrlen(fc); if (rx->skb->len < 8 + hdrlen) return RX_DROP_UNUSABLE; /* TODO: count this? */ @@ -947,19 +949,17 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (skb_linearize(rx->skb)) return RX_DROP_UNUSABLE; - - hdr = (struct ieee80211_hdr *)rx->skb->data; - - /* Check for weak IVs if possible */ - if (rx->sta && rx->key->conf.alg == ALG_WEP && - ieee80211_is_data(hdr->frame_control) && - (!(status->flag & RX_FLAG_IV_STRIPPED) || - !(status->flag & RX_FLAG_DECRYPTED)) && - ieee80211_wep_is_weak_iv(rx->skb, rx->key)) - rx->sta->wep_weak_iv_count++; + /* the hdr variable is invalid now! */ switch (rx->key->conf.alg) { case ALG_WEP: + /* Check for weak IVs if possible */ + if (rx->sta && ieee80211_is_data(fc) && + (!(status->flag & RX_FLAG_IV_STRIPPED) || + !(status->flag & RX_FLAG_DECRYPTED)) && + ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + result = ieee80211_crypto_wep_decrypt(rx); break; case ALG_TKIP: @@ -1267,11 +1267,13 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rx->queue, &(rx->skb)); if (rx->key && rx->key->conf.alg == ALG_CCMP && ieee80211_has_protected(fc)) { + int queue = ieee80211_is_mgmt(fc) ? + NUM_RX_DATA_QUEUES : rx->queue; /* Store CCMP PN so that we can verify that the next * fragment has a sequential PN value. */ entry->ccmp = 1; memcpy(entry->last_pn, - rx->key->u.ccmp.rx_pn[rx->queue], + rx->key->u.ccmp.rx_pn[queue], CCMP_PN_LEN); } return RX_QUEUED; @@ -1291,6 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (entry->ccmp) { int i; u8 pn[CCMP_PN_LEN], *rpn; + int queue; if (!rx->key || rx->key->conf.alg != ALG_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); @@ -1299,7 +1302,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (pn[i]) break; } - rpn = rx->key->u.ccmp.rx_pn[rx->queue]; + queue = ieee80211_is_mgmt(fc) ? + NUM_RX_DATA_QUEUES : rx->queue; + rpn = rx->key->u.ccmp.rx_pn[queue]; if (memcmp(pn, rpn, CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, CCMP_PN_LEN); @@ -1573,7 +1578,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) /* deliver to local stack */ skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + netif_receive_skb(skb); } } @@ -1829,13 +1834,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) &bar_data, sizeof(bar_data))) return RX_DROP_MONITOR; - spin_lock(&rx->sta->lock); tid = le16_to_cpu(bar_data.control) >> 12; - if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) { - spin_unlock(&rx->sta->lock); + + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); + if (!tid_agg_rx) return RX_DROP_MONITOR; - } - tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4; @@ -1848,11 +1851,15 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, frames); kfree_skb(skb); - spin_unlock(&rx->sta->lock); return RX_QUEUED; } - return RX_CONTINUE; + /* + * After this point, we only want management frames, + * so we can drop all remaining control frames to + * cooked monitor interfaces. + */ + return RX_DROP_MONITOR; } static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, @@ -1944,30 +1951,27 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (len < IEEE80211_MIN_ACTION_SIZE + 1) break; - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); - switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_req))) - return RX_DROP_MONITOR; - ieee80211_process_addba_request(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_ADDBA_RESP: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_process_addba_resp(local, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; case WLAN_ACTION_DELBA: if (len < (IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_process_delba(sdata, rx->sta, mgmt, len); - goto handled; + goto invalid; + break; + default: + goto invalid; } - break; + + goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) break; @@ -1997,7 +2001,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) break; - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + goto queue; } break; case WLAN_CATEGORY_SA_QUERY: @@ -2015,11 +2019,12 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_MESH_PLINK: case WLAN_CATEGORY_MESH_PATH_SEL: - if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); - break; + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + goto queue; } + invalid: /* * For AP mode, hostapd is responsible for handling any action * frames that we didn't handle, including returning unknown @@ -2039,8 +2044,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - cfg80211_rx_action(rx->sdata->dev, status->freq, + if (cfg80211_rx_action(rx->sdata->dev, status->freq, rx->skb->data, rx->skb->len, GFP_ATOMIC)) goto handled; @@ -2052,11 +2056,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0, GFP_ATOMIC); if (nskb) { - struct ieee80211_mgmt *mgmt = (void *)nskb->data; + struct ieee80211_mgmt *nmgmt = (void *)nskb->data; - mgmt->u.action.category |= 0x80; - memcpy(mgmt->da, mgmt->sa, ETH_ALEN); - memcpy(mgmt->sa, rx->sdata->vif.addr, ETH_ALEN); + nmgmt->u.action.category |= 0x80; + memcpy(nmgmt->da, nmgmt->sa, ETH_ALEN); + memcpy(nmgmt->sa, rx->sdata->vif.addr, ETH_ALEN); memset(nskb->cb, 0, sizeof(nskb->cb)); @@ -2068,6 +2072,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2075,10 +2087,15 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; ieee80211_rx_result rxs; + struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; + __le16 stype; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted_mgmt(rx)) return RX_DROP_UNUSABLE; @@ -2086,16 +2103,42 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (rxs != RX_CONTINUE) return rxs; - if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb); + stype = mgmt->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return ieee80211_ibss_rx_mgmt(sdata, rx->skb); + if (!ieee80211_vif_is_mesh(&sdata->vif) && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + + switch (stype) { + case cpu_to_le16(IEEE80211_STYPE_BEACON): + case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): + /* process for all: mesh, mlme, ibss */ + break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + case cpu_to_le16(IEEE80211_STYPE_DISASSOC): + /* process only for station */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + break; + case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ): + case cpu_to_le16(IEEE80211_STYPE_AUTH): + /* process only for ibss */ + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; + default: + return RX_DROP_MONITOR; + } - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb); + /* queue up frame and kick off work to process it */ + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&rx->local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; - return RX_DROP_MONITOR; + return RX_QUEUED; } static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, @@ -2151,7 +2194,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, u8 rate_or_pad; __le16 chan_freq; __le16 chan_flags; - } __attribute__ ((packed)) *rthdr; + } __packed *rthdr; struct sk_buff *skb = rx->skb, *skb2; struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -2201,7 +2244,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -2212,7 +2255,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); skb = NULL; } else goto out_free_skb; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index e1b0be7a57b9..41f20fb7e670 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -114,6 +114,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } + /* If the beacon had no TIM IE, or it was invalid, use 1 */ + if (beacon && !bss->dtim_period) + bss->dtim_period = 1; + /* replace old supported rates if we get new values */ srlen = 0; if (elems->supp_rates) { @@ -734,7 +738,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int ret = -EBUSY; - enum nl80211_band band; + enum ieee80211_band band; mutex_lock(&local->scan_mtx); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ba9360a475b0..6d86f0c1ad04 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -235,6 +235,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); + INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); + mutex_init(&sta->ampdu_mlme.mtx); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; @@ -246,14 +248,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, } for (i = 0; i < STA_TID_NUM; i++) { - /* timer_to_tid must be initialized with identity mapping to - * enable session_timer's data differentiation. refer to - * sta_rx_agg_session_timer_expired for useage */ + /* + * timer_to_tid must be initialized with identity mapping + * to enable session_timer's data differentiation. See + * sta_rx_agg_session_timer_expired for usage. + */ sta->timer_to_tid[i] = i; - /* tx */ - sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.tid_tx[i] = NULL; - sta->ampdu_mlme.addba_req_num[i] = 0; } skb_queue_head_init(&sta->ps_tx_buf); skb_queue_head_init(&sta->tx_filtered); @@ -647,15 +647,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) return ret; if (sta->key) { - ieee80211_key_free(sta->key); - /* - * We have only unlinked the key, and actually destroying it - * may mean it is removed from hardware which requires that - * the key->sta pointer is still valid, so flush the key todo - * list here. - */ - ieee80211_key_todo(); - + ieee80211_key_free(local, sta->key); WARN_ON(sta->key); } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index df9d45544ca5..54262e72376d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -42,9 +42,6 @@ * be in the queues * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping * station in power-save mode, reply when the driver unblocks. - * @WLAN_STA_DISASSOC: Disassociation in progress. - * This is used to reject TX BA session requests when disassociation - * is in progress. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -60,38 +57,44 @@ enum ieee80211_sta_info_flags { WLAN_STA_BLOCK_BA = 1<<11, WLAN_STA_PS_DRIVER = 1<<12, WLAN_STA_PSPOLL = 1<<13, - WLAN_STA_DISASSOC = 1<<14, }; #define STA_TID_NUM 16 #define ADDBA_RESP_INTERVAL HZ -#define HT_AGG_MAX_RETRIES (0x3) +#define HT_AGG_MAX_RETRIES 0x3 -#define HT_AGG_STATE_INITIATOR_SHIFT (4) - -#define HT_ADDBA_REQUESTED_MSK BIT(0) -#define HT_ADDBA_DRV_READY_MSK BIT(1) -#define HT_ADDBA_RECEIVED_MSK BIT(2) -#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3) -#define HT_AGG_STATE_INITIATOR_MSK BIT(HT_AGG_STATE_INITIATOR_SHIFT) -#define HT_AGG_STATE_IDLE (0x0) -#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ - HT_ADDBA_DRV_READY_MSK | \ - HT_ADDBA_RECEIVED_MSK) +#define HT_AGG_STATE_DRV_READY 0 +#define HT_AGG_STATE_RESPONSE_RECEIVED 1 +#define HT_AGG_STATE_OPERATIONAL 2 +#define HT_AGG_STATE_STOPPING 3 +#define HT_AGG_STATE_WANT_START 4 +#define HT_AGG_STATE_WANT_STOP 5 /** * struct tid_ampdu_tx - TID aggregation information (Tx). * + * @rcu_head: rcu head for freeing structure * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect - * @ssn: Starting Sequence Number expected to be aggregated. * @dialog_token: dialog token for aggregation session + * @state: session state (see above) + * @stop_initiator: initiator of a session stop + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the TX path can access it under RCU + * lock-free if, and only if, the state has the flag + * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path + * must also acquire the spinlock and re-check the state, + * see comments in the tx code touching it. */ struct tid_ampdu_tx { + struct rcu_head rcu_head; struct timer_list addba_resp_timer; struct sk_buff_head pending; - u16 ssn; + unsigned long state; u8 dialog_token; + u8 stop_initiator; }; /** @@ -106,8 +109,18 @@ struct tid_ampdu_tx { * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session + * @rcu_head: RCU head used for freeing this struct + * + * This structure is protected by RCU and the per-station + * spinlock. Assignments to the array holding it must hold + * the spinlock, only the RX path can access it under RCU + * lock-free. The RX path, since it is single-threaded, + * can even modify the structure without locking since the + * only other modifications to it are done when the struct + * can not yet or no longer be found by the RX path. */ struct tid_ampdu_rx { + struct rcu_head rcu_head; struct sk_buff **reorder_buf; unsigned long *reorder_time; struct timer_list session_timer; @@ -120,6 +133,32 @@ struct tid_ampdu_rx { }; /** + * struct sta_ampdu_mlme - STA aggregation information. + * + * @tid_rx: aggregation info for Rx per TID -- RCU protected + * @tid_tx: aggregation info for Tx per TID + * @addba_req_num: number of times addBA request has been sent. + * @dialog_token_allocator: dialog token enumerator for each new session; + * @work: work struct for starting/stopping aggregation + * @tid_rx_timer_expired: bitmap indicating on which TIDs the + * RX timer expired until the work for it runs + * @mtx: mutex to protect all TX data (except non-NULL assignments + * to tid_tx[idx], which are protected by the sta spinlock) + */ +struct sta_ampdu_mlme { + struct mutex mtx; + /* rx */ + struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; + unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)]; + /* tx */ + struct work_struct work; + struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; + u8 addba_req_num[STA_TID_NUM]; + u8 dialog_token_allocator; +}; + + +/** * enum plink_state - state of a mesh peer link finite state machine * * @PLINK_LISTEN: initial state, considered the implicit state of non existant @@ -143,28 +182,6 @@ enum plink_state { }; /** - * struct sta_ampdu_mlme - STA aggregation information. - * - * @tid_active_rx: TID's state in Rx session state machine. - * @tid_rx: aggregation info for Rx per TID - * @tid_state_tx: TID's state in Tx session state machine. - * @tid_tx: aggregation info for Tx per TID - * @addba_req_num: number of times addBA request has been sent. - * @dialog_token_allocator: dialog token enumerator for each new session; - */ -struct sta_ampdu_mlme { - /* rx */ - bool tid_active_rx[STA_TID_NUM]; - struct tid_ampdu_rx *tid_rx[STA_TID_NUM]; - /* tx */ - u8 tid_state_tx[STA_TID_NUM]; - struct tid_ampdu_tx *tid_tx[STA_TID_NUM]; - u8 addba_req_num[STA_TID_NUM]; - u8 dialog_token_allocator; -}; - - -/** * struct sta_info - STA information * * This structure collects information about a station that @@ -410,20 +427,20 @@ void for_each_sta_info_type_check(struct ieee80211_local *local, { } -#define for_each_sta_info(local, _addr, sta, nxt) \ +#define for_each_sta_info(local, _addr, _sta, nxt) \ for ( /* initialise loop */ \ - sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ - nxt = sta ? rcu_dereference(sta->hnext) : NULL; \ + _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ /* typecheck */ \ - for_each_sta_info_type_check(local, (_addr), sta, nxt), \ + for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ /* continue condition */ \ - sta; \ + _sta; \ /* advance loop */ \ - sta = nxt, \ - nxt = sta ? rcu_dereference(sta->hnext) : NULL \ + _sta = nxt, \ + nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ ) \ /* compare address and run code only if it matches */ \ - if (memcmp(sta->sta.addr, (_addr), ETH_ALEN) == 0) + if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0) /* * Get STA info by index, BROKEN! diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 94613af009f3..10caec5ea8fa 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, - * we can definitely not rely on the the control information + * we can definitely not rely on the control information * being correct. Clear it so we don't get junk there, and * indicate that it needs new processing, but must not be * modified/encrypted again. @@ -377,7 +377,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) { skb2->dev = prev_dev; - netif_rx(skb2); + netif_receive_skb(skb2); } } @@ -386,7 +386,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (prev_dev) { skb->dev = prev_dev; - netif_rx(skb); + netif_receive_skb(skb); skb = NULL; } rcu_read_unlock(); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 7ef491e9d66d..e840c9cd46db 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -202,9 +202,9 @@ EXPORT_SYMBOL(ieee80211_get_tkip_key); * @payload_len is the length of payload (_not_ including IV/ICV length). * @ta is the transmitter addresses. */ -void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, - struct ieee80211_key *key, - u8 *pos, size_t payload_len, u8 *ta) +int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, + struct ieee80211_key *key, + u8 *pos, size_t payload_len, u8 *ta) { u8 rc4key[16]; struct tkip_ctx *ctx = &key->u.tkip.tx; @@ -216,7 +216,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); - ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); + return ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); } /* Decrypt packet payload with TKIP using @key. @pos is a pointer to the diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h index d4714383f5fc..7e83dee976fa 100644 --- a/net/mac80211/tkip.h +++ b/net/mac80211/tkip.h @@ -15,7 +15,7 @@ u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); -void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, +int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, struct ieee80211_key *key, u8 *pos, size_t payload_len, u8 *ta); enum { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 680bcb7093db..c54db966926b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -576,17 +576,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } static ieee80211_tx_result debug_noinline -ieee80211_tx_h_sta(struct ieee80211_tx_data *tx) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - - if (tx->sta && tx->sta->uploaded) - info->control.sta = &tx->sta->sta; - - return TX_CONTINUE; -} - -static ieee80211_tx_result debug_noinline ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); @@ -1092,6 +1081,59 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, return true; } +static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, + struct sk_buff *skb, + struct ieee80211_tx_info *info, + struct tid_ampdu_tx *tid_tx, + int tid) +{ + bool queued = false; + + if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) { + /* + * nothing -- this aggregation session is being started + * but that might still fail with the driver + */ + } else { + spin_lock(&tx->sta->lock); + /* + * Need to re-check now, because we may get here + * + * 1) in the window during which the setup is actually + * already done, but not marked yet because not all + * packets are spliced over to the driver pending + * queue yet -- if this happened we acquire the lock + * either before or after the splice happens, but + * need to recheck which of these cases happened. + * + * 2) during session teardown, if the OPERATIONAL bit + * was cleared due to the teardown but the pointer + * hasn't been assigned NULL yet (or we loaded it + * before it was assigned) -- in this case it may + * now be NULL which means we should just let the + * packet pass through because splicing the frames + * back is already done. + */ + tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; + + if (!tid_tx) { + /* do nothing, let packet pass through */ + } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) { + info->flags |= IEEE80211_TX_CTL_AMPDU; + } else { + queued = true; + info->control.vif = &tx->sdata->vif; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + __skb_queue_tail(&tid_tx->pending, skb); + } + spin_unlock(&tx->sta->lock); + } + + return queued; +} + /* * initialises @tx */ @@ -1104,8 +1146,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, tid; - u8 *qc, *state; - bool queued = false; + u8 *qc; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -1157,35 +1198,16 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - spin_lock(&tx->sta->lock); - /* - * XXX: This spinlock could be fairly expensive, but see the - * comment in agg-tx.c:ieee80211_agg_tx_operational(). - * One way to solve this would be to do something RCU-like - * for managing the tid_tx struct and using atomic bitops - * for the actual state -- by introducing an actual - * 'operational' bit that would be possible. It would - * require changing ieee80211_agg_tx_operational() to - * set that bit, and changing the way tid_tx is managed - * everywhere, including races between that bit and - * tid_tx going away (tid_tx being added can be easily - * committed to memory before the 'operational' bit). - */ - tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; - state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) { - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else if (*state != HT_AGG_STATE_IDLE) { - /* in progress */ - queued = true; - info->control.vif = &sdata->vif; - info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - __skb_queue_tail(&tid_tx->pending, skb); - } - spin_unlock(&tx->sta->lock); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (tid_tx) { + bool queued; - if (unlikely(queued)) - return TX_QUEUED; + queued = ieee80211_tx_prep_agg(tx, skb, info, + tid_tx, tid); + + if (unlikely(queued)) + return TX_QUEUED; + } } if (is_multicast_ether_addr(hdr->addr1)) { @@ -1274,6 +1296,11 @@ static int __ieee80211_tx(struct ieee80211_local *local, break; } + if (sta && sta->uploaded) + info->control.sta = &sta->sta; + else + info->control.sta = NULL; + ret = drv_tx(local, skb); if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { dev_kfree_skb(skb); @@ -1313,7 +1340,6 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_check_assoc); CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); - CALL_TXH(ieee80211_tx_h_sta); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); @@ -1909,11 +1935,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, h_pos += encaps_len; } +#ifdef CONFIG_MAC80211_MESH if (meshhdrlen > 0) { memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); nh_pos += meshhdrlen; h_pos += meshhdrlen; } +#endif if (ieee80211_is_data_qos(fc)) { __le16 *qos_control; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5b79d552780a..748387d45bc0 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -803,8 +803,12 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) /* after reinitialize QoS TX queues setting to default, * disable QoS at all */ - local->hw.conf.flags &= ~IEEE80211_CONF_QOS; - drv_config(local, IEEE80211_CONF_CHANGE_QOS); + + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { + sdata->vif.bss_conf.qos = + sdata->vif.type != NL80211_IFTYPE_STATION; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS); + } } void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, @@ -1138,18 +1142,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); - /* Clear Suspend state so that ADDBA requests can be processed */ - - rcu_read_lock(); - - if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { - clear_sta_flags(sta, WLAN_STA_BLOCK_BA); - } - } - - rcu_read_unlock(); - /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); @@ -1173,7 +1165,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT | BSS_CHANGED_BSSID | - BSS_CHANGED_CQM; + BSS_CHANGED_CQM | + BSS_CHANGED_QOS; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -1202,13 +1195,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } - rcu_read_lock(); + /* + * Clear the WLAN_STA_BLOCK_BA flag so new aggregation + * sessions can be established after a resume. + * + * Also tear down aggregation sessions since reconfiguring + * them in a hardware restart scenario is not easily done + * right now, and the hardware will have lost information + * about the sessions, but we and the AP still think they + * are active. This is really a workaround though. + */ if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + mutex_lock(&local->sta_mtx); + + list_for_each_entry(sta, &local->sta_list, list) { ieee80211_sta_tear_down_BA_sessions(sta); + clear_sta_flags(sta, WLAN_STA_BLOCK_BA); } + + mutex_unlock(&local->sta_mtx); } - rcu_read_unlock(); /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 5f3a4113bda1..9ebc8d8a1f5b 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -32,13 +32,16 @@ int ieee80211_wep_init(struct ieee80211_local *local) local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(local->wep_tx_tfm)) + if (IS_ERR(local->wep_tx_tfm)) { + local->wep_rx_tfm = ERR_PTR(-EINVAL); return PTR_ERR(local->wep_tx_tfm); + } local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_rx_tfm)) { crypto_free_blkcipher(local->wep_tx_tfm); + local->wep_tx_tfm = ERR_PTR(-EINVAL); return PTR_ERR(local->wep_rx_tfm); } @@ -47,8 +50,10 @@ int ieee80211_wep_init(struct ieee80211_local *local) void ieee80211_wep_free(struct ieee80211_local *local) { - crypto_free_blkcipher(local->wep_tx_tfm); - crypto_free_blkcipher(local->wep_rx_tfm); + if (!IS_ERR(local->wep_tx_tfm)) + crypto_free_blkcipher(local->wep_tx_tfm); + if (!IS_ERR(local->wep_rx_tfm)) + crypto_free_blkcipher(local->wep_rx_tfm); } static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) @@ -122,19 +127,24 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, /* Perform WEP encryption using given key. data buffer must have tailroom * for 4-byte ICV. data_len must not include this ICV. Note: this function * does _not_ add IV. data = RC4(data | CRC32(data)) */ -void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, - size_t klen, u8 *data, size_t data_len) +int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, + size_t klen, u8 *data, size_t data_len) { struct blkcipher_desc desc = { .tfm = tfm }; struct scatterlist sg; __le32 icv; + if (IS_ERR(tfm)) + return -1; + icv = cpu_to_le32(~crc32_le(~0, data, data_len)); put_unaligned(icv, (__le32 *)(data + data_len)); crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); + + return 0; } @@ -168,10 +178,8 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, /* Add room for ICV */ skb_put(skb, WEP_ICV_LEN); - ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); - - return 0; + return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, + iv + WEP_IV_LEN, len); } @@ -185,6 +193,9 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, struct scatterlist sg; __le32 crc; + if (IS_ERR(tfm)) + return -1; + crypto_blkcipher_setkey(tfm, rc4key, klen); sg_init_one(&sg, data, data_len + WEP_ICV_LEN); crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index fe29d7e5759f..58654ee33518 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -18,7 +18,7 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); -void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, +int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); int ieee80211_wep_encrypt(struct ieee80211_local *local, struct sk_buff *skb, diff --git a/net/mac80211/work.c b/net/mac80211/work.c index b025dc7bb0fd..81d4ad64184a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -560,6 +560,22 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) return WORK_ACT_TIMEOUT; } +static enum work_action __must_check +ieee80211_assoc_beacon_wait(struct ieee80211_work *wk) +{ + if (wk->started) + return WORK_ACT_TIMEOUT; + + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? + */ + printk(KERN_DEBUG "%s: waiting for beacon from %pM\n", + wk->sdata->name, wk->filter_ta); + wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval); + return WORK_ACT_NONE; +} + static void ieee80211_auth_challenge(struct ieee80211_work *wk, struct ieee80211_mgmt *mgmt, size_t len) @@ -709,6 +725,25 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk, return WORK_ACT_DONE; } +static enum work_action __must_check +ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_sub_if_data *sdata = wk->sdata; + struct ieee80211_local *local = sdata->local; + + ASSERT_WORK_MTX(local); + + if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) + return WORK_ACT_MISMATCH; + + if (len < 24 + 12) + return WORK_ACT_NONE; + + printk(KERN_DEBUG "%s: beacon received\n", sdata->name); + return WORK_ACT_DONE; +} + static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, struct sk_buff *skb) { @@ -731,6 +766,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, case IEEE80211_WORK_DIRECT_PROBE: case IEEE80211_WORK_AUTH: case IEEE80211_WORK_ASSOC: + case IEEE80211_WORK_ASSOC_BEACON_WAIT: bssid = wk->filter_ta; break; default: @@ -745,6 +781,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, continue; switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_BEACON: + rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len); + break; case IEEE80211_STYPE_PROBE_RESP: rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len, rx_status); @@ -840,7 +879,7 @@ static void ieee80211_work_work(struct work_struct *work) /* * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the the rest. + * here we'll pick the rest. */ if (WARN(local->suspended, "work scheduled while going to suspend\n")) return; @@ -916,6 +955,9 @@ static void ieee80211_work_work(struct work_struct *work) case IEEE80211_WORK_REMAIN_ON_CHANNEL: rma = ieee80211_remain_on_channel_timeout(wk); break; + case IEEE80211_WORK_ASSOC_BEACON_WAIT: + rma = ieee80211_assoc_beacon_wait(wk); + break; } wk->started = started; @@ -1065,6 +1107,7 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: + case IEEE80211_STYPE_BEACON: skb_queue_tail(&local->work_skb_queue, skb); ieee80211_queue_work(&local->hw, &local->work_work); return RX_QUEUED; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 0adbcc941ac9..8d59d27d887e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -183,9 +183,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) skb_put(skb, TKIP_ICV_LEN); hdr = (struct ieee80211_hdr *) skb->data; - ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, - key, pos, len, hdr->addr2); - return 0; + return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, + key, pos, len, hdr->addr2); } @@ -436,6 +435,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[CCMP_PN_LEN]; int data_len; + int queue; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -453,7 +453,10 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) ccmp_hdr2pn(pn, skb->data + hdrlen); - if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { + queue = ieee80211_is_mgmt(hdr->frame_control) ? + NUM_RX_DATA_QUEUES : rx->queue; + + if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -470,7 +473,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[rx->queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); /* Remove CCMP header and MIC */ skb_trim(skb, skb->len - CCMP_MIC_LEN); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8593a77cfea9..43288259f4a1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -40,27 +40,6 @@ config NF_CONNTRACK if NF_CONNTRACK -config NF_CT_ACCT - bool "Connection tracking flow accounting" - depends on NETFILTER_ADVANCED - help - If this option is enabled, the connection tracking code will - keep per-flow packet and byte counters. - - Those counters can be used for flow-based accounting or the - `connbytes' match. - - Please note that currently this option only sets a default state. - You may change it at boot time with nf_conntrack.acct=0/1 kernel - parameter or by loading the nf_conntrack module with acct=0/1. - - You may also disable/enable it on a running system with: - sysctl net.netfilter.nf_conntrack_acct=0/1 - - This option will be removed in 2.6.29. - - If unsure, say `N'. - config NF_CONNTRACK_MARK bool 'Connection mark tracking support' depends on NETFILTER_ADVANCED @@ -347,6 +326,22 @@ config NETFILTER_XT_CONNMARK comment "Xtables targets" +config NETFILTER_XT_TARGET_CHECKSUM + tristate "CHECKSUM target support" + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds a `CHECKSUM' target, which can be used in the iptables mangle + table. + + You can use this target to compute and fill in the checksum in + a packet that lacks a checksum. This is particularly useful, + if you need to work around old applications such as dhcp clients, + that do not work well with checksum offloads, but don't want to disable + checksum offload in your device. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' depends on NETFILTER_ADVANCED @@ -424,6 +419,18 @@ config NETFILTER_XT_TARGET_HL since you can easily create immortal packets that loop forever on the network. +config NETFILTER_XT_TARGET_IDLETIMER + tristate "IDLETIMER target support" + depends on NETFILTER_ADVANCED + help + + This option adds the `IDLETIMER' target. Each matching packet + resets the timer associated with label specified when the rule is + added. When the timer expires, it triggers a sysfs notification. + The remaining time for expiration can be read via sysfs. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_LED tristate '"LED" target support' depends on LEDS_CLASS && LEDS_TRIGGERS @@ -503,7 +510,7 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. config NETFILTER_XT_TARGET_TEE - tristate '"TEE" - packet cloning to alternate destiantion' + tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED depends on (IPV6 || IPV6=n) depends on !NF_CONNTRACK || NF_CONNTRACK @@ -618,7 +625,6 @@ config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' depends on NF_CONNTRACK depends on NETFILTER_ADVANCED - select NF_CT_ACCT help This option adds a `connbytes' match, which allows you to match the number of bytes and/or packets for each direction within a connection. @@ -657,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_CPU + tristate '"cpu" match support' + depends on NETFILTER_ADVANCED + help + CPU matching allows you to match packets based on the CPU + currently handling the packet. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_ADVANCED @@ -736,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE If unsure, say M. +config NETFILTER_XT_MATCH_IPVS + tristate '"ipvs" match support' + depends on IP_VS + depends on NETFILTER_ADVANCED + depends on NF_CONNTRACK + help + This option allows you to match against IPVS properties of a packet. + + If unsure, say N. + config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 14e3a8fd8180..441050f31111 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets +obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o @@ -61,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o +obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o @@ -68,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o @@ -75,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o +obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 712ccad13344..46a77d5c3887 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -3,7 +3,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on NET && INET && NETFILTER && NF_CONNTRACK ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -26,7 +26,7 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" - depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + depends on IPV6 = y || IP_VS = IPV6 ---help--- Add IPv6 support to IPVS. This is incomplete and might be dangerous. @@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP protocol. Say Y if unsure. config IP_VS_PROTO_AH_ESP - bool - depends on UNDEFINED + def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH config IP_VS_PROTO_ESP bool "ESP load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" - select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. @@ -238,7 +235,7 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP + depends on IP_VS_PROTO_TCP && NF_NAT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 1cb0e834f8ff..e76f87f4aca8 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = { }; #endif - -/* - * Replace a segment of data with a new segment - */ -int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, - char *o_buf, int o_len, char *n_buf, int n_len) -{ - int diff; - int o_offset; - int o_left; - - EnterFunction(9); - - diff = n_len - o_len; - o_offset = o_buf - (char *)skb->data; - /* The length of left data after o_buf+o_len in the skb data */ - o_left = skb->len - (o_offset + o_len); - - if (diff <= 0) { - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - skb_trim(skb, skb->len + diff); - } else if (diff <= skb_tailroom(skb)) { - skb_put(skb, diff); - memmove(o_buf + n_len, o_buf + o_len, o_left); - memcpy(o_buf, n_buf, n_len); - } else { - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) - return -ENOMEM; - skb_put(skb, diff); - memmove(skb->data + o_offset + n_len, - skb->data + o_offset + o_len, o_left); - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); - } - - /* must update the iph total length here */ - ip_hdr(skb)->tot_len = htons(skb->len); - - LeaveFunction(9); - return 0; -} - - int __init ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index ff04e9edbed6..b71c69a2db13 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) unsigned hash; int ret; + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return 0; + /* Hash by protocol, client address and port */ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); @@ -268,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get return cp; } +struct ip_vs_conn * +ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); + /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, @@ -353,14 +379,37 @@ struct ip_vs_conn *ip_vs_conn_out_get return ret; } +struct ip_vs_conn * +ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} +EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); /* * Put back the conn and restart its timer with its timeout */ void ip_vs_conn_put(struct ip_vs_conn *cp) { - /* reset it expire in its timeout */ - mod_timer(&cp->timer, jiffies+cp->timeout); + unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? + 0 : cp->timeout; + mod_timer(&cp->timer, jiffies+t); __ip_vs_conn_put(cp); } @@ -653,7 +702,7 @@ static void ip_vs_conn_expire(unsigned long data) /* * unhash it if it is hashed in the conn table */ - if (!ip_vs_conn_unhash(cp)) + if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) goto expire_later; /* diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1cd6e3fd058b..4f8ddba48011 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -54,7 +54,6 @@ EXPORT_SYMBOL(register_ip_vs_scheduler); EXPORT_SYMBOL(unregister_ip_vs_scheduler); -EXPORT_SYMBOL(ip_vs_skb_replace); EXPORT_SYMBOL(ip_vs_proto_name); EXPORT_SYMBOL(ip_vs_conn_new); EXPORT_SYMBOL(ip_vs_conn_in_get); @@ -194,6 +193,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ + __be16 flags; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -340,6 +340,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = ports[1]; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a new connection according to the template */ @@ -347,7 +351,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, &iph.saddr, ports[0], &iph.daddr, ports[1], &dest->addr, dport, - 0, + flags, dest); if (cp == NULL) { ip_vs_conn_put(ct); @@ -377,7 +381,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, flags; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -407,6 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; } + flags = (svc->flags & IP_VS_SVC_F_ONEPACKET + && iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; + /* * Create a connection entry. */ @@ -414,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) &iph.saddr, pptr[0], &iph.daddr, pptr[1], &dest->addr, dest->port ? dest->port : pptr[1], - 0, + flags, dest); if (cp == NULL) return NULL; @@ -464,6 +472,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && + iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); @@ -474,7 +485,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &daddr, 0, - IP_VS_CONN_F_BYPASS, + IP_VS_CONN_F_BYPASS | flags, NULL); if (cp == NULL) return NF_DROP; @@ -524,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } - -/* - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING - * chain, and is used for VS/NAT. - * It detects packets for VS/NAT connections and sends the packets - * immediately. This can avoid that iptable_nat mangles the packets - * for VS/NAT. - */ -static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!skb->ipvs_property) - return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - return NF_STOP; -} - __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -1487,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC-1, - }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1523,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC-1, - }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 36dc1d88c2fa..0f0c079c422a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) svc->scheduler->name); else #endif - seq_printf(seq, "%s %08X:%04X %s ", + seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - svc->scheduler->name); + svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { - seq_printf(seq, "FWM %08X %s ", - svc->fwmark, svc->scheduler->name); + seq_printf(seq, "FWM %08X %s %s", + svc->fwmark, svc->scheduler->name, + (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } if (svc->flags & IP_VS_SVC_F_PERSISTENT) diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 2ae747a376a5..f228a17ec649 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,6 +20,17 @@ * * Author: Wouter Gadeyne * + * + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from + * http://www.ssi.bg/~ja/nfct/: + * + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2008 + * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -32,6 +43,9 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> #include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> @@ -43,6 +57,16 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } +/* + * Called from init_conntrack() as expectfn handler. + */ +static void +ip_vs_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. + * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + */ +static void +ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 *port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + BUG_ON(!ct || ct == &nf_conntrack_untracked); + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + if (from_rs) + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, + proto, port, &cp->cport); + else + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, + proto, port, &cp->vport); + + exp->expectfn = ip_vs_expect_callback; + + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} /* * Look at outgoing ftp packets to catch the response to a PASV command @@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; - int ret; + int ret = 0; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); + ct = nf_ct_get(skb, &ctinfo); + if (ct && !nf_ct_is_untracked(ct)) { + /* If mangling fails this function will return 0 + * which will cause the packet to be dropped. + * Mangling can only fail under memory pressure, + * hopefully it will succeed on the retransmitted + * packet. + */ + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + start-data, end-start, + buf, buf_len); + if (ret) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, NULL, 0); + } + /* - * Calculate required delta-offset to keep TCP happy + * Not setting 'diff' is intentional, otherwise the sequence + * would be adjusted twice. */ - *diff = buf_len - (end-start); - - if (*diff == 0) { - /* simply replace it with new passive address */ - memcpy(start, buf, buf_len); - ret = 1; - } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, - end-start, buf, buf_len); - } cp->app_data = NULL; ip_vs_tcp_conn_listen(n_cp); @@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } + ct = (struct nf_conn *)skb->nfct; + if (ct && ct != &nf_conntrack_untracked) + ip_vs_expect_related(skb, ct, n_cp, + IPPROTO_TCP, &n_cp->dport, 1); + /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 94a45213faa6..9323f8944199 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -11,7 +11,7 @@ * Changes: * Martin Hamilton : fixed the terrible locking bugs * *lock(tbl->lock) ==> *lock(&tbl->lock) - * Wensong Zhang : fixed the uninitilized tbl->lock bug + * Wensong Zhang : fixed the uninitialized tbl->lock bug * Wensong Zhang : added doing full expiration check to * collect stale entries of 24+ hours when * no partial expire check in a half hour diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 535dc2b419d8..dbeed8ea421a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -386,7 +386,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, ip_vs_addr_copy(dest->af, &en->addr, daddr); en->lastuse = jiffies; - /* initilize its dest set */ + /* initialize its dest set */ atomic_set(&(en->set.size), 0); INIT_LIST_HEAD(&en->set.list); rwlock_init(&en->set.lock); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 2d3d5e4b35f8..027f654799fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) return NULL; } +EXPORT_SYMBOL(ip_vs_proto_get); /* diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index c9a3f7a21d53..4c0855cb006e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -8,55 +8,6 @@ #include <net/sctp/checksum.h> #include <net/ip_vs.h> - -static struct ip_vs_conn * -sctp_conn_in_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - -static struct ip_vs_conn * -sctp_conn_out_get(int af, - const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); -} - static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb, return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_in(cp, skb)) return 0; } @@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, .conn_schedule = sctp_conn_schedule, - .conn_in_get = sctp_conn_in_get, - .conn_out_get = sctp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, .csum_check = sctp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 91d28e073742..282d24de8592 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -27,52 +27,6 @@ #include <net/ip_vs.h> - -static struct ip_vs_conn * -tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - -static struct ip_vs_conn * -tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } -} - - static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, - .conn_in_get = tcp_conn_in_get, - .conn_out_get = tcp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, .csum_check = tcp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e7a6885e0167..8553231b5d41 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -27,58 +27,6 @@ #include <net/ip.h> #include <net/ip6_checksum.h> -static struct ip_vs_conn * -udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - -static struct ip_vs_conn * -udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct ip_vs_iphdr *iph, unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - __be16 _ports[2], *pptr; - - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) - return NULL; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - } else { - cp = ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); - } - - return cp; -} - - static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) @@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .init = udp_init, .exit = udp_exit, .conn_schedule = udp_conn_schedule, - .conn_in_get = udp_conn_in_get, - .conn_out_get = udp_conn_out_get, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, .csum_check = udp_csum_check, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 93c15a107b2c..21e1a5e9b9d3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -28,6 +28,7 @@ #include <net/ip6_route.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack.h> #include <linux/netfilter_ipv4.h> #include <net/ip_vs.h> @@ -90,10 +91,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, - atomic_read(&rt->u.dst.__refcnt), rtos); + atomic_read(&rt->dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); } else { @@ -148,10 +149,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, - atomic_read(&rt->u.dst.__refcnt)); + atomic_read(&rt->dst.__refcnt)); } spin_unlock(&dest->dst_lock); } else { @@ -198,7 +199,7 @@ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->u.dst.dev, dst_output); \ + (rt)->dst.dev, dst_output); \ } while (0) @@ -245,7 +246,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -265,7 +266,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -309,9 +310,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -323,13 +324,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif +static void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +{ + struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conntrack_tuple new_tuple; + + if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) + return; + + /* + * The connection is not yet in the hashtable, so we update it. + * CIP->VIP will remain the same, so leave the tuple in + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the + * real-server we will see RIP->DIP. + */ + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + new_tuple.src.u3 = cp->daddr; + /* + * This will also take care of UDP and other protocols. + */ + new_tuple.src.u.tcp.port = cp->dport; + nf_conntrack_alter_reply(ct, &new_tuple); +} + /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP @@ -376,7 +401,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); @@ -388,12 +413,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct iphdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -452,9 +479,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); @@ -465,12 +492,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + ip_vs_update_conntrack(skb, cp); + /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -498,7 +527,7 @@ tx_error: kfree_skb(skb); return NF_STOLEN; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif @@ -549,9 +578,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); @@ -601,7 +630,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -615,7 +644,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->u.dst, NULL); + ip_select_ident(iph, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -660,12 +689,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!rt) goto tx_error_icmp; - tdev = rt->u.dst.dev; + tdev = rt->dst.dev; - mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); /* TODO IPv6: do we need this check in IPv6? */ if (mtu < 1280) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } @@ -674,7 +703,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -689,7 +718,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); kfree_skb(skb); IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; @@ -707,7 +736,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* * Push down and install the IPIP header. @@ -760,7 +789,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); @@ -780,7 +809,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -813,10 +842,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->u.dst); + dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -827,13 +856,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(skb == NULL)) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); return NF_STOLEN; } /* drop old route */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -888,7 +917,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -900,12 +929,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -963,9 +992,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_icmp; /* MTU checking */ - mtu = dst_mtu(&rt->u.dst); + mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->u.dst); + dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -975,12 +1004,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!skb_make_writable(skb, offset)) goto tx_error_put; - if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; /* drop the old route when skb is not shared */ skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); + skb_dst_set(skb, &rt->dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); @@ -1001,7 +1030,7 @@ out: LeaveFunction(10); return rc; tx_error_put: - dst_release(&rt->u.dst); + dst_release(&rt->dst); goto tx_error; } #endif diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index ab81b380eae6..5178c691ecbf 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -17,13 +17,7 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> -#ifdef CONFIG_NF_CT_ACCT -#define NF_CT_ACCT_DEFAULT 1 -#else -#define NF_CT_ACCT_DEFAULT 0 -#endif - -static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; +static int nf_ct_acct __read_mostly; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); @@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net) net->ct.sysctl_acct = nf_ct_acct; if (net_eq(net, &init_net)) { -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif - ret = nf_ct_extend_register(&acct_extend); if (ret < 0) { printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eeeb8bc73982..df3eedb142ff 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct nf_conn nf_conntrack_untracked __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_untracked); +DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); -#ifdef CONFIG_NET_NS - ct->ct_net = net; -#endif + write_pnet(&ct->ct_net, net); #ifdef CONFIG_NF_CONNTRACK_ZONES if (zone) { struct nf_conntrack_zone *nf_ct_zone; @@ -968,8 +966,7 @@ acct: if (acct) { spin_lock_bh(&ct->lock); acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); + acct[CTINFO2DIR(ctinfo)].bytes += skb->len; spin_unlock_bh(&ct->lock); } } @@ -1183,10 +1180,21 @@ static void nf_ct_release_dying_list(struct net *net) spin_unlock_bh(&nf_conntrack_lock); } +static int untrack_refs(void) +{ + int cnt = 0, cpu; + + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + + cnt += atomic_read(&ct->ct_general.use) - 1; + } + return cnt; +} + static void nf_conntrack_cleanup_init_net(void) { - /* wait until all references to nf_conntrack_untracked are dropped */ - while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) + while (untrack_refs() > 0) schedule(); nf_conntrack_helper_fini(); @@ -1321,10 +1329,19 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); +void nf_ct_untracked_status_or(unsigned long bits) +{ + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(nf_conntrack_untracked, cpu).status |= bits; +} +EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); + static int nf_conntrack_init_init_net(void) { int max_factor = 8; - int ret; + int ret, cpu; /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ @@ -1363,13 +1380,13 @@ static int nf_conntrack_init_init_net(void) goto err_extend; #endif /* Set up fake conntrack: to never be deleted, not in any hashes */ -#ifdef CONFIG_NET_NS - nf_conntrack_untracked.ct_net = &init_net; -#endif - atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + for_each_possible_cpu(cpu) { + struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu); + write_pnet(&ct->ct_net, &init_net); + atomic_set(&ct->ct_general.use, 1); + } /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - + nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); return 0; #ifdef CONFIG_NF_CONNTRACK_ZONES diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fdc8fb4ae10f..7dcf7a404190 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; struct nf_ct_ext_type *t; + struct nf_ct_ext *ext = ct->ext; for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(ext, i)) continue; rcu_read_lock(); @@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head) void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { - struct nf_ct_ext *new; + struct nf_ct_ext *old, *new; int i, newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); - if (!ct->ext) + old = ct->ext; + if (!old) return nf_ct_ext_create(&ct->ext, id, gfp); - if (nf_ct_ext_exist(ct, id)) + if (__nf_ct_ext_exist(old, id)) return NULL; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); - newoff = ALIGN(ct->ext->len, t->align); + newoff = ALIGN(old->len, t->align); newlen = newoff + t->len; rcu_read_unlock(); - new = __krealloc(ct->ext, newlen, gfp); + new = __krealloc(old, newlen, gfp); if (!new) return NULL; - if (new != ct->ext) { + if (new != old) { for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!nf_ct_ext_exist(ct, i)) + if (!__nf_ct_ext_exist(old, i)) continue; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[i]); if (t && t->move) t->move((void *)new + new->offset[i], - (void *)ct->ext + ct->ext->offset[i]); + (void *)old + old->offset[i]); rcu_read_unlock(); } - call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); + call_rcu(&old->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 6eaee7c8a337..b969025cf82f 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (rt1->rt_gateway == rt2->rt_gateway && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } @@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && - rt1->u.dst.dev == rt2->u.dst.dev) + rt1->dst.dev == rt2->dst.dev) ret = 1; - dst_release(&rt2->u.dst); + dst_release(&rt2->dst); } - dst_release(&rt1->u.dst); + dst_release(&rt1->dst); } break; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 497b2224536f..aadde018a072 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, goto out; rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); + in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { if (ifa->ifa_broadcast == iph->daddr) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c42ff6aa441d..5bae1cd15eea 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) int err; /* ignore our fake conntrack entry */ - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return 0; if (events & (1 << IPCT_DESTROY)) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9dd8cd4fb6e6..c4c885dca3bd 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct, * Let's try to use the data from the packet. */ sender->td_end = end; + win <<= sender->td_scale; sender->td_maxwin = (win == 0 ? 1 : win); sender->td_maxend = end + sender->td_maxwin; + /* + * We haven't seen traffic in the other direction yet + * but we have to tweak window tracking to pass III + * and IV until that happens. + */ + if (receiver->td_maxwin == 0) + receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) @@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct, /* * Update receiver data. */ - if (after(end, sender->td_maxend)) + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; @@ -736,27 +744,19 @@ static bool tcp_in_window(const struct nf_conn *ct, return res; } -#define TH_FIN 0x01 -#define TH_SYN 0x02 -#define TH_RST 0x04 -#define TH_PUSH 0x08 -#define TH_ACK 0x10 -#define TH_URG 0x20 -#define TH_ECE 0x40 -#define TH_CWR 0x80 - /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ -static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = +static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| + TCPHDR_URG) + 1] = { - [TH_SYN] = 1, - [TH_SYN|TH_URG] = 1, - [TH_SYN|TH_ACK] = 1, - [TH_RST] = 1, - [TH_RST|TH_ACK] = 1, - [TH_FIN|TH_ACK] = 1, - [TH_FIN|TH_ACK|TH_URG] = 1, - [TH_ACK] = 1, - [TH_ACK|TH_URG] = 1, + [TCPHDR_SYN] = 1, + [TCPHDR_SYN|TCPHDR_URG] = 1, + [TCPHDR_SYN|TCPHDR_ACK] = 1, + [TCPHDR_RST] = 1, + [TCPHDR_RST|TCPHDR_ACK] = 1, + [TCPHDR_FIN|TCPHDR_ACK] = 1, + [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1, + [TCPHDR_ACK] = 1, + [TCPHDR_ACK|TCPHDR_URG] = 1, }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ @@ -803,7 +803,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, } /* Check TCP flags. */ - tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); + tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fc9a211e629e..6a1572b0ab41 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -66,9 +66,10 @@ struct nfulnl_instance { u_int16_t group_num; /* number of this queue */ u_int16_t flags; u_int8_t copy_mode; + struct rcu_head rcu; }; -static DEFINE_RWLOCK(instances_lock); +static DEFINE_SPINLOCK(instances_lock); static atomic_t global_seq; #define INSTANCE_BUCKETS 16 @@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num) struct nfulnl_instance *inst; head = &instance_table[instance_hashfn(group_num)]; - hlist_for_each_entry(inst, pos, head, hlist) { + hlist_for_each_entry_rcu(inst, pos, head, hlist) { if (inst->group_num == group_num) return inst; } @@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num) { struct nfulnl_instance *inst; - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); inst = __instance_lookup(group_num); - if (inst) - instance_get(inst); - read_unlock_bh(&instances_lock); + if (inst && !atomic_inc_not_zero(&inst->use)) + inst = NULL; + rcu_read_unlock_bh(); return inst; } +static void nfulnl_instance_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct nfulnl_instance, rcu)); + module_put(THIS_MODULE); +} + static void instance_put(struct nfulnl_instance *inst) { - if (inst && atomic_dec_and_test(&inst->use)) { - kfree(inst); - module_put(THIS_MODULE); - } + if (inst && atomic_dec_and_test(&inst->use)) + call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu); } static void nfulnl_timer(unsigned long data); @@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid) struct nfulnl_instance *inst; int err; - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); if (__instance_lookup(group_num)) { err = -EEXIST; goto out_unlock; @@ -166,32 +171,37 @@ instance_create(u_int16_t group_num, int pid) inst->copy_mode = NFULNL_COPY_PACKET; inst->copy_range = NFULNL_COPY_RANGE_MAX; - hlist_add_head(&inst->hlist, + hlist_add_head_rcu(&inst->hlist, &instance_table[instance_hashfn(group_num)]); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return inst; out_unlock: - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); return ERR_PTR(err); } static void __nfulnl_flush(struct nfulnl_instance *inst); +/* called with BH disabled */ static void __instance_destroy(struct nfulnl_instance *inst) { /* first pull it out of the global list */ - hlist_del(&inst->hlist); + hlist_del_rcu(&inst->hlist); /* then flush all pending packets from skb */ - spin_lock_bh(&inst->lock); + spin_lock(&inst->lock); + + /* lockless readers wont be able to use us */ + inst->copy_mode = NFULNL_COPY_DISABLED; + if (inst->skb) __nfulnl_flush(inst); - spin_unlock_bh(&inst->lock); + spin_unlock(&inst->lock); /* and finally put the refcount */ instance_put(inst); @@ -200,9 +210,9 @@ __instance_destroy(struct nfulnl_instance *inst) static inline void instance_destroy(struct nfulnl_instance *inst) { - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); __instance_destroy(inst); - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } static int @@ -403,8 +413,9 @@ __build_packet_message(struct nfulnl_instance *inst, NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -430,8 +441,9 @@ __build_packet_message(struct nfulnl_instance *inst, NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ @@ -619,6 +631,7 @@ nfulnl_log_packet(u_int8_t pf, size += nla_total_size(data_len); break; + case NFULNL_COPY_DISABLED: default: goto unlock_and_release; } @@ -672,7 +685,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, int i; /* destroy all instances for this pid */ - write_lock_bh(&instances_lock); + spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; struct nfulnl_instance *inst; @@ -684,7 +697,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, __instance_destroy(inst); } } - write_unlock_bh(&instances_lock); + spin_unlock_bh(&instances_lock); } return NOTIFY_DONE; } @@ -861,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st) for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; + return rcu_dereference_bh(instance_table[st->bucket].first); } return NULL; } static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) { - h = h->next; + h = rcu_dereference_bh(h->next); while (!h) { if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = instance_table[st->bucket].first; + h = rcu_dereference_bh(instance_table[st->bucket].first); } return h; } @@ -890,9 +903,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) } static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) + __acquires(rcu_bh) { - read_lock_bh(&instances_lock); + rcu_read_lock_bh(); return get_idx(seq->private, *pos); } @@ -903,9 +916,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) + __releases(rcu_bh) { - read_unlock_bh(&instances_lock); + rcu_read_unlock_bh(); } static int seq_show(struct seq_file *s, void *v) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 12e1ab37fcd8..68e67d19724d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -46,17 +46,19 @@ struct nfqnl_instance { int peer_pid; unsigned int queue_maxlen; unsigned int copy_range; - unsigned int queue_total; unsigned int queue_dropped; unsigned int queue_user_dropped; - unsigned int id_sequence; /* 'sequence' of pkt ids */ u_int16_t queue_num; /* number of this queue */ u_int8_t copy_mode; - - spinlock_t lock; - +/* + * Following fields are dirtied for each queued packet, + * keep them in same cache line if possible. + */ + spinlock_t lock; + unsigned int queue_total; + atomic_t id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ }; @@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, outdev = entry->outdev; - spin_lock_bh(&queue->lock); - - switch ((enum nfqnl_config_mode)queue->copy_mode) { + switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: break; case NFQNL_COPY_PACKET: if (entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) { - spin_unlock_bh(&queue->lock); + skb_checksum_help(entskb)) return NULL; - } - if (queue->copy_range == 0 - || queue->copy_range > entskb->len) + + data_len = ACCESS_ONCE(queue->copy_range); + if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - else - data_len = queue->copy_range; size += nla_total_size(data_len); break; } - entry->id = queue->id_sequence++; - - spin_unlock_bh(&queue->lock); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) @@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); + entry->id = atomic_inc_return(&queue->id_sequence); pmsg.packet_id = htonl(entry->id); pmsg.hw_protocol = entskb->protocol; pmsg.hook = entry->hook; @@ -296,8 +291,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -321,8 +317,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); } else { /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ @@ -866,7 +863,7 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); + atomic_read(&inst->id_sequence), 1); } static const struct seq_operations nfqnl_seq_ops = { diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c new file mode 100644 index 000000000000..0f642ef8cd26 --- /dev/null +++ b/net/netfilter/xt_CHECKSUM.c @@ -0,0 +1,70 @@ +/* iptables module for the packet checksum mangling + * + * (C) 2002 by Harald Welte <laforge@netfilter.org> + * (C) 2010 Red Hat, Inc. + * + * Author: Michael S. Tsirkin <mst@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_CHECKSUM.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>"); +MODULE_DESCRIPTION("Xtables: checksum modification"); +MODULE_ALIAS("ipt_CHECKSUM"); +MODULE_ALIAS("ip6t_CHECKSUM"); + +static unsigned int +checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_checksum_help(skb); + + return XT_CONTINUE; +} + +static int checksum_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_CHECKSUM_info *einfo = par->targinfo; + + if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { + pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + return -EINVAL; + } + if (!einfo->operation) { + pr_info("no CHECKSUM operation enabled\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target checksum_tg_reg __read_mostly = { + .name = "CHECKSUM", + .family = NFPROTO_UNSPEC, + .target = checksum_tg, + .targetsize = sizeof(struct xt_CHECKSUM_info), + .table = "mangle", + .checkentry = checksum_tg_check, + .me = THIS_MODULE, +}; + +static int __init checksum_tg_init(void) +{ + return xt_register_target(&checksum_tg_reg); +} + +static void __exit checksum_tg_exit(void) +{ + xt_unregister_target(&checksum_tg_reg); +} + +module_init(checksum_tg_init); +module_exit(checksum_tg_exit); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 562bf3266e04..0cb6053f02fd 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->flags & XT_CT_NOTRACK) { - ct = &nf_conntrack_untracked; + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); goto out; } @@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (ct != &nf_conntrack_untracked) { + if (!nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c new file mode 100644 index 000000000000..be1f22e13545 --- /dev/null +++ b/net/netfilter/xt_IDLETIMER.c @@ -0,0 +1,315 @@ +/* + * linux/net/netfilter/xt_IDLETIMER.c + * + * Netfilter module to trigger a timer when packet matches. + * After timer expires a kevent will be sent. + * + * Copyright (C) 2004, 2010 Nokia Corporation + * Written by Timo Teras <ext-timo.teras@nokia.com> + * + * Converted to x_tables and reworked for upstream inclusion + * by Luciano Coelho <luciano.coelho@nokia.com> + * + * Contact: Luciano Coelho <luciano.coelho@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/timer.h> +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_IDLETIMER.h> +#include <linux/kdev_t.h> +#include <linux/kobject.h> +#include <linux/workqueue.h> +#include <linux/sysfs.h> + +struct idletimer_tg_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); +}; + +struct idletimer_tg { + struct list_head entry; + struct timer_list timer; + struct work_struct work; + + struct kobject *kobj; + struct idletimer_tg_attr attr; + + unsigned int refcnt; +}; + +static LIST_HEAD(idletimer_tg_list); +static DEFINE_MUTEX(list_mutex); + +static struct kobject *idletimer_tg_kobj; + +static +struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) +{ + struct idletimer_tg *entry; + + BUG_ON(!label); + + list_for_each_entry(entry, &idletimer_tg_list, entry) { + if (!strcmp(label, entry->attr.attr.name)) + return entry; + } + + return NULL; +} + +static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct idletimer_tg *timer; + unsigned long expires = 0; + + mutex_lock(&list_mutex); + + timer = __idletimer_tg_find_by_label(attr->name); + if (timer) + expires = timer->timer.expires; + + mutex_unlock(&list_mutex); + + if (time_after(expires, jiffies)) + return sprintf(buf, "%u\n", + jiffies_to_msecs(expires - jiffies) / 1000); + + return sprintf(buf, "0\n"); +} + +static void idletimer_tg_work(struct work_struct *work) +{ + struct idletimer_tg *timer = container_of(work, struct idletimer_tg, + work); + + sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name); +} + +static void idletimer_tg_expired(unsigned long data) +{ + struct idletimer_tg *timer = (struct idletimer_tg *) data; + + pr_debug("timer %s expired\n", timer->attr.attr.name); + + schedule_work(&timer->work); +} + +static int idletimer_tg_create(struct idletimer_tg_info *info) +{ + int ret; + + info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + if (!info->timer) { + pr_debug("couldn't alloc timer\n"); + ret = -ENOMEM; + goto out; + } + + info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); + if (!info->timer->attr.attr.name) { + pr_debug("couldn't alloc attribute name\n"); + ret = -ENOMEM; + goto out_free_timer; + } + info->timer->attr.attr.mode = S_IRUGO; + info->timer->attr.show = idletimer_tg_show; + + ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr); + if (ret < 0) { + pr_debug("couldn't add file to sysfs"); + goto out_free_attr; + } + + list_add(&info->timer->entry, &idletimer_tg_list); + + setup_timer(&info->timer->timer, idletimer_tg_expired, + (unsigned long) info->timer); + info->timer->refcnt = 1; + + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + INIT_WORK(&info->timer->work, idletimer_tg_work); + + return 0; + +out_free_attr: + kfree(info->timer->attr.attr.name); +out_free_timer: + kfree(info->timer); +out: + return ret; +} + +/* + * The actual xt_tables plugin. + */ +static unsigned int idletimer_tg_target(struct sk_buff *skb, + const struct xt_action_param *par) +{ + const struct idletimer_tg_info *info = par->targinfo; + + pr_debug("resetting timer %s, timeout period %u\n", + info->label, info->timeout); + + BUG_ON(!info->timer); + + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + return XT_CONTINUE; +} + +static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) +{ + struct idletimer_tg_info *info = par->targinfo; + int ret; + + pr_debug("checkentry targinfo%s\n", info->label); + + if (info->timeout == 0) { + pr_debug("timeout value is zero\n"); + return -EINVAL; + } + + if (info->label[0] == '\0' || + strnlen(info->label, + MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { + pr_debug("label is empty or not nul-terminated\n"); + return -EINVAL; + } + + mutex_lock(&list_mutex); + + info->timer = __idletimer_tg_find_by_label(info->label); + if (info->timer) { + info->timer->refcnt++; + mod_timer(&info->timer->timer, + msecs_to_jiffies(info->timeout * 1000) + jiffies); + + pr_debug("increased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } else { + ret = idletimer_tg_create(info); + if (ret < 0) { + pr_debug("failed to create timer\n"); + mutex_unlock(&list_mutex); + return ret; + } + } + + mutex_unlock(&list_mutex); + return 0; +} + +static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) +{ + const struct idletimer_tg_info *info = par->targinfo; + + pr_debug("destroy targinfo %s\n", info->label); + + mutex_lock(&list_mutex); + + if (--info->timer->refcnt == 0) { + pr_debug("deleting timer %s\n", info->label); + + list_del(&info->timer->entry); + del_timer_sync(&info->timer->timer); + sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + kfree(info->timer->attr.attr.name); + kfree(info->timer); + } else { + pr_debug("decreased refcnt of timer %s to %u\n", + info->label, info->timer->refcnt); + } + + mutex_unlock(&list_mutex); +} + +static struct xt_target idletimer_tg __read_mostly = { + .name = "IDLETIMER", + .family = NFPROTO_UNSPEC, + .target = idletimer_tg_target, + .targetsize = sizeof(struct idletimer_tg_info), + .checkentry = idletimer_tg_checkentry, + .destroy = idletimer_tg_destroy, + .me = THIS_MODULE, +}; + +static struct class *idletimer_tg_class; + +static struct device *idletimer_tg_device; + +static int __init idletimer_tg_init(void) +{ + int err; + + idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer"); + err = PTR_ERR(idletimer_tg_class); + if (IS_ERR(idletimer_tg_class)) { + pr_debug("couldn't register device class\n"); + goto out; + } + + idletimer_tg_device = device_create(idletimer_tg_class, NULL, + MKDEV(0, 0), NULL, "timers"); + err = PTR_ERR(idletimer_tg_device); + if (IS_ERR(idletimer_tg_device)) { + pr_debug("couldn't register system device\n"); + goto out_class; + } + + idletimer_tg_kobj = &idletimer_tg_device->kobj; + + err = xt_register_target(&idletimer_tg); + if (err < 0) { + pr_debug("couldn't register xt target\n"); + goto out_dev; + } + + return 0; +out_dev: + device_destroy(idletimer_tg_class, MKDEV(0, 0)); +out_class: + class_destroy(idletimer_tg_class); +out: + return err; +} + +static void __exit idletimer_tg_exit(void) +{ + xt_unregister_target(&idletimer_tg); + + device_destroy(idletimer_tg_class, MKDEV(0, 0)); + class_destroy(idletimer_tg_class); +} + +module_init(idletimer_tg_init); +module_exit(idletimer_tg_exit); + +MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>"); +MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); +MODULE_DESCRIPTION("Xtables: idle time monitor"); +MODULE_LICENSE("GPL v2"); diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 512b9123252f..9d782181b6c8 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 69c01e10f8af..de079abd5bc8 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name) } EXPORT_SYMBOL_GPL(xt_rateest_lookup); +static void xt_rateest_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct xt_rateest, rcu)); +} + void xt_rateest_put(struct xt_rateest *est) { mutex_lock(&xt_rateest_mutex); if (--est->refcnt == 0) { hlist_del(&est->list); gen_kill_estimator(&est->bstats, &est->rstats); - kfree(est); + /* + * gen_estimator est_timer() might access est->lock or bstats, + * wait a RCU grace period before freeing 'est' + */ + call_rcu(&est->rcu, xt_rateest_free_rcu); } mutex_unlock(&xt_rateest_mutex); } @@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void) static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); + rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */ } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 62ec021fbd50..eb81c380da1b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, rcu_read_unlock(); if (rt != NULL) { - mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); } return mtu; } @@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) } #endif -#define TH_SYN 0x02 - /* Must specify -p tcp --syn */ static inline bool find_syn_match(const struct xt_entry_match *m) { const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; if (strcmp(m->u.kernel.match->name, "tcp") == 0 && - tcpinfo->flg_cmp & TH_SYN && + tcpinfo->flg_cmp & TCPHDR_SYN && !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) return true; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 859d9fd429c8..22a2d421e7eb 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; skb_dst_drop(skb); - skb_dst_set(skb, &rt->u.dst); - skb->dev = rt->u.dst.dev; + skb_dst_set(skb, &rt->dst); + skb->dev = rt->dst.dev; skb->protocol = htons(ETH_P_IP); return true; } @@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK /* Avoid counting cloned packets towards the original connection. */ nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif @@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) #ifdef WITH_CONNTRACK nf_conntrack_put(skb->nfct); - skb->nfct = &nf_conntrack_untracked.ct_general; + skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index e1a0dedac258..c61294d85fda 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_DROP; sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, - iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, - hp->source, tgi->lport ? tgi->lport : hp->dest, + iph->saddr, + tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, + tgi->lport ? tgi->lport : hp->dest, par->in, true); /* NOTE: assign_sock consumes our sk reference */ diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 30b95a1c1c89..f4af1bfafb1c 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - if (ct == &nf_conntrack_untracked) + if (nf_ct_is_untracked(ct)) return false; if (ct->master) diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 73517835303d..5b138506690e 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); + + /* + * This filter cannot function correctly unless connection tracking + * accounting is enabled, so complain in the hope that someone notices. + */ + if (!nf_ct_acct_enabled(par->net)) { + pr_warning("Forcing CT accounting to be enabled\n"); + nf_ct_set_acct(par->net, true); + } + return ret; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39681f10291c..e536710ad916 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct != NULL) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else + if (ct) { + if (nf_ct_is_untracked(ct)) + statebit = XT_CONNTRACK_STATE_UNTRACKED; + else + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); + } else statebit = XT_CONNTRACK_STATE_INVALID; if (info->match_flags & XT_CONNTRACK_STATE) { diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c new file mode 100644 index 000000000000..b39db8a5cbae --- /dev/null +++ b/net/netfilter/xt_cpu.c @@ -0,0 +1,63 @@ +/* Kernel module to match running CPU */ + +/* + * Might be used to distribute connections on several daemons, if + * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable, + * each RX queue IRQ affined to one CPU (1:1 mapping) + * + */ + +/* (C) 2010 Eric Dumazet + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/xt_cpu.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); +MODULE_DESCRIPTION("Xtables: CPU match"); + +static int cpu_mt_check(const struct xt_mtchk_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + if (info->invert & ~1) + return -EINVAL; + return 0; +} + +static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cpu_info *info = par->matchinfo; + + return (info->cpu == smp_processor_id()) ^ info->invert; +} + +static struct xt_match cpu_mt_reg __read_mostly = { + .name = "cpu", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cpu_mt_check, + .match = cpu_mt, + .matchsize = sizeof(struct xt_cpu_info), + .me = THIS_MODULE, +}; + +static int __init cpu_mt_init(void) +{ + return xt_register_match(&cpu_mt_reg); +} + +static void __exit cpu_mt_exit(void) +{ + xt_unregister_match(&cpu_mt_reg); +} + +module_init(cpu_mt_init); +module_exit(cpu_mt_exit); diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c new file mode 100644 index 000000000000..7a4d66db95ae --- /dev/null +++ b/net/netfilter/xt_ipvs.c @@ -0,0 +1,189 @@ +/* + * xt_ipvs - kernel module to match IPVS connection properties + * + * Author: Hannes Eder <heder@google.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/spinlock.h> +#include <linux/skbuff.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#endif +#include <linux/ip_vs.h> +#include <linux/types.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_ipvs.h> +#include <net/netfilter/nf_conntrack.h> + +#include <net/ip_vs.h> + +MODULE_AUTHOR("Hannes Eder <heder@google.com>"); +MODULE_DESCRIPTION("Xtables: match IPVS connection properties"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ipvs"); +MODULE_ALIAS("ip6t_ipvs"); + +/* borrowed from xt_conntrack */ +static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr, + const union nf_inet_addr *uaddr, + const union nf_inet_addr *umask, + unsigned int l3proto) +{ + if (l3proto == NFPROTO_IPV4) + return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; +#ifdef CONFIG_IP_VS_IPV6 + else if (l3proto == NFPROTO_IPV6) + return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, + &uaddr->in6) == 0; +#endif + else + return false; +} + +static bool +ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_ipvs_mtinfo *data = par->matchinfo; + /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ + const u_int8_t family = par->family; + struct ip_vs_iphdr iph; + struct ip_vs_protocol *pp; + struct ip_vs_conn *cp; + bool match = true; + + if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { + match = skb->ipvs_property ^ + !!(data->invert & XT_IPVS_IPVS_PROPERTY); + goto out; + } + + /* other flags than XT_IPVS_IPVS_PROPERTY are set */ + if (!skb->ipvs_property) { + match = false; + goto out; + } + + ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + + if (data->bitmask & XT_IPVS_PROTO) + if ((iph.protocol == data->l4proto) ^ + !(data->invert & XT_IPVS_PROTO)) { + match = false; + goto out; + } + + pp = ip_vs_proto_get(iph.protocol); + if (unlikely(!pp)) { + match = false; + goto out; + } + + /* + * Check if the packet belongs to an existing entry + */ + cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + if (unlikely(cp == NULL)) { + match = false; + goto out; + } + + /* + * We found a connection, i.e. ct != 0, make sure to call + * __ip_vs_conn_put before returning. In our case jump to out_put_con. + */ + + if (data->bitmask & XT_IPVS_VPORT) + if ((cp->vport == data->vport) ^ + !(data->invert & XT_IPVS_VPORT)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VPORTCTL) + if ((cp->control != NULL && + cp->control->vport == data->vportctl) ^ + !(data->invert & XT_IPVS_VPORTCTL)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_DIR) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + + if (ct == NULL || nf_ct_is_untracked(ct)) { + match = false; + goto out_put_cp; + } + + if ((ctinfo >= IP_CT_IS_REPLY) ^ + !!(data->invert & XT_IPVS_DIR)) { + match = false; + goto out_put_cp; + } + } + + if (data->bitmask & XT_IPVS_METHOD) + if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ + !(data->invert & XT_IPVS_METHOD)) { + match = false; + goto out_put_cp; + } + + if (data->bitmask & XT_IPVS_VADDR) { + if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, + &data->vmask, family) ^ + !(data->invert & XT_IPVS_VADDR)) { + match = false; + goto out_put_cp; + } + } + +out_put_cp: + __ip_vs_conn_put(cp); +out: + pr_debug("match=%d\n", match); + return match; +} + +static int ipvs_mt_check(const struct xt_mtchk_param *par) +{ + if (par->family != NFPROTO_IPV4 +#ifdef CONFIG_IP_VS_IPV6 + && par->family != NFPROTO_IPV6 +#endif + ) { + pr_info("protocol family %u not supported\n", par->family); + return -EINVAL; + } + + return 0; +} + +static struct xt_match xt_ipvs_mt_reg __read_mostly = { + .name = "ipvs", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = ipvs_mt, + .checkentry = ipvs_mt_check, + .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)), + .me = THIS_MODULE, +}; + +static int __init ipvs_mt_init(void) +{ + return xt_register_match(&xt_ipvs_mt_reg); +} + +static void __exit ipvs_mt_exit(void) +{ + xt_unregister_match(&xt_ipvs_mt_reg); +} + +module_init(ipvs_mt_init); +module_exit(ipvs_mt_exit); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index b4f7dfea5980..70eb2b4984dd 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -11,7 +11,8 @@ #include <linux/netfilter/xt_quota.h> struct xt_quota_priv { - uint64_t quota; + spinlock_t lock; + uint64_t quota; }; MODULE_LICENSE("GPL"); @@ -20,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: countdown quota match"); MODULE_ALIAS("ipt_quota"); MODULE_ALIAS("ip6t_quota"); -static DEFINE_SPINLOCK(quota_lock); - static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -29,7 +28,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; - spin_lock_bh("a_lock); + spin_lock_bh(&priv->lock); if (priv->quota >= skb->len) { priv->quota -= skb->len; ret = !ret; @@ -37,9 +36,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par) /* we do not allow even small packets from now on */ priv->quota = 0; } - /* Copy quota back to matchinfo so that iptables can display it */ - q->quota = priv->quota; - spin_unlock_bh("a_lock); + spin_unlock_bh(&priv->lock); return ret; } @@ -55,6 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par) if (q->master == NULL) return -ENOMEM; + spin_lock_init(&q->master->lock); q->master->quota = q->quota; return 0; } diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index c04fcf385c59..ef36a56a02c6 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -3,6 +3,7 @@ #include <linux/skbuff.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/sctp/sctp.h> #include <linux/sctp.h> #include <linux/netfilter/x_tables.h> @@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += (ntohs(sch->length) + 3) & ~3; + offset += WORD_ROUND(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 3d54c236a1ba..1ca89908cbad 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, * reply packet of an established SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); - if (ct && (ct != &nf_conntrack_untracked) && + if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || (iph->protocol == IPPROTO_ICMP && diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index e12e053d3782..a507922d80cd 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - if (nf_ct_is_untracked(skb)) - statebit = XT_STATE_UNTRACKED; - else if (!nf_ct_get(skb, &ctinfo)) + if (!ct) statebit = XT_STATE_INVALID; - else - statebit = XT_STATE_BIT(ctinfo); - + else { + if (nf_ct_is_untracked(ct)) + statebit = XT_STATE_UNTRACKED; + else + statebit = XT_STATE_BIT(ctinfo); + } return (sinfo->statemask & statebit); } diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 96e62b8fd6b1..42ecb71d445f 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -18,8 +18,8 @@ #include <linux/netfilter/x_tables.h> struct xt_statistic_priv { - uint32_t count; -}; + atomic_t count; +} ____cacheline_aligned_in_smp; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); @@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); MODULE_ALIAS("ipt_statistic"); MODULE_ALIAS("ip6t_statistic"); -static DEFINE_SPINLOCK(nth_lock); - static bool statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; + int nval, oval; switch (info->mode) { case XT_STATISTIC_MODE_RANDOM: @@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par) ret = !ret; break; case XT_STATISTIC_MODE_NTH: - spin_lock_bh(&nth_lock); - if (info->master->count++ == info->u.nth.every) { - info->master->count = 0; + do { + oval = atomic_read(&info->master->count); + nval = (oval == info->u.nth.every) ? 0 : oval + 1; + } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval); + if (nval == 0) ret = !ret; - } - spin_unlock_bh(&nth_lock); break; } @@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par) info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); if (info->master == NULL) return -ENOMEM; - info->master->count = info->u.nth.count; + atomic_set(&info->master->count, info->u.nth.count); return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a2eb965207d3..2cbf380377d5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1076,14 +1076,15 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); - kfree_skb(skb); + consume_skb(skb); netlink_unlock_table(); - kfree_skb(info.skb2); - - if (info.delivery_failure) + if (info.delivery_failure) { + kfree_skb(info.skb2); return -ENOBUFS; + } else + consume_skb(info.skb2); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) @@ -1323,19 +1324,23 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) + if (NULL == siocb->scm) { siocb->scm = &scm; + memset(&scm, 0, sizeof(scm)); + } err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; if (msg->msg_namelen) { + err = -EINVAL; if (addr->nl_family != AF_NETLINK) - return -EINVAL; + goto out; dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); + err = -EPERM; if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) - return -EPERM; + goto out; } else { dst_pid = nlk->dst_pid; dst_group = nlk->dst_group; @@ -1387,6 +1392,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); out: + scm_destroy(siocb->scm); return err; } @@ -1400,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb, *frag __maybe_unused = NULL; + struct sk_buff *skb; int err; if (flags&MSG_OOB) @@ -1435,7 +1441,21 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - frag = skb_shinfo(skb)->frag_list; + /* + * Before setting frag_list to NULL, we must get a + * private copy of skb if shared (because of MSG_PEEK) + */ + if (skb_shared(skb)) { + struct sk_buff *nskb; + + nskb = pskb_copy(skb, GFP_KERNEL); + kfree_skb(skb); + skb = nskb; + err = -ENOMEM; + if (!skb) + goto out; + } + kfree_skb(skb_shinfo(skb)->frag_list); skb_shinfo(skb)->frag_list = NULL; } } @@ -1472,10 +1492,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; -#ifdef CONFIG_COMPAT_NETLINK_MESSAGES - skb_shinfo(skb)->frag_list = frag; -#endif - skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index aa4308afcc7f..26ed3e8587c2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -303,6 +303,7 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) errout: return err; } +EXPORT_SYMBOL(genl_register_ops); /** * genl_unregister_ops - unregister generic netlink operations @@ -337,6 +338,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_ops); /** * genl_register_family - register a generic netlink family @@ -405,6 +407,7 @@ errout_locked: errout: return err; } +EXPORT_SYMBOL(genl_register_family); /** * genl_register_family_with_ops - register a generic netlink family @@ -485,6 +488,7 @@ int genl_unregister_family(struct genl_family *family) return -ENOENT; } +EXPORT_SYMBOL(genl_unregister_family); static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { @@ -873,11 +877,7 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family(&genl_ctrl); - if (err < 0) - goto problem; - - err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); + err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1); if (err < 0) goto problem; @@ -899,11 +899,6 @@ problem: subsys_initcall(genl_init); -EXPORT_SYMBOL(genl_register_ops); -EXPORT_SYMBOL(genl_unregister_ops); -EXPORT_SYMBOL(genl_register_family); -EXPORT_SYMBOL(genl_unregister_family); - static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, gfp_t flags) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2078a277e06b..9a17f28b1253 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -83,6 +83,7 @@ #include <linux/if_vlan.h> #include <linux/virtio_net.h> #include <linux/errqueue.h> +#include <linux/net_tstamp.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -202,6 +203,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timeval tv; struct timespec ts; + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h1->tp_snaplen = snaplen; h.h1->tp_mac = macoff; h.h1->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + tv = ktime_to_timeval(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + tv = ktime_to_timeval(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) tv = ktime_to_timeval(skb->tstamp); else do_gettimeofday(&tv); @@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_snaplen = snaplen; h.h2->tp_mac = macoff; h.h2->tp_net = netoff; - if (skb->tstamp.tv64) + if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) + && shhwtstamps->syststamp.tv64) + ts = ktime_to_timespec(shhwtstamps->syststamp); + else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) + && shhwtstamps->hwtstamp.tv64) + ts = ktime_to_timespec(shhwtstamps->hwtstamp); + else if (skb->tstamp.tv64) ts = ktime_to_timespec(skb->tstamp); else getnstimeofday(&ts); @@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->has_vnet_hdr = !!val; return 0; } + case PACKET_TIMESTAMP: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->tp_tstamp = val; + return 0; + } default: return -ENOPROTOOPT; } @@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_loss; data = &val; break; + case PACKET_TIMESTAMP: + if (len > sizeof(int)) + len = sizeof(int); + val = po->tp_tstamp; + data = &val; + break; default: return -ENOPROTOOPT; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 94d72e85a475..b2a3ae6cad78 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) newsk = NULL; goto out; } + kfree_skb(oskb); sock_hold(sk); pep_sk(newsk)->listener = sk; diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index c33da6576942..b18e48fae975 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr) return err; } +static void phonet_device_rcu_free(struct rcu_head *head) +{ + struct phonet_device *pnd; + + pnd = container_of(head, struct phonet_device, rcu); + kfree(pnd); +} + int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = NULL; mutex_unlock(&pndevs->lock); - if (pnd) { - synchronize_rcu(); - kfree(pnd); - } + if (pnd) + call_rcu(&pnd->rcu, phonet_device_rcu_free); + return err; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index cbc244a128bd..b4fdaac233f7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -109,7 +109,9 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, init_timer(&rose_neigh->t0timer); if (rose_route->ndigis != 0) { - if ((rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) { + rose_neigh->digipeat = + kmalloc(sizeof(ax25_digi), GFP_ATOMIC); + if (rose_neigh->digipeat == NULL) { kfree(rose_neigh); res = -ENOMEM; goto out; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index f0f85b0123f7..9f1729bd60de 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) return; } - peer->if_mtu = dst_mtu(&rt->u.dst); - dst_release(&rt->u.dst); + peer->if_mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); _leave(" [if_mtu %u]", peer->if_mtu); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 972378f47f3c..23b25f89e7e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -26,6 +26,11 @@ #include <net/act_api.h> #include <net/netlink.h> +static void tcf_common_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_common, tcfc_rcu)); +} + void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); @@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_unlock_bh(hinfo->lock); gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcfc_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); return; } } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c0b6863e3b87..11f195af2da0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -33,6 +33,7 @@ static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); +static LIST_HEAD(mirred_list); static struct tcf_hashinfo mirred_hash_info = { .htab = tcf_mirred_ht, @@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) m->tcf_bindcnt--; m->tcf_refcnt--; if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); + list_del(&m->tcfm_list); + if (m->tcfm_dev) + dev_put(m->tcfm_dev); tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } @@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); - if (ret == ACT_P_CREATED) + if (ret == ACT_P_CREATED) { + list_add(&m->tcfm_list, &mirred_list); tcf_hash_insert(pc, &mirred_hash_info); + } return ret; } @@ -160,22 +165,27 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, spin_lock(&m->tcf_lock); m->tcf_tm.lastuse = jiffies; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); + m->tcf_bstats.packets++; dev = m->tcfm_dev; + if (!dev) { + printk_once(KERN_NOTICE "tc mirred: target device is gone\n"); + goto out; + } + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) - pr_notice("tc mirred to Houston: device %s is gone!\n", + pr_notice("tc mirred to Houston: device %s is down\n", dev->name); goto out; } - skb2 = skb_act_clone(skb, GFP_ATOMIC); + at = G_TC_AT(skb->tc_verd); + skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action); if (skb2 == NULL) goto out; - m->tcf_bstats.bytes += qdisc_pkt_len(skb2); - m->tcf_bstats.packets++; - at = G_TC_AT(skb->tc_verd); if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); @@ -185,16 +195,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); - skb2->dev = dev; skb2->skb_iif = skb->dev->ifindex; + skb2->dev = dev; dev_queue_xmit(skb2); err = 0; out: if (err) { m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += qdisc_pkt_len(skb); - m->tcf_bstats.packets++; /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -232,6 +240,28 @@ nla_put_failure: return -1; } +static int mirred_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct tcf_mirred *m; + + if (event == NETDEV_UNREGISTER) + list_for_each_entry(m, &mirred_list, tcfm_list) { + if (m->tcfm_dev == dev) { + dev_put(dev); + m->tcfm_dev = NULL; + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block mirred_device_notifier = { + .notifier_call = mirred_device_event, +}; + + static struct tc_action_ops act_mirred_ops = { .kind = "mirred", .hinfo = &mirred_hash_info, @@ -252,12 +282,17 @@ MODULE_LICENSE("GPL"); static int __init mirred_init_module(void) { + int err = register_netdevice_notifier(&mirred_device_notifier); + if (err) + return err; + pr_info("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } static void __exit mirred_cleanup_module(void) { + unregister_netdevice_notifier(&mirred_device_notifier); tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 570949417f38..d0386a413e8d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, { struct icmphdr *icmph; - if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + if (!pskb_may_pull(skb, ihl + sizeof(*icmph))) goto drop; icmph = (void *)(skb_network_header(skb) + ihl); @@ -215,6 +215,10 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, (icmph->type != ICMP_PARAMETERPROB)) break; + if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) + goto drop; + + icmph = (void *)(skb_network_header(skb) + ihl); iph = (void *)(icmph + 1); if (egress) addr = iph->daddr; @@ -243,7 +247,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, iph->saddr = new_addr; inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, - 1); + 0); break; } default: @@ -265,40 +269,29 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat *opt; + struct tc_nat opt; struct tcf_t t; - int s; - - s = sizeof(*opt); - /* netlink spinlocks held above us - must use ATOMIC */ - opt = kzalloc(s, GFP_ATOMIC); - if (unlikely(!opt)) - return -ENOBUFS; + opt.old_addr = p->old_addr; + opt.new_addr = p->new_addr; + opt.mask = p->mask; + opt.flags = p->flags; - opt->old_addr = p->old_addr; - opt->new_addr = p->new_addr; - opt->mask = p->mask; - opt->flags = p->flags; + opt.index = p->tcf_index; + opt.action = p->tcf_action; + opt.refcnt = p->tcf_refcnt - ref; + opt.bindcnt = p->tcf_bindcnt - bind; - opt->index = p->tcf_index; - opt->action = p->tcf_action; - opt->refcnt = p->tcf_refcnt - ref; - opt->bindcnt = p->tcf_bindcnt - bind; - - NLA_PUT(skb, TCA_NAT_PARMS, s, opt); + NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(p->tcf_tm.expires); NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); - kfree(opt); - return skb->len; nla_put_failure: nlmsg_trim(skb, b); - kfree(opt); return -1; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 50e3d945e1f4..a0593c9640db 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -127,8 +127,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, int i, munged = 0; unsigned int off; - if (!(skb->tc_verd & TC_OK2MUNGE)) { - /* should we set skb->cloned? */ + if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { return p->tcf_action; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 654f73dff7c1..537a48732e9e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -97,6 +97,11 @@ nla_put_failure: goto done; } +static void tcf_police_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct tcf_police, tcf_rcu)); +} + static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); @@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) qdisc_put_rtab(p->tcfp_P_tab); - kfree(p); + /* + * gen_estimator est_timer() might access p->tcf_lock + * or bstats, wait a RCU grace period before freeing p + */ + call_rcu(&p->tcf_rcu, tcf_police_free_rcu); return; } } @@ -397,6 +406,7 @@ static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */ } module_init(police_init_module); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 1b4bc691d7d1..4a1d640b0cf1 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -73,10 +73,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind) static int alloc_defdata(struct tcf_defact *d, char *defdata) { - d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); + d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; - + strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); return 0; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4f522143811e..7416a5c73b2a 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,10 +134,12 @@ next_knode: #endif for (i = n->sel.nkeys; i>0; i--, key++) { - unsigned int toff; + int toff = off + key->off + (off2 & key->offmask); __be32 *data, _data; - toff = off + key->off + (off2 & key->offmask); + if (skb_headroom(skb) + toff < 0) + goto out; + data = skb_header_pointer(skb, toff, 4, &_data); if (!data) goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fcbb86a486a2..e114f23d5eae 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -52,7 +52,7 @@ struct atm_flow_data { int ref; /* reference count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct atm_flow_data *next; + struct list_head list; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ int hdr_len; @@ -61,34 +61,23 @@ struct atm_flow_data { struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ - struct atm_flow_data *flows; /* NB: "link" is also on this + struct list_head flows; /* NB: "link" is also on this list */ struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ -static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow) -{ - struct atm_flow_data *walk; - - pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow); - for (walk = qdisc->flows; walk; walk = walk->next) - if (walk == flow) - return 1; - pr_debug("find_flow: not found\n"); - return 0; -} - static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow; - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { if (flow->classid == classid) - break; - return flow; + return flow; + } + return NULL; } static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, @@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", sch, p, flow, new, old); - if (!find_flow(p, flow)) + if (list_empty(&flow->list)) return -EINVAL; if (!new) new = &noop_qdisc; @@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct atm_flow_data **prev; pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); if (--flow->ref) return; pr_debug("atm_tc_put: destroying\n"); - for (prev = &p->flows; *prev; prev = &(*prev)->next) - if (*prev == flow) - break; - if (!*prev) { - printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow); - return; - } - *prev = flow->next; + list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); tcf_destroy_chain(&flow->filter_list); @@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (find_flow(p, flow)) { + if (!list_empty(&flow->list)) { error = -EEXIST; goto err_out; } @@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, flow->classid = classid; flow->ref = 1; flow->excess = excess; - flow->next = p->link.next; - p->link.next = flow; + list_add(&flow->list, &p->link.list); flow->hdr_len = hdr_len; if (hdr) memcpy(flow->hdr, hdr, hdr_len); @@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) struct atm_flow_data *flow = (struct atm_flow_data *)arg; pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (!find_flow(qdisc_priv(sch), flow)) + if (list_empty(&flow->list)) return -EINVAL; if (flow->filter_list || flow == &p->link) return -EBUSY; @@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); if (walker->stop) return; - for (flow = p->flows; flow; flow = flow->next) { - if (walker->count >= walker->skip) - if (walker->fn(sch, (unsigned long)flow, walker) < 0) { - walker->stop = 1; - break; - } + list_for_each_entry(flow, &p->flows, list) { + if (walker->count >= walker->skip && + walker->fn(sch, (unsigned long)flow, walker) < 0) { + walker->stop = 1; + break; + } walker->count++; } } @@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = NULL; /* @@@ */ + struct atm_flow_data *flow; struct tcf_result res; int result; int ret = NET_XMIT_POLICED; pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); result = TC_POLICE_OK; /* be nice to gcc */ + flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) - for (flow = p->flows; flow; flow = flow->next) + !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { + list_for_each_entry(flow, &p->flows, list) { if (flow->filter_list) { result = tc_classify_compat(skb, flow->filter_list, @@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - break; + goto done; } + } + flow = NULL; + done: + ; + } if (!flow) flow = &p->link; else { @@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data) struct sk_buff *skb; pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->link.next; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { + if (flow == &p->link) + continue; /* * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... (but that's okay) @@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data) /* atm.atm_options are already set by atm_tc_enqueue */ flow->vcc->send(flow->vcc, skb); } + } } static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) @@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch) unsigned int len; pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) { if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) return len; + } return 0; } @@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) struct atm_qdisc_data *p = qdisc_priv(sch); pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - p->flows = &p->link; + INIT_LIST_HEAD(&p->flows); + INIT_LIST_HEAD(&p->link.list); + list_add(&p->link.list, &p->flows); p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (!p->link.q) @@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.sock = NULL; p->link.classid = sch->handle; p->link.ref = 1; - p->link.next = NULL; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } @@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) qdisc_reset(flow->q); sch->q.qlen = 0; } @@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch) static void atm_tc_destroy(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; + struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - for (flow = p->flows; flow; flow = flow->next) + list_for_each_entry(flow, &p->flows, list) tcf_destroy_chain(&flow->filter_list); - /* races ? */ - while ((flow = p->flows)) { + list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); atm_tc_put(sch, (unsigned long)flow); - if (p->flows == flow) { - printk(KERN_ERR "atm_destroy: putting flow %p didn't " - "kill it\n", flow); - p->flows = flow->next; /* brute force */ - break; - } } tasklet_kill(&p->task); } @@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", sch, p, flow, skb, tcm); - if (!find_flow(p, flow)) + if (list_empty(&flow->list)) return -EINVAL; tcm->tcm_handle = flow->classid; tcm->tcm_info = flow->q->handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a63029ef3edd..2aeb3a4386a1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -96,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * Another cpu is holding lock, requeue & delay xmits for * some time. */ - __get_cpu_var(softnet_data).cpu_collision++; + __this_cpu_inc(softnet_data.cpu_collision); ret = dev_requeue_skb(skb, q); } @@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q) } } - clear_bit(__QDISC_STATE_RUNNING, &q->state); + qdisc_run_end(q); } unsigned long dev_trans_start(struct net_device *dev) @@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev) } EXPORT_SYMBOL(netif_carrier_off); +/** + * netif_notify_peers - notify network peers about existence of @dev + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void netif_notify_peers(struct net_device *dev) +{ + rtnl_lock(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + rtnl_unlock(); +} +EXPORT_SYMBOL(netif_notify_peers); + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. @@ -543,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); + spin_lock_init(&sch->busylock); sch->ops = ops; sch->enqueue = ops->enqueue; sch->dequeue = ops->dequeue; @@ -779,7 +798,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) spin_lock_bh(root_lock); - val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || + val = (qdisc_is_running(q) || test_bit(__QDISC_STATE_SCHED, &q->state)); spin_unlock_bh(root_lock); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0b52b8de562c..4be8d04b262d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = { }; static struct Qdisc_ops htb_qdisc_ops __read_mostly = { - .next = NULL, .cl_ops = &htb_class_ops, .id = "htb", .priv_size = sizeof(struct htb_sched), @@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .init = htb_init, .reset = htb_reset, .destroy = htb_destroy, - .change = NULL /* htb_change */, .dump = htb_dump, .owner = THIS_MODULE, }; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index e41feff19e43..0b85e5256434 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -172,7 +172,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = (unsigned long)sp->autoclose * HZ; - /* Initilizes the timers */ + /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) setup_timer(&asoc->timers[i], sctp_timer_events[i], (unsigned long)asoc); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 182749867c72..5027b83f1cc0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, __func__, &fl.fl4_dst, &fl.fl4_src); if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; } /* If there is no association or if a source address is passed, no @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, fl.fl4_src = laddr->a.v4.sin_addr.s_addr; fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { - dst = &rt->u.dst; + dst = &rt->dst; goto out_unlock; } } @@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) static inline int init_sctp_mibs(void) { return snmp_mib_init((void __percpu **)sctp_statistics, - sizeof(struct sctp_mib)); + sizeof(struct sctp_mib), + __alignof__(struct sctp_mib)); } static inline void cleanup_sctp_mibs(void) @@ -1162,7 +1163,7 @@ SCTP_STATIC __init int sctp_init(void) /* Set the pressure threshold to be a fraction of global memory that * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. - * Note this initalizes the data in sctpv6_prot too + * Note this initializes the data in sctpv6_prot too * Unabashedly stolen from tcp_init */ nr_pages = totalram_pages - totalhigh_pages; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index bd2a50b482ac..246f92924658 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1817,7 +1817,7 @@ malformed: struct __sctp_missing { __be32 num_missing; __be16 type; -} __attribute__((packed)); +} __packed; /* * Report a missing mandatory parameter. diff --git a/net/socket.c b/net/socket.c index 367d5477d00f..2270b941bcc7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -170,15 +170,6 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - - 16 for IP, 16 for IPX, - 24 for IPv6, - about 80 for AX.25 - must be at least one bigger than - the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). - */ - /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -309,9 +300,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; static int sockfs_get_sb(struct file_system_type *fs_type, @@ -411,6 +402,7 @@ int sock_map_fd(struct socket *sock, int flags) return fd; } +EXPORT_SYMBOL(sock_map_fd); static struct socket *sock_from_file(struct file *file, int *err) { @@ -422,7 +414,7 @@ static struct socket *sock_from_file(struct file *file, int *err) } /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -450,6 +442,7 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { @@ -540,6 +533,7 @@ void sock_release(struct socket *sock) } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, union skb_shared_tx *shtx) @@ -586,6 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -604,6 +599,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_sendmsg); static int ktime2ts(ktime_t kt, struct timespec *ts) { @@ -664,7 +660,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -720,6 +715,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -752,6 +748,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } +EXPORT_SYMBOL(kernel_recvmsg); static void sock_aio_dtor(struct kiocb *iocb) { @@ -774,7 +771,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -887,7 +884,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -895,7 +892,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -907,7 +903,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -919,7 +914,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1047,6 +1041,7 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) @@ -1147,6 +1142,7 @@ call_kill: rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) @@ -1265,11 +1261,13 @@ int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1474,7 +1472,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1861,8 +1860,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); @@ -1964,8 +1962,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; err = -EMSGSIZE; @@ -2191,10 +2188,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5) + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5) }; #undef AL @@ -2340,6 +2337,7 @@ int sock_register(const struct net_proto_family *ops) printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2366,6 +2364,7 @@ void sock_unregister(int family) printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { @@ -2395,6 +2394,10 @@ static int __init sock_init(void) netfilter_init(); #endif +#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING + skb_timestamping_init(); +#endif + return 0; } @@ -2490,13 +2493,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2516,9 +2519,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2567,7 +2570,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,9 +2604,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(old_fs); return err; case SIOCBONDSLAVEINFOQUERY: @@ -2710,9 +2713,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -2734,7 +2737,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2750,20 +2753,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif } struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2793,29 +2796,29 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= __get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user(r4.rt_window, &(ur4->rt_window)); + ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); r4.rt_dev = devname; devname[15] = 0; } else r4.rt_dev = NULL; @@ -2828,9 +2831,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2993,11 +2996,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3022,24 +3027,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) @@ -3056,6 +3065,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) @@ -3072,6 +3082,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3083,6 +3094,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3095,33 +3107,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 73affb8624fa..880d0de3f50f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -19,6 +19,15 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif +#define RPC_CREDCACHE_DEFAULT_HASHBITS (4) +struct rpc_cred_cache { + struct hlist_head *hashtable; + unsigned int hashbits; + spinlock_t lock; +}; + +static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS; + static DEFINE_SPINLOCK(rpc_authflavor_lock); static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { &authnull_ops, /* AUTH_NULL */ @@ -29,6 +38,42 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +#define MAX_HASHTABLE_BITS (10) +static int param_set_hashtbl_sz(const char *val, struct kernel_param *kp) +{ + unsigned long num; + unsigned int nbits; + int ret; + + if (!val) + goto out_inval; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL) + goto out_inval; + nbits = fls(num); + if (num > (1U << nbits)) + nbits++; + if (nbits > MAX_HASHTABLE_BITS || nbits < 2) + goto out_inval; + *(unsigned int *)kp->arg = nbits; + return 0; +out_inval: + return -EINVAL; +} + +static int param_get_hashtbl_sz(char *buffer, struct kernel_param *kp) +{ + unsigned int nbits; + + nbits = *(unsigned int *)kp->arg; + return sprintf(buffer, "%u", 1U << nbits); +} + +#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int); + +module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); +MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); + static u32 pseudoflavor_to_flavor(u32 flavor) { if (flavor >= RPC_AUTH_MAXFLAVOR) @@ -145,16 +190,23 @@ int rpcauth_init_credcache(struct rpc_auth *auth) { struct rpc_cred_cache *new; - int i; + unsigned int hashsize; new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) - return -ENOMEM; - for (i = 0; i < RPC_CREDCACHE_NR; i++) - INIT_HLIST_HEAD(&new->hashtable[i]); + goto out_nocache; + new->hashbits = auth_hashbits; + hashsize = 1U << new->hashbits; + new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL); + if (!new->hashtable) + goto out_nohashtbl; spin_lock_init(&new->lock); auth->au_credcache = new; return 0; +out_nohashtbl: + kfree(new); +out_nocache: + return -ENOMEM; } EXPORT_SYMBOL_GPL(rpcauth_init_credcache); @@ -183,11 +235,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache) LIST_HEAD(free); struct hlist_head *head; struct rpc_cred *cred; + unsigned int hashsize = 1U << cache->hashbits; int i; spin_lock(&rpc_credcache_lock); spin_lock(&cache->lock); - for (i = 0; i < RPC_CREDCACHE_NR; i++) { + for (i = 0; i < hashsize; i++) { head = &cache->hashtable[i]; while (!hlist_empty(head)) { cred = hlist_entry(head->first, struct rpc_cred, cr_hash); @@ -216,6 +269,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) if (cache) { auth->au_credcache = NULL; rpcauth_clear_credcache(cache); + kfree(cache->hashtable); kfree(cache); } } @@ -267,7 +321,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(free); int res; @@ -297,7 +351,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, *entry, *new; unsigned int nr; - nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); + nr = hash_long(acred->uid, cache->hashbits); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -390,16 +444,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -void +struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) { - task->tk_msg.rpc_cred = get_rpccred(cred); dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + return get_rpccred(cred); } EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); -static void +static struct rpc_cred * rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; @@ -407,45 +461,43 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) .uid = 0, .gid = 0, }; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } -static void +static struct rpc_cred * rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; - struct rpc_cred *ret; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, auth->au_ops->au_name); - ret = rpcauth_lookupcred(auth, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return rpcauth_lookupcred(auth, lookupflags); } -void +static int rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *new; int lookupflags = 0; if (flags & RPC_TASK_ASYNC) lookupflags |= RPCAUTH_LOOKUP_NEW; if (cred != NULL) - cred->cr_ops->crbind(task, cred, lookupflags); + new = cred->cr_ops->crbind(task, cred, lookupflags); else if (flags & RPC_TASK_ROOTCREDS) - rpcauth_bind_root_cred(task, lookupflags); + new = rpcauth_bind_root_cred(task, lookupflags); else - rpcauth_bind_new_cred(task, lookupflags); + new = rpcauth_bind_new_cred(task, lookupflags); + if (IS_ERR(new)) + return PTR_ERR(new); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); + req->rq_cred = new; + return 0; } void @@ -484,22 +536,10 @@ out_nodestroy: } EXPORT_SYMBOL_GPL(put_rpccred); -void -rpcauth_unbindcred(struct rpc_task *task) -{ - struct rpc_cred *cred = task->tk_msg.rpc_cred; - - dprintk("RPC: %5u releasing %s cred %p\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); - - put_rpccred(cred); - task->tk_msg.rpc_cred = NULL; -} - __be32 * rpcauth_marshcred(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u marshaling %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -510,7 +550,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p) __be32 * rpcauth_checkverf(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u validating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -522,7 +562,7 @@ int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -536,7 +576,7 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", task->tk_pid, cred->cr_ops->cr_name, cred); @@ -550,13 +590,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, int rpcauth_refreshcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int err; + cred = task->tk_rqstp->rq_cred; + if (cred == NULL) { + err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags); + if (err < 0) + goto out; + cred = task->tk_rqstp->rq_cred; + }; dprintk("RPC: %5u refreshing %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); err = cred->cr_ops->crrefresh(task); +out: if (err < 0) task->tk_status = err; return err; @@ -565,7 +613,7 @@ rpcauth_refreshcred(struct rpc_task *task) void rpcauth_invalcred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; dprintk("RPC: %5u invalidating %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); @@ -576,7 +624,7 @@ rpcauth_invalcred(struct rpc_task *task) int rpcauth_uptodatecred(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; return cred == NULL || test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; @@ -587,14 +635,27 @@ static struct shrinker rpc_cred_shrinker = { .seeks = DEFAULT_SEEKS, }; -void __init rpcauth_init_module(void) +int __init rpcauth_init_module(void) { - rpc_init_authunix(); - rpc_init_generic_auth(); + int err; + + err = rpc_init_authunix(); + if (err < 0) + goto out1; + err = rpc_init_generic_auth(); + if (err < 0) + goto out2; register_shrinker(&rpc_cred_shrinker); + return 0; +out2: + rpc_destroy_authunix(); +out1: + return err; } void __exit rpcauth_remove_module(void) { + rpc_destroy_authunix(); + rpc_destroy_generic_auth(); unregister_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 8f623b0f03dd..43162bb3b78f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -27,7 +27,6 @@ struct generic_cred { }; static struct rpc_auth generic_auth; -static struct rpc_cred_cache generic_cred_cache; static const struct rpc_credops generic_credops; /* @@ -55,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void) } EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); -static void -generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) +static struct rpc_cred *generic_bind_cred(struct rpc_task *task, + struct rpc_cred *cred, int lookupflags) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - struct rpc_cred *ret; - ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); - if (!IS_ERR(ret)) - task->tk_msg.rpc_cred = ret; - else - task->tk_status = PTR_ERR(ret); + return auth->au_ops->lookup_cred(auth, acred, lookupflags); } /* @@ -159,20 +153,16 @@ out_nomatch: return 0; } -void __init rpc_init_generic_auth(void) +int __init rpc_init_generic_auth(void) { - spin_lock_init(&generic_cred_cache.lock); + return rpcauth_init_credcache(&generic_auth); } void __exit rpc_destroy_generic_auth(void) { - rpcauth_clear_credcache(&generic_cred_cache); + rpcauth_destroy_credcache(&generic_auth); } -static struct rpc_cred_cache generic_cred_cache = { - {{ NULL, },}, -}; - static const struct rpc_authops generic_auth_ops = { .owner = THIS_MODULE, .au_name = "Generic", @@ -183,7 +173,6 @@ static const struct rpc_authops generic_auth_ops = { static struct rpc_auth generic_auth = { .au_ops = &generic_auth_ops, .au_count = ATOMIC_INIT(0), - .au_credcache = &generic_cred_cache, }; static const struct rpc_credops generic_credops = { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 8da2a0e68574..dcfc66bab2bb 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -373,7 +373,7 @@ gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss static void gss_upcall_callback(struct rpc_task *task) { - struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, + struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct inode *inode = &gss_msg->inode->vfs_inode; @@ -502,7 +502,7 @@ static void warn_gssd(void) static inline int gss_refresh_upcall(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, @@ -928,6 +928,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx) { dprintk("RPC: gss_free_ctx\n"); + gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx); } @@ -942,13 +943,7 @@ gss_free_ctx_callback(struct rcu_head *head) static void gss_free_ctx(struct gss_cl_ctx *ctx) { - struct gss_ctx *gc_gss_ctx; - - gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx); - rcu_assign_pointer(ctx->gc_gss_ctx, NULL); call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); - if (gc_gss_ctx) - gss_delete_sec_context(&gc_gss_ctx); } static void @@ -1064,12 +1059,12 @@ out: static __be32 * gss_marshal(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *cred_len; - struct rpc_rqst *req = task->tk_rqstp; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; @@ -1119,7 +1114,7 @@ out_put_ctx: static int gss_renew_cred(struct rpc_task *task) { - struct rpc_cred *oldcred = task->tk_msg.rpc_cred; + struct rpc_cred *oldcred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(oldcred, struct gss_cred, gc_base); @@ -1133,7 +1128,7 @@ static int gss_renew_cred(struct rpc_task *task) new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); - task->tk_msg.rpc_cred = new; + task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; } @@ -1161,7 +1156,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred) static int gss_refresh(struct rpc_task *task) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; int ret = 0; if (gss_cred_is_negative_entry(cred)) @@ -1172,7 +1167,7 @@ gss_refresh(struct rpc_task *task) ret = gss_renew_cred(task); if (ret < 0) goto out; - cred = task->tk_msg.rpc_cred; + cred = task->tk_rqstp->rq_cred; } if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) @@ -1191,7 +1186,7 @@ gss_refresh_null(struct rpc_task *task) static __be32 * gss_validate(struct rpc_task *task, __be32 *p) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 seq; struct kvec iov; @@ -1400,7 +1395,7 @@ static int gss_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); @@ -1503,7 +1498,7 @@ static int gss_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 1db618f56ecb..a5c36c01707b 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p) static int nul_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index aac2f8b4ee21..4cb70dc6e7ad 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -29,7 +29,6 @@ struct unx_cred { #endif static struct rpc_auth unix_auth; -static struct rpc_cred_cache unix_cred_cache; static const struct rpc_credops unix_credops; static struct rpc_auth * @@ -141,7 +140,7 @@ static __be32 * unx_marshal(struct rpc_task *task, __be32 *p) { struct rpc_clnt *clnt = task->tk_client; - struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); + struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); __be32 *base, *hold; int i; @@ -174,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) static int unx_refresh(struct rpc_task *task) { - set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); return 0; } @@ -197,15 +196,20 @@ unx_validate(struct rpc_task *task, __be32 *p) printk("RPC: giant verf size: %u\n", size); return NULL; } - task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; + task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2; p += (size >> 2); return p; } -void __init rpc_init_authunix(void) +int __init rpc_init_authunix(void) { - spin_lock_init(&unix_cred_cache.lock); + return rpcauth_init_credcache(&unix_auth); +} + +void rpc_destroy_authunix(void) +{ + rpcauth_destroy_credcache(&unix_auth); } const struct rpc_authops authunix_ops = { @@ -219,17 +223,12 @@ const struct rpc_authops authunix_ops = { }; static -struct rpc_cred_cache unix_cred_cache = { -}; - -static struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), - .au_credcache = &unix_cred_cache, }; static diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b75879b173c8..2b06410e584e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -319,7 +319,7 @@ static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); -static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); +static struct delayed_work cache_cleaner; static void sunrpc_init_cache_detail(struct cache_detail *cd) { @@ -1503,6 +1503,11 @@ static int create_cache_proc_entries(struct cache_detail *cd) } #endif +void __init cache_initialize(void) +{ + INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean); +} + int cache_register(struct cache_detail *cd) { int ret; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 756fc324db9e..2388d83b68ff 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -414,6 +414,35 @@ out_no_clnt: EXPORT_SYMBOL_GPL(rpc_clone_client); /* + * Kill all tasks for the given client. + * XXX: kill their descendants as well? + */ +void rpc_killall_tasks(struct rpc_clnt *clnt) +{ + struct rpc_task *rovr; + + + if (list_empty(&clnt->cl_tasks)) + return; + dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ + spin_lock(&clnt->cl_lock); + list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { + if (!RPC_IS_ACTIVATED(rovr)) + continue; + if (!(rovr->tk_flags & RPC_TASK_KILLED)) { + rovr->tk_flags |= RPC_TASK_KILLED; + rpc_exit(rovr, -EIO); + rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr); + } + } + spin_unlock(&clnt->cl_lock); +} +EXPORT_SYMBOL_GPL(rpc_killall_tasks); + +/* * Properly shut down an RPC client, terminating all outstanding * requests. */ @@ -538,6 +567,49 @@ out: } EXPORT_SYMBOL_GPL(rpc_bind_new_program); +void rpc_task_release_client(struct rpc_task *task) +{ + struct rpc_clnt *clnt = task->tk_client; + + if (clnt != NULL) { + /* Remove from client task list */ + spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); + spin_unlock(&clnt->cl_lock); + task->tk_client = NULL; + + rpc_release_client(clnt); + } +} + +static +void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + if (clnt != NULL) { + rpc_task_release_client(task); + task->tk_client = clnt; + kref_get(&clnt->cl_kref); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + /* Add to the client's list of all tasks */ + spin_lock(&clnt->cl_lock); + list_add_tail(&task->tk_task, &clnt->cl_tasks); + spin_unlock(&clnt->cl_lock); + } +} + +static void +rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) +{ + if (msg != NULL) { + task->tk_msg.rpc_proc = msg->rpc_proc; + task->tk_msg.rpc_argp = msg->rpc_argp; + task->tk_msg.rpc_resp = msg->rpc_resp; + if (msg->rpc_cred != NULL) + task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred); + } +} + /* * Default callback for async RPC calls */ @@ -562,6 +634,18 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) if (IS_ERR(task)) goto out; + rpc_task_set_client(task, task_setup_data->rpc_client); + rpc_task_set_rpc_message(task, task_setup_data->rpc_message); + + if (task->tk_status != 0) { + int ret = task->tk_status; + rpc_put_task(task); + return ERR_PTR(ret); + } + + if (task->tk_action == NULL) + rpc_call_start(task); + atomic_inc(&task->tk_count); rpc_execute(task); out: @@ -756,12 +840,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind); * Restart an (async) RPC call from the call_prepare state. * Usually called from within the exit handler. */ -void +int rpc_restart_call_prepare(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; + return 0; task->tk_action = rpc_prepare_task; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); @@ -769,13 +854,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); * Restart an (async) RPC call. Usually called from within the * exit handler. */ -void +int rpc_restart_call(struct rpc_task *task) { if (RPC_ASSASSINATED(task)) - return; - + return 0; task->tk_action = call_start; + return 1; } EXPORT_SYMBOL_GPL(rpc_restart_call); @@ -824,11 +909,6 @@ call_reserve(struct rpc_task *task) { dprint_status(task); - if (!rpcauth_uptodatecred(task)) { - task->tk_action = call_refresh; - return; - } - task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); @@ -892,7 +972,7 @@ call_reserveresult(struct rpc_task *task) static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack; + unsigned int slack = task->tk_client->cl_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -900,7 +980,7 @@ call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_bind; + task->tk_action = call_refresh; if (req->rq_buffer) return; @@ -937,6 +1017,47 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } +/* + * 2a. Bind and/or refresh the credentials + */ +static void +call_refresh(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_action = call_refreshresult; + task->tk_status = 0; + task->tk_client->cl_stats->rpcauthrefresh++; + rpcauth_refreshcred(task); +} + +/* + * 2b. Process the results of a credential refresh + */ +static void +call_refreshresult(struct rpc_task *task) +{ + int status = task->tk_status; + + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_bind; + if (status >= 0 && rpcauth_uptodatecred(task)) + return; + switch (status) { + case -EACCES: + rpc_exit(task, -EACCES); + return; + case -ENOMEM: + rpc_exit(task, -ENOMEM); + return; + case -ETIMEDOUT: + rpc_delay(task, 3*HZ); + } + task->tk_action = call_refresh; +} + static inline int rpc_task_need_encode(struct rpc_task *task) { @@ -1472,43 +1593,6 @@ out_retry: } } -/* - * 8. Refresh the credentials if rejected by the server - */ -static void -call_refresh(struct rpc_task *task) -{ - dprint_status(task); - - task->tk_action = call_refreshresult; - task->tk_status = 0; - task->tk_client->cl_stats->rpcauthrefresh++; - rpcauth_refreshcred(task); -} - -/* - * 8a. Process the results of a credential refresh - */ -static void -call_refreshresult(struct rpc_task *task) -{ - int status = task->tk_status; - - dprint_status(task); - - task->tk_status = 0; - task->tk_action = call_reserve; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; - if (status == -EACCES) { - rpc_exit(task, -EACCES); - return; - } - task->tk_action = call_refresh; - if (status != -ETIMEDOUT) - rpc_delay(task, 3*HZ); -} - static __be32 * rpc_encode_header(struct rpc_task *task) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4a843b883b89..cace6049e4a5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -246,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - struct rpc_clnt *clnt; - if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) - return; rpc_task_set_debuginfo(task); - /* Add to global list of all tasks */ - clnt = task->tk_client; - if (clnt != NULL) { - spin_lock(&clnt->cl_lock); - list_add_tail(&task->tk_task, &clnt->cl_tasks); - spin_unlock(&clnt->cl_lock); - } + set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); } /* @@ -319,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) { - printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n"); - return; - } - __rpc_add_wait_queue(q, task); BUG_ON(task->tk_callback != NULL); @@ -334,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action) { - /* Mark the task as being activated if so needed */ - rpc_set_active(task); + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); /* * Protect the queue operations. @@ -406,14 +392,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* - * Wake up the specified task - */ -static void rpc_wake_up_task(struct rpc_task *task) -{ - rpc_wake_up_queued_task(task->tk_waitqueue, task); -} - -/* * Wake up the next task on a priority queue. */ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue) @@ -600,7 +578,15 @@ void rpc_exit_task(struct rpc_task *task) } } } -EXPORT_SYMBOL_GPL(rpc_exit_task); + +void rpc_exit(struct rpc_task *task, int status) +{ + task->tk_status = status; + task->tk_action = rpc_exit_task; + if (RPC_IS_QUEUED(task)) + rpc_wake_up_queued_task(task->tk_waitqueue, task); +} +EXPORT_SYMBOL_GPL(rpc_exit); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { @@ -690,7 +676,6 @@ static void __rpc_execute(struct rpc_task *task) dprintk("RPC: %5u got signal\n", task->tk_pid); task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); - rpc_wake_up_task(task); } rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); @@ -714,8 +699,9 @@ static void __rpc_execute(struct rpc_task *task) void rpc_execute(struct rpc_task *task) { rpc_set_active(task); - rpc_set_running(task); - __rpc_execute(task); + rpc_make_runnable(task); + if (!RPC_IS_ASYNC(task)) + __rpc_execute(task); } static void rpc_async_schedule(struct work_struct *work) @@ -808,26 +794,9 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize workqueue for async tasks */ task->tk_workqueue = task_setup_data->workqueue; - task->tk_client = task_setup_data->rpc_client; - if (task->tk_client != NULL) { - kref_get(&task->tk_client->cl_kref); - if (task->tk_client->cl_softrtry) - task->tk_flags |= RPC_TASK_SOFT; - } - if (task->tk_ops->rpc_call_prepare != NULL) task->tk_action = rpc_prepare_task; - if (task_setup_data->rpc_message != NULL) { - task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; - task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; - task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; - /* Bind the user cred */ - rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); - if (task->tk_action == NULL) - rpc_call_start(task); - } - /* starting timestamp */ task->tk_start = ktime_get(); @@ -896,11 +865,8 @@ void rpc_put_task(struct rpc_task *task) if (task->tk_rqstp) xprt_release(task); if (task->tk_msg.rpc_cred) - rpcauth_unbindcred(task); - if (task->tk_client) { - rpc_release_client(task->tk_client); - task->tk_client = NULL; - } + put_rpccred(task->tk_msg.rpc_cred); + rpc_task_release_client(task); if (task->tk_workqueue != NULL) { INIT_WORK(&task->u.tk_work, rpc_async_release); queue_work(task->tk_workqueue, &task->u.tk_work); @@ -913,13 +879,6 @@ static void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %5u release task\n", task->tk_pid); - if (!list_empty(&task->tk_task)) { - struct rpc_clnt *clnt = task->tk_client; - /* Remove from client task list */ - spin_lock(&clnt->cl_lock); - list_del(&task->tk_task); - spin_unlock(&clnt->cl_lock); - } BUG_ON (RPC_IS_QUEUED(task)); /* Wake up anyone who is waiting for task completion */ @@ -928,35 +887,6 @@ static void rpc_release_task(struct rpc_task *task) rpc_put_task(task); } -/* - * Kill all tasks for the given client. - * XXX: kill their descendants as well? - */ -void rpc_killall_tasks(struct rpc_clnt *clnt) -{ - struct rpc_task *rovr; - - - if (list_empty(&clnt->cl_tasks)) - return; - dprintk("RPC: killing all tasks for client %p\n", clnt); - /* - * Spin lock all_tasks to prevent changes... - */ - spin_lock(&clnt->cl_lock); - list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { - if (! RPC_IS_ACTIVATED(rovr)) - continue; - if (!(rovr->tk_flags & RPC_TASK_KILLED)) { - rovr->tk_flags |= RPC_TASK_KILLED; - rpc_exit(rovr, -EIO); - rpc_wake_up_task(rovr); - } - } - spin_unlock(&clnt->cl_lock); -} -EXPORT_SYMBOL_GPL(rpc_killall_tasks); - int rpciod_up(void) { return try_module_get(THIS_MODULE) ? 0 : -EINVAL; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index f438347d817b..c0d085013a2b 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -33,21 +33,27 @@ init_sunrpc(void) if (err) goto out; err = rpc_init_mempool(); - if (err) { - unregister_rpc_pipefs(); - goto out; - } + if (err) + goto out2; + err = rpcauth_init_module(); + if (err) + goto out3; #ifdef RPC_DEBUG rpc_register_sysctl(); #endif #ifdef CONFIG_PROC_FS rpc_proc_init(); #endif + cache_initialize(); cache_register(&ip_map_cache); cache_register(&unix_gid_cache); svc_init_xprt_sock(); /* svc sock transport */ init_socket_xprt(); /* clnt sock transport */ - rpcauth_init_module(); + return 0; +out3: + rpc_destroy_mempool(); +out2: + unregister_rpc_pipefs(); out: return err; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index dcd0132396ba..970fb00f388c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1032,6 +1032,8 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + if (req->rq_cred != NULL) + put_rpccred(req->rq_cred); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); @@ -1129,6 +1131,7 @@ static void xprt_destroy(struct kref *kref) rpc_destroy_wait_queue(&xprt->sending); rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); + cancel_work_sync(&xprt->task_cleanup); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fef2cc5e9d2b..4414a18c63b4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net, return s; } -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) +static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; struct hlist_node *node; @@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; - if (!net_eq(sock_net(s), net)) - continue; - if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; @@ -450,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion) return 0; } +static void init_peercred(struct sock *sk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void copy_peercred(struct sock *sk, struct sock *peersk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); +} + static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); + struct pid *old_pid = NULL; + const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -470,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid); + init_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); + put_pid(old_pid); + if (old_cred) + put_cred(old_cred); out: return err; } @@ -736,7 +755,7 @@ static struct sock *unix_find_other(struct net *net, err = -ECONNREFUSED; if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, inode); + u = unix_find_socket_byinode(inode); if (!u) goto put_fail; @@ -1140,8 +1159,7 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); + init_peercred(newsk); newu = unix_sk(newsk); newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); @@ -1157,7 +1175,7 @@ restart: } /* Set credentials */ - sk->sk_peercred = other->sk_peercred; + copy_peercred(sk, other); sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; @@ -1199,10 +1217,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); - ska->sk_peercred.uid = skb->sk_peercred.uid; - ska->sk_peercred.gid = skb->sk_peercred.gid; + init_peercred(ska); + init_peercred(skb); if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; @@ -1297,18 +1313,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) int i; scm->fp = UNIXCB(skb).fp; - skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } -static void unix_destruct_fds(struct sk_buff *skb) +static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); - unix_detach_fds(&scm, skb); + scm.pid = UNIXCB(skb).pid; + scm.cred = UNIXCB(skb).cred; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ @@ -1331,10 +1349,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); - skb->destructor = unix_destruct_fds; return 0; } +static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) +{ + int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).fp = NULL; + if (scm->fp && send_fds) + err = unix_attach_fds(scm, skb); + + skb->destructor = unix_destruct_scm; + return err; +} + /* * Send AF_UNIX data. */ @@ -1391,12 +1421,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) { - err = unix_attach_fds(siocb->scm, skb); - if (err) - goto out_free; - } + err = unix_scm_to_skb(siocb->scm, skb, true); + if (err) + goto out_free; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1566,16 +1593,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, */ size = min_t(int, size, skb_tailroom(skb)); - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + /* Only send the fds in the first buffer */ - if (siocb->scm->fp && !fds_sent) { - err = unix_attach_fds(siocb->scm, skb); - if (err) { - kfree_skb(skb); - goto out_err; - } - fds_sent = true; + err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + if (err) { + kfree_skb(skb); + goto out_err; } + fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err) { @@ -1692,7 +1717,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1841,14 +1866,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, - sizeof(siocb->scm->creds)) != 0) { + if ((UNIXCB(skb).pid != siocb->scm->pid) || + (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); break; } } else { /* Copy credentials */ - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; } @@ -1881,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, break; } - kfree_skb(skb); + consume_skb(skb); if (siocb->scm->fp) break; diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 258daa80ad92..2bf23406637a 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,7 +48,7 @@ #include <linux/kernel.h> #include <linux/module.h> /* support for loadable modules */ #include <linux/slab.h> /* kmalloc(), kfree() */ -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <linux/mm.h> #include <linux/string.h> /* inline mem*, str* functions */ @@ -71,6 +71,7 @@ * WAN device IOCTL handlers */ +static DEFINE_MUTEX(wanrouter_mutex); static int wanrouter_device_setup(struct wan_device *wandev, wandev_conf_t __user *u_conf); static int wanrouter_device_stat(struct wan_device *wandev, @@ -376,7 +377,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (wandev->magic != ROUTER_MAGIC) return -EINVAL; - lock_kernel(); + mutex_lock(&wanrouter_mutex); switch (cmd) { case ROUTER_SETUP: err = wanrouter_device_setup(wandev, data); @@ -408,7 +409,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = wandev->ioctl(wandev, cmd, arg); else err = -EINVAL; } - unlock_kernel(); + mutex_unlock(&wanrouter_mutex); return err; } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index c44d96b3a437..11f25c7a7a05 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -27,7 +27,7 @@ #include <linux/module.h> #include <linux/wanrouter.h> /* WAN router API definitions */ #include <linux/seq_file.h> -#include <linux/smp_lock.h> +#include <linux/mutex.h> #include <net/net_namespace.h> #include <asm/io.h> @@ -66,6 +66,7 @@ * /proc/net/router */ +static DEFINE_MUTEX(config_mutex); static struct proc_dir_entry *proc_router; /* Strings */ @@ -85,7 +86,7 @@ static void *r_start(struct seq_file *m, loff_t *pos) struct wan_device *wandev; loff_t l = *pos; - lock_kernel(); + mutex_lock(&config_mutex); if (!l--) return SEQ_START_TOKEN; for (wandev = wanrouter_router_devlist; l-- && wandev; @@ -104,7 +105,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) static void r_stop(struct seq_file *m, void *v) __releases(kernel_lock) { - unlock_kernel(); + mutex_unlock(&config_mutex); } static int config_show(struct seq_file *m, void *v) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b01a6f6397d7..d0c92dddb26b 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -35,8 +35,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev, if (!ht_cap->ht_supported) return NULL; - if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || - ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) + if (channel_type != NL80211_CHAN_HT20 && + (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || + ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)) return NULL; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 37d0e0ab4432..541e2fff5e9c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -472,24 +472,22 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); + mutex_lock(&cfg80211_mutex); + res = device_add(&rdev->wiphy.dev); if (res) - return res; + goto out_unlock; res = rfkill_register(rdev->rfkill); if (res) goto out_rm_dev; - mutex_lock(&cfg80211_mutex); - /* set up regulatory info */ wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add_rcu(&rdev->list, &cfg80211_rdev_list); cfg80211_rdev_list_generation++; - mutex_unlock(&cfg80211_mutex); - /* add to debugfs */ rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), @@ -509,11 +507,15 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); + mutex_unlock(&cfg80211_mutex); return 0; - out_rm_dev: +out_rm_dev: device_del(&rdev->wiphy.dev); + +out_unlock: + mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); @@ -894,7 +896,7 @@ out_fail_pernet: } subsys_initcall(cfg80211_init); -static void cfg80211_exit(void) +static void __exit cfg80211_exit(void) { debugfs_remove(ieee80211_debugfs_dir); nl80211_exit(); @@ -905,3 +907,52 @@ static void cfg80211_exit(void) destroy_workqueue(cfg80211_wq); } module_exit(cfg80211_exit); + +static int ___wiphy_printk(const char *level, const struct wiphy *wiphy, + struct va_format *vaf) +{ + if (!wiphy) + return printk("%s(NULL wiphy *): %pV", level, vaf); + + return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf); +} + +int __wiphy_printk(const char *level, const struct wiphy *wiphy, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int r; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + r = ___wiphy_printk(level, wiphy, &vaf); + va_end(args); + + return r; +} +EXPORT_SYMBOL(__wiphy_printk); + +#define define_wiphy_printk_level(func, kern_level) \ +int func(const struct wiphy *wiphy, const char *fmt, ...) \ +{ \ + struct va_format vaf; \ + va_list args; \ + int r; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + r = ___wiphy_printk(kern_level, wiphy, &vaf); \ + va_end(args); \ + \ + return r; \ +} \ +EXPORT_SYMBOL(func); + +define_wiphy_printk_level(wiphy_debug, KERN_DEBUG); diff --git a/net/wireless/core.h b/net/wireless/core.h index ae930acf75e9..63d57ae399c3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -339,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk index 3cc9e69880a8..53c143f5e770 100644 --- a/net/wireless/genregdb.awk +++ b/net/wireless/genregdb.awk @@ -21,6 +21,7 @@ BEGIN { print "" print "#include <linux/nl80211.h>" print "#include <net/cfg80211.h>" + print "#include \"regdb.h\"" print "" regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" } diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index adcabba02e20..27a8ce9343c3 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -247,8 +247,10 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - if (wdev->wext.keys) + if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; + wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; + } wdev->wext.ibss.privacy = wdev->wext.default_key != -1; diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index b7fa31d5fd13..dacb3b4b1bdb 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -467,7 +467,6 @@ static struct lib80211_crypto_ops lib80211_crypt_ccmp = { .name = "CCMP", .init = lib80211_ccmp_init, .deinit = lib80211_ccmp_deinit, - .build_iv = lib80211_ccmp_hdr, .encrypt_mpdu = lib80211_ccmp_encrypt, .decrypt_mpdu = lib80211_ccmp_decrypt, .encrypt_msdu = NULL, diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 8cbdb32ff316..0fe40510e2cb 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -578,7 +578,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) } if (ieee80211_is_data_qos(hdr11->frame_control)) { - hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) + hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11))) & IEEE80211_QOS_CTL_TID_MASK; } else hdr[12] = 0; /* priority */ @@ -757,7 +757,6 @@ static struct lib80211_crypto_ops lib80211_crypt_tkip = { .name = "TKIP", .init = lib80211_tkip_init, .deinit = lib80211_tkip_deinit, - .build_iv = lib80211_tkip_hdr, .encrypt_mpdu = lib80211_tkip_encrypt, .decrypt_mpdu = lib80211_tkip_decrypt, .encrypt_msdu = lib80211_michael_mic_add, diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c index 6d41e05ca33b..e2e88878ba35 100644 --- a/net/wireless/lib80211_crypt_wep.c +++ b/net/wireless/lib80211_crypt_wep.c @@ -269,7 +269,6 @@ static struct lib80211_crypto_ops lib80211_crypt_wep = { .name = "WEP", .init = lib80211_wep_init, .deinit = lib80211_wep_deinit, - .build_iv = lib80211_wep_build_iv, .encrypt_mpdu = lib80211_wep_encrypt, .decrypt_mpdu = lib80211_wep_decrypt, .encrypt_msdu = NULL, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 48ead6f0426d..e74a1a2119d3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -44,10 +44,10 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) } } - WARN_ON(!done); - - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); + if (done) { + nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(dev, buf, len); + } wdev_unlock(wdev); } @@ -827,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, + bool channel_type_valid, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -845,8 +846,9 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, if (!wdev->current_bss || memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, ETH_ALEN) != 0 || - memcmp(wdev->current_bss->pub.bssid, mgmt->da, - ETH_ALEN) != 0) + (wdev->iftype == NL80211_IFTYPE_STATION && + memcmp(wdev->current_bss->pub.bssid, mgmt->da, + ETH_ALEN) != 0)) return -ENOTCONN; } @@ -855,7 +857,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - buf, len, cookie); + channel_type_valid, buf, len, cookie); } bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index db71150b8040..37902a54e9c1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -153,6 +153,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, + + [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -869,6 +872,34 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { + enum nl80211_tx_power_setting type; + int idx, mbm = 0; + + if (!rdev->ops->set_tx_power) { + result = -EOPNOTSUPP; + goto bad_res; + } + + idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; + type = nla_get_u32(info->attrs[idx]); + + if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && + (type != NL80211_TX_POWER_AUTOMATIC)) { + result = -EINVAL; + goto bad_res; + } + + if (type != NL80211_TX_POWER_AUTOMATIC) { + idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; + mbm = nla_get_u32(info->attrs[idx]); + } + + result = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { @@ -1107,7 +1138,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype) { if (!use_4addr) { - if (netdev && netdev->br_port) + if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT)) return -EBUSY; return 0; } @@ -2738,6 +2769,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); err = -EMSGSIZE; out: /* Cleanup */ @@ -2929,6 +2961,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_failure: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); err = -EMSGSIZE; out: mutex_unlock(&cfg80211_mutex); @@ -3955,6 +3988,55 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int i, j; + + if (n_rates == 0) { + err = -EINVAL; + goto out; + } + + for (i = 0; i < n_rates; i++) { + int rate = (rates[i] & 0x7f) * 5; + bool found = false; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) { + found = true; + ibss.basic_rates |= BIT(j); + break; + } + } + if (!found) { + err = -EINVAL; + goto out; + } + } + } else { + /* + * If no rates were explicitly configured, + * use the mandatory rate set for 11b or + * 11a for maximum compatibility. + */ + struct ieee80211_supported_band *sband = + wiphy->bands[ibss.channel->band]; + int j; + u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ? + IEEE80211_RATE_MANDATORY_A : + IEEE80211_RATE_MANDATORY_B; + + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].flags & flag) + ibss.basic_rates |= BIT(j); + } + } + err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); out: @@ -4653,7 +4735,8 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } @@ -4681,6 +4764,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + bool channel_type_valid = false; u32 freq; int err; void *hdr; @@ -4702,7 +4786,8 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto out; } - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { err = -EOPNOTSUPP; goto out; } @@ -4722,6 +4807,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + channel_type_valid = true; } freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); @@ -4745,6 +4831,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) goto free_msg; } err = cfg80211_mlme_action(rdev, dev, chan, channel_type, + channel_type_valid, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8f0d97dd3109..f180db0de66c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -67,20 +67,12 @@ static struct platform_device *reg_pdev; const struct ieee80211_regdomain *cfg80211_regdomain; /* - * We use this as a place for the rd structure built from the - * last parsed country IE to rest until CRDA gets back to us with - * what it thinks should apply for the same country - */ -static const struct ieee80211_regdomain *country_ie_regdomain; - -/* * Protects static reg.c components: * - cfg80211_world_regdom * - cfg80211_regdom - * - country_ie_regdomain * - last_request */ -DEFINE_MUTEX(reg_mutex); +static DEFINE_MUTEX(reg_mutex); #define assert_reg_lock() WARN_ON(!mutex_is_locked(®_mutex)) /* Used to queue up regulatory hints */ @@ -275,25 +267,6 @@ static bool is_user_regdom_saved(void) return true; } -/** - * country_ie_integrity_changes - tells us if the country IE has changed - * @checksum: checksum of country IE of fields we are interested in - * - * If the country IE has not changed you can ignore it safely. This is - * useful to determine if two devices are seeing two different country IEs - * even on the same alpha2. Note that this will return false if no IE has - * been set on the wireless core yet. - */ -static bool country_ie_integrity_changes(u32 checksum) -{ - /* If no IE has been set then the checksum doesn't change */ - if (unlikely(!last_request->country_ie_checksum)) - return false; - if (unlikely(last_request->country_ie_checksum != checksum)) - return true; - return false; -} - static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, const struct ieee80211_regdomain *src_regd) { @@ -506,471 +479,6 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, } /* - * This is a work around for sanity checking ieee80211_channel_to_frequency()'s - * work. ieee80211_channel_to_frequency() can for example currently provide a - * 2 GHz channel when in fact a 5 GHz channel was desired. An example would be - * an AP providing channel 8 on a country IE triplet when it sent this on the - * 5 GHz band, that channel is designed to be channel 8 on 5 GHz, not a 2 GHz - * channel. - * - * This can be removed once ieee80211_channel_to_frequency() takes in a band. - */ -static bool chan_in_band(int chan, enum ieee80211_band band) -{ - int center_freq = ieee80211_channel_to_frequency(chan); - - switch (band) { - case IEEE80211_BAND_2GHZ: - if (center_freq <= 2484) - return true; - return false; - case IEEE80211_BAND_5GHZ: - if (center_freq >= 5005) - return true; - return false; - default: - return false; - } -} - -/* - * Some APs may send a country IE triplet for each channel they - * support and while this is completely overkill and silly we still - * need to support it. We avoid making a single rule for each channel - * though and to help us with this we use this helper to find the - * actual subband end channel. These type of country IE triplet - * scenerios are handled then, all yielding two regulaotry rules from - * parsing a country IE: - * - * [1] - * [2] - * [36] - * [40] - * - * [1] - * [2-4] - * [5-12] - * [36] - * [40-44] - * - * [1-4] - * [5-7] - * [36-44] - * [48-64] - * - * [36-36] - * [40-40] - * [44-44] - * [48-48] - * [52-52] - * [56-56] - * [60-60] - * [64-64] - * [100-100] - * [104-104] - * [108-108] - * [112-112] - * [116-116] - * [120-120] - * [124-124] - * [128-128] - * [132-132] - * [136-136] - * [140-140] - * - * Returns 0 if the IE has been found to be invalid in the middle - * somewhere. - */ -static int max_subband_chan(enum ieee80211_band band, - int orig_cur_chan, - int orig_end_channel, - s8 orig_max_power, - u8 **country_ie, - u8 *country_ie_len) -{ - u8 *triplets_start = *country_ie; - u8 len_at_triplet = *country_ie_len; - int end_subband_chan = orig_end_channel; - - /* - * We'll deal with padding for the caller unless - * its not immediate and we don't process any channels - */ - if (*country_ie_len == 1) { - *country_ie += 1; - *country_ie_len -= 1; - return orig_end_channel; - } - - /* Move to the next triplet and then start search */ - *country_ie += 3; - *country_ie_len -= 3; - - if (!chan_in_band(orig_cur_chan, band)) - return 0; - - while (*country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) *country_ie; - int cur_channel = 0, next_expected_chan; - - /* means last triplet is completely unrelated to this one */ - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - if (triplet->chans.first_channel == 0) { - *country_ie += 1; - *country_ie_len -= 1; - if (*country_ie_len != 0) - return 0; - break; - } - - if (triplet->chans.num_channels == 0) - return 0; - - /* Monitonically increasing channel order */ - if (triplet->chans.first_channel <= end_subband_chan) - return 0; - - if (!chan_in_band(triplet->chans.first_channel, band)) - return 0; - - /* 2 GHz */ - if (triplet->chans.first_channel <= 14) { - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels - 1; - } - else { - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - } - - if (!chan_in_band(end_channel, band)) - return 0; - - if (orig_max_power != triplet->chans.max_power) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - cur_channel = triplet->chans.first_channel; - - /* The key is finding the right next expected channel */ - if (band == IEEE80211_BAND_2GHZ) - next_expected_chan = end_subband_chan + 1; - else - next_expected_chan = end_subband_chan + 4; - - if (cur_channel != next_expected_chan) { - *country_ie -= 3; - *country_ie_len += 3; - break; - } - - end_subband_chan = end_channel; - - /* Move to the next one */ - *country_ie += 3; - *country_ie_len -= 3; - - /* - * Padding needs to be dealt with if we processed - * some channels. - */ - if (*country_ie_len == 1) { - *country_ie += 1; - *country_ie_len -= 1; - break; - } - - /* If seen, the IE is invalid */ - if (*country_ie_len == 2) - return 0; - } - - if (end_subband_chan == orig_end_channel) { - *country_ie = triplets_start; - *country_ie_len = len_at_triplet; - return orig_end_channel; - } - - return end_subband_chan; -} - -/* - * Converts a country IE to a regulatory domain. A regulatory domain - * structure has a lot of information which the IE doesn't yet have, - * so for the other values we use upper max values as we will intersect - * with our userspace regulatory agent to get lower bounds. - */ -static struct ieee80211_regdomain *country_ie_2_rd( - enum ieee80211_band band, - u8 *country_ie, - u8 country_ie_len, - u32 *checksum) -{ - struct ieee80211_regdomain *rd = NULL; - unsigned int i = 0; - char alpha2[2]; - u32 flags = 0; - u32 num_rules = 0, size_of_regd = 0; - u8 *triplets_start = NULL; - u8 len_at_triplet = 0; - /* the last channel we have registered in a subband (triplet) */ - int last_sub_max_channel = 0; - - *checksum = 0xDEADBEEF; - - /* Country IE requirements */ - BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN || - country_ie_len & 0x01); - - alpha2[0] = country_ie[0]; - alpha2[1] = country_ie[1]; - - /* - * Third octet can be: - * 'I' - Indoor - * 'O' - Outdoor - * - * anything else we assume is no restrictions - */ - if (country_ie[2] == 'I') - flags = NL80211_RRF_NO_OUTDOOR; - else if (country_ie[2] == 'O') - flags = NL80211_RRF_NO_INDOOR; - - country_ie += 3; - country_ie_len -= 3; - - triplets_start = country_ie; - len_at_triplet = country_ie_len; - - *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); - - /* - * We need to build a reg rule for each triplet, but first we must - * calculate the number of reg rules we will need. We will need one - * for each channel subband - */ - while (country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) country_ie; - int cur_sub_max_channel = 0, cur_channel = 0; - - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - country_ie += 3; - country_ie_len -= 3; - continue; - } - - /* - * APs can add padding to make length divisible - * by two, required by the spec. - */ - if (triplet->chans.first_channel == 0) { - country_ie++; - country_ie_len--; - /* This is expected to be at the very end only */ - if (country_ie_len != 0) - return NULL; - break; - } - - if (triplet->chans.num_channels == 0) - return NULL; - - if (!chan_in_band(triplet->chans.first_channel, band)) - return NULL; - - /* 2 GHz */ - if (band == IEEE80211_BAND_2GHZ) - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels - 1; - else - /* - * 5 GHz -- For example in country IEs if the first - * channel given is 36 and the number of channels is 4 - * then the individual channel numbers defined for the - * 5 GHz PHY by these parameters are: 36, 40, 44, and 48 - * and not 36, 37, 38, 39. - * - * See: http://tinyurl.com/11d-clarification - */ - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - - cur_channel = triplet->chans.first_channel; - - /* - * Enhancement for APs that send a triplet for every channel - * or for whatever reason sends triplets with multiple channels - * separated when in fact they should be together. - */ - end_channel = max_subband_chan(band, - cur_channel, - end_channel, - triplet->chans.max_power, - &country_ie, - &country_ie_len); - if (!end_channel) - return NULL; - - if (!chan_in_band(end_channel, band)) - return NULL; - - cur_sub_max_channel = end_channel; - - /* Basic sanity check */ - if (cur_sub_max_channel < cur_channel) - return NULL; - - /* - * Do not allow overlapping channels. Also channels - * passed in each subband must be monotonically - * increasing - */ - if (last_sub_max_channel) { - if (cur_channel <= last_sub_max_channel) - return NULL; - if (cur_sub_max_channel <= last_sub_max_channel) - return NULL; - } - - /* - * When dot11RegulatoryClassesRequired is supported - * we can throw ext triplets as part of this soup, - * for now we don't care when those change as we - * don't support them - */ - *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | - ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | - ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); - - last_sub_max_channel = cur_sub_max_channel; - - num_rules++; - - if (country_ie_len >= 3) { - country_ie += 3; - country_ie_len -= 3; - } - - /* - * Note: this is not a IEEE requirement but - * simply a memory requirement - */ - if (num_rules > NL80211_MAX_SUPP_REG_RULES) - return NULL; - } - - country_ie = triplets_start; - country_ie_len = len_at_triplet; - - size_of_regd = sizeof(struct ieee80211_regdomain) + - (num_rules * sizeof(struct ieee80211_reg_rule)); - - rd = kzalloc(size_of_regd, GFP_KERNEL); - if (!rd) - return NULL; - - rd->n_reg_rules = num_rules; - rd->alpha2[0] = alpha2[0]; - rd->alpha2[1] = alpha2[1]; - - /* This time around we fill in the rd */ - while (country_ie_len >= 3) { - int end_channel = 0; - struct ieee80211_country_ie_triplet *triplet = - (struct ieee80211_country_ie_triplet *) country_ie; - struct ieee80211_reg_rule *reg_rule = NULL; - struct ieee80211_freq_range *freq_range = NULL; - struct ieee80211_power_rule *power_rule = NULL; - - /* - * Must parse if dot11RegulatoryClassesRequired is true, - * we don't support this yet - */ - if (triplet->ext.reg_extension_id >= - IEEE80211_COUNTRY_EXTENSION_ID) { - country_ie += 3; - country_ie_len -= 3; - continue; - } - - if (triplet->chans.first_channel == 0) { - country_ie++; - country_ie_len--; - break; - } - - reg_rule = &rd->reg_rules[i]; - freq_range = ®_rule->freq_range; - power_rule = ®_rule->power_rule; - - reg_rule->flags = flags; - - /* 2 GHz */ - if (band == IEEE80211_BAND_2GHZ) - end_channel = triplet->chans.first_channel + - triplet->chans.num_channels -1; - else - end_channel = triplet->chans.first_channel + - (4 * (triplet->chans.num_channels - 1)); - - end_channel = max_subband_chan(band, - triplet->chans.first_channel, - end_channel, - triplet->chans.max_power, - &country_ie, - &country_ie_len); - - /* - * The +10 is since the regulatory domain expects - * the actual band edge, not the center of freq for - * its start and end freqs, assuming 20 MHz bandwidth on - * the channels passed - */ - freq_range->start_freq_khz = - MHZ_TO_KHZ(ieee80211_channel_to_frequency( - triplet->chans.first_channel) - 10); - freq_range->end_freq_khz = - MHZ_TO_KHZ(ieee80211_channel_to_frequency( - end_channel) + 10); - - /* - * These are large arbitrary values we use to intersect later. - * Increment this if we ever support >= 40 MHz channels - * in IEEE 802.11 - */ - freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); - power_rule->max_antenna_gain = DBI_TO_MBI(100); - power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power); - - i++; - - if (country_ie_len >= 3) { - country_ie += 3; - country_ie_len -= 3; - } - - BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); - } - - return rd; -} - - -/* * Helper for regdom_intersect(), this does the real * mathematical intersection fun */ @@ -1191,7 +699,6 @@ static int freq_reg_info_regd(struct wiphy *wiphy, return -EINVAL; } -EXPORT_SYMBOL(freq_reg_info); int freq_reg_info(struct wiphy *wiphy, u32 center_freq, @@ -1205,6 +712,7 @@ int freq_reg_info(struct wiphy *wiphy, reg_rule, NULL); } +EXPORT_SYMBOL(freq_reg_info); /* * Note that right now we assume the desired channel bandwidth @@ -1243,41 +751,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, desired_bw_khz, ®_rule); - if (r) { - /* - * This means no regulatory rule was found in the country IE - * with a frequency range on the center_freq's band, since - * IEEE-802.11 allows for a country IE to have a subset of the - * regulatory information provided in a country we ignore - * disabling the channel unless at least one reg rule was - * found on the center_freq's band. For details see this - * clarification: - * - * http://tinyurl.com/11d-clarification - */ - if (r == -ERANGE && - last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) { - REG_DBG_PRINT("cfg80211: Leaving channel %d MHz " - "intact on %s - no rule found in band on " - "Country IE\n", - chan->center_freq, wiphy_name(wiphy)); - } else { - /* - * In this case we know the country IE has at least one reg rule - * for the band so we respect its band definitions - */ - if (last_request->initiator == - NL80211_REGDOM_SET_BY_COUNTRY_IE) - REG_DBG_PRINT("cfg80211: Disabling " - "channel %d MHz on %s due to " - "Country IE\n", - chan->center_freq, wiphy_name(wiphy)); - flags |= IEEE80211_CHAN_DISABLED; - chan->flags = flags; - } + if (r) return; - } power_rule = ®_rule->power_rule; freq_range = ®_rule->freq_range; @@ -1831,6 +1306,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; struct wiphy *wiphy = NULL; + enum nl80211_reg_initiator initiator = reg_request->initiator; BUG_ON(!reg_request->alpha2); @@ -1850,7 +1326,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) /* This is required so that the orig_* parameters are saved */ if (r == -EALREADY && wiphy && wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) - wiphy_update_regulatory(wiphy, reg_request->initiator); + wiphy_update_regulatory(wiphy, initiator); out: mutex_unlock(®_mutex); mutex_unlock(&cfg80211_mutex); @@ -2008,35 +1484,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* Caller must hold reg_mutex */ -static bool reg_same_country_ie_hint(struct wiphy *wiphy, - u32 country_ie_checksum) -{ - struct wiphy *request_wiphy; - - assert_reg_lock(); - - if (unlikely(last_request->initiator != - NL80211_REGDOM_SET_BY_COUNTRY_IE)) - return false; - - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - - if (!request_wiphy) - return false; - - if (likely(request_wiphy != wiphy)) - return !country_ie_integrity_changes(country_ie_checksum); - /* - * We should not have let these through at this point, they - * should have been picked up earlier by the first alpha2 check - * on the device - */ - if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) - return true; - return false; -} - /* * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and * therefore cannot iterate over the rdev list here. @@ -2046,9 +1493,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, u8 *country_ie, u8 country_ie_len) { - struct ieee80211_regdomain *rd = NULL; char alpha2[2]; - u32 checksum = 0; enum environment_cap env = ENVIRON_ANY; struct regulatory_request *request; @@ -2064,14 +1509,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) goto out; - /* - * Pending country IE processing, this can happen after we - * call CRDA and wait for a response if a beacon was received before - * we were able to process the last regulatory_hint_11d() call - */ - if (country_ie_regdomain) - goto out; - alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -2090,39 +1527,14 @@ void regulatory_hint_11d(struct wiphy *wiphy, wiphy_idx_valid(last_request->wiphy_idx))) goto out; - rd = country_ie_2_rd(band, country_ie, country_ie_len, &checksum); - if (!rd) { - REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n"); - goto out; - } - - /* - * This will not happen right now but we leave it here for the - * the future when we want to add suspend/resume support and having - * the user move to another country after doing so, or having the user - * move to another AP. Right now we just trust the first AP. - * - * If we hit this before we add this support we want to be informed of - * it as it would indicate a mistake in the current design - */ - if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) - goto free_rd_out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) - goto free_rd_out; - - /* - * We keep this around for when CRDA comes back with a response so - * we can intersect with that - */ - country_ie_regdomain = rd; + goto out; request->wiphy_idx = get_wiphy_idx(wiphy); - request->alpha2[0] = rd->alpha2[0]; - request->alpha2[1] = rd->alpha2[1]; + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; - request->country_ie_checksum = checksum; request->country_ie_env = env; mutex_unlock(®_mutex); @@ -2131,8 +1543,6 @@ void regulatory_hint_11d(struct wiphy *wiphy, return; -free_rd_out: - kfree(rd); out: mutex_unlock(®_mutex); } @@ -2383,33 +1793,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } -#ifdef CONFIG_CFG80211_REG_DEBUG -static void reg_country_ie_process_debug( - const struct ieee80211_regdomain *rd, - const struct ieee80211_regdomain *country_ie_regdomain, - const struct ieee80211_regdomain *intersected_rd) -{ - printk(KERN_DEBUG "cfg80211: Received country IE:\n"); - print_regdomain_info(country_ie_regdomain); - printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n"); - print_regdomain_info(rd); - if (intersected_rd) { - printk(KERN_DEBUG "cfg80211: We intersect both of these " - "and get:\n"); - print_regdomain_info(intersected_rd); - return; - } - printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); -} -#else -static inline void reg_country_ie_process_debug( - const struct ieee80211_regdomain *rd, - const struct ieee80211_regdomain *country_ie_regdomain, - const struct ieee80211_regdomain *intersected_rd) -{ -} -#endif - /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { @@ -2521,34 +1904,6 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* - * Country IE requests are handled a bit differently, we intersect - * the country IE rd with what CRDA believes that country should have - */ - - /* - * Userspace could have sent two replies with only - * one kernel request. By the second reply we would have - * already processed and consumed the country_ie_regdomain. - */ - if (!country_ie_regdomain) - return -EALREADY; - BUG_ON(rd == country_ie_regdomain); - - /* - * Intersect what CRDA returned and our what we - * had built from the Country IE received - */ - - intersected_rd = regdom_intersect(rd, country_ie_regdomain); - - reg_country_ie_process_debug(rd, - country_ie_regdomain, - intersected_rd); - - kfree(country_ie_regdomain); - country_ie_regdomain = NULL; - if (!intersected_rd) return -EINVAL; @@ -2630,7 +1985,7 @@ out: mutex_unlock(®_mutex); } -int regulatory_init(void) +int __init regulatory_init(void) { int err = 0; @@ -2676,7 +2031,7 @@ int regulatory_init(void) return 0; } -void regulatory_exit(void) +void /* __init_or_exit */ regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; struct reg_beacon *reg_beacon, *btmp; @@ -2688,9 +2043,6 @@ void regulatory_exit(void) reset_regdomains(); - kfree(country_ie_regdomain); - country_ie_regdomain = NULL; - kfree(last_request); platform_device_unregister(reg_pdev); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index b26224a9f3bc..c4695d07af23 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -10,7 +10,7 @@ int regulatory_hint_user(const char *alpha2); void reg_device_remove(struct wiphy *wiphy); -int regulatory_init(void); +int __init regulatory_init(void); void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 58401d246bda..5ca8c7180141 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -275,6 +275,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, { struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; + unsigned long now = jiffies; spin_lock_bh(&dev->bss_lock); @@ -283,6 +284,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (channel && bss->pub.channel != channel) continue; + /* Don't get expired BSS structs */ + if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && + !atomic_read(&bss->hold)) + continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { res = bss; kref_get(&res->ref); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 72222f0074db..a8c2d6b877ae 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -35,7 +35,7 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; -bool cfg80211_is_all_idle(void) +static bool cfg80211_is_all_idle(void) { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; diff --git a/net/wireless/util.c b/net/wireless/util.c index 3416373a9c0c..0c8a1e8b7690 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -770,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; /* if it's part of a bridge, reject changing type to station/ibss */ - if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION)) + if ((dev->priv_flags & IFF_BRIDGE_PORT) && + (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION)) return -EBUSY; if (ntype != otype) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 96342993cf93..bb5e0a5ecfa1 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -829,7 +829,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - enum tx_power_setting type; + enum nl80211_tx_power_setting type; int dbm = 0; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) @@ -852,7 +852,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, if (data->txpower.value < 0) return -EINVAL; dbm = data->txpower.value; - type = TX_POWER_FIXED; + type = NL80211_TX_POWER_FIXED; /* TODO: do regulatory check! */ } else { /* @@ -860,10 +860,10 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, * passed in from userland. */ if (data->txpower.value < 0) { - type = TX_POWER_AUTOMATIC; + type = NL80211_TX_POWER_AUTOMATIC; } else { dbm = data->txpower.value; - type = TX_POWER_LIMITED; + type = NL80211_TX_POWER_LIMITED; } } } else { @@ -872,7 +872,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - return rdev->ops->set_tx_power(wdev->wiphy, type, dbm); + return rdev->ops->set_tx_power(wdev->wiphy, type, DBM_TO_MBM(dbm)); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower); @@ -1471,6 +1471,7 @@ int cfg80211_wext_siwpmksa(struct net_device *dev, return -EOPNOTSUPP; } } +EXPORT_SYMBOL_GPL(cfg80211_wext_siwpmksa); static const iw_handler cfg80211_handlers[] = { [IW_IOCTL_IDX(SIOCGIWNAME)] = (iw_handler) cfg80211_wext_giwname, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index af1c173be4ad..2b3ed7ad4933 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err < 0) { - if (err != -EAGAIN) + if (err <= 0) { + if (err != 0 && err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } @@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; dst_hold(&xdst->u.dst); return oldflo; + } else if (new_xdst == NULL) { + num_xfrms = 0; + if (oldflo == NULL) + goto make_dummy_bundle; + xdst->num_xfrms = 0; + dst_hold(&xdst->u.dst); + return oldflo; } /* Kill the previous bundle */ @@ -1760,6 +1767,10 @@ restart: xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); goto dropdst; + } else if (xdst == NULL) { + num_xfrms = 0; + drop_pols = num_pols; + goto no_transform; } spin_lock_bh(&xfrm_policy_sk_bundle_lock); @@ -2481,7 +2492,8 @@ static int __net_init xfrm_statistics_init(struct net *net) int rv; if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, - sizeof(struct linux_xfrm_mib)) < 0) + sizeof(struct linux_xfrm_mib), + __alignof__(struct linux_xfrm_mib)) < 0) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) |