diff options
Diffstat (limited to 'net')
364 files changed, 11390 insertions, 6638 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c7a581a96894..8970ba139d73 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -18,6 +18,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -132,8 +134,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); rcu_assign_pointer(real_dev->vlgrp, NULL); - if (ops->ndo_vlan_rx_register) - ops->ndo_vlan_rx_register(real_dev, NULL); /* Free the group, after all cpu's are done. */ call_rcu(&grp->rcu, vlan_rcu_free); @@ -149,13 +149,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) const struct net_device_ops *ops = real_dev->netdev_ops; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { - pr_info("8021q: VLANs not supported on %s\n", name); + pr_info("VLANs not supported on %s\n", name); return -EOPNOTSUPP; } if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) { - pr_info("8021q: Device %s has buggy VLAN hw accel\n", name); + pr_info("Device %s has buggy VLAN hw accel\n", name); return -EOPNOTSUPP; } @@ -205,8 +205,6 @@ int register_vlan_dev(struct net_device *dev) grp->nr_vlans++; if (ngrp) { - if (ops->ndo_vlan_rx_register) - ops->ndo_vlan_rx_register(real_dev, ngrp); rcu_assign_pointer(real_dev->vlgrp, ngrp); } if (real_dev->features & NETIF_F_HW_VLAN_FILTER) @@ -344,13 +342,12 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to change proc name for %s\n", - dev->name); + pr_warn("failed to change proc name for %s\n", + dev->name); break; case NETDEV_REGISTER: if (vlan_proc_add_dev(dev) < 0) - pr_warning("8021q: failed to add proc entry for %s\n", - dev->name); + pr_warn("failed to add proc entry for %s\n", dev->name); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); @@ -374,7 +371,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_FILTER) && dev->netdev_ops->ndo_vlan_rx_add_vid) { - pr_info("8021q: adding VLAN 0 to HW filter on device %s\n", + pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0); } diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 9da07e30d1a2..9fd45f3571f9 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -74,6 +74,37 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) return netdev_priv(dev); } +static inline struct net_device *vlan_group_get_device(struct vlan_group *vg, + u16 vlan_id) +{ + struct net_device **array; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL; +} + +static inline void vlan_group_set_device(struct vlan_group *vg, + u16 vlan_id, + struct net_device *dev) +{ + struct net_device **array; + if (!vg) + return; + array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN]; + array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev; +} + +/* Must be invoked with rcu_read_lock or with RTNL. */ +static inline struct net_device *vlan_find_dev(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) + return vlan_group_get_device(grp, vlan_id); + + return NULL; +} + /* found in vlan_dev.c */ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 41495dc2a4c9..5f27f8e30254 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -23,6 +23,31 @@ bool vlan_do_receive(struct sk_buff **skbp) return false; skb->dev = vlan_dev; + if (skb->pkt_type == PACKET_OTHERHOST) { + /* Our lower layer thinks this is not local, let's make sure. + * This allows the VLAN to have a different MAC than the + * underlying device, and still route correctly. */ + if (!compare_ether_addr(eth_hdr(skb)->h_dest, + vlan_dev->dev_addr)) + skb->pkt_type = PACKET_HOST; + } + + if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) { + unsigned int offset = skb->data - skb_mac_header(skb); + + /* + * vlan_insert_tag expect skb->data pointing to mac header. + * So change skb->data before calling it and change back to + * original position later + */ + skb_push(skb, offset); + skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci); + if (!skb) + return false; + skb_pull(skb, offset + VLAN_HLEN); + skb_reset_mac_len(skb); + } + skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci); skb->vlan_tci = 0; @@ -31,27 +56,34 @@ bool vlan_do_receive(struct sk_buff **skbp) u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; - - switch (skb->pkt_type) { - case PACKET_BROADCAST: - break; - case PACKET_MULTICAST: + if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; - break; - case PACKET_OTHERHOST: - /* Our lower layer thinks this is not local, let's make sure. - * This allows the VLAN to have a different MAC than the - * underlying device, and still route correctly. */ - if (!compare_ether_addr(eth_hdr(skb)->h_dest, - vlan_dev->dev_addr)) - skb->pkt_type = PACKET_HOST; - break; - } u64_stats_update_end(&rx_stats->syncp); return true; } +/* Must be invoked with rcu_read_lock or with RTNL. */ +struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, + u16 vlan_id) +{ + struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp); + + if (grp) { + return vlan_group_get_device(grp, vlan_id); + } else { + /* + * Bonding slaves do not have grp assigned to themselves. + * Grp is assigned to bonding master instead. + */ + if (netif_is_bond_slave(real_dev)) + return __vlan_find_dev_deep(real_dev->master, vlan_id); + } + + return NULL; +} +EXPORT_SYMBOL(__vlan_find_dev_deep); + struct net_device *vlan_dev_real_dev(const struct net_device *dev) { return vlan_dev_info(dev)->real_dev; @@ -64,43 +96,13 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ -int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, - u16 vlan_tci, int polling) +static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) { - __vlan_hwaccel_put_tag(skb, vlan_tci); - return polling ? netif_receive_skb(skb) : netif_rx(skb); -} -EXPORT_SYMBOL(__vlan_hwaccel_rx); - -gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) -{ - __vlan_hwaccel_put_tag(skb, vlan_tci); - return napi_gro_receive(napi, skb); -} -EXPORT_SYMBOL(vlan_gro_receive); - -gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci) -{ - __vlan_hwaccel_put_tag(napi->skb, vlan_tci); - return napi_gro_frags(napi); -} -EXPORT_SYMBOL(vlan_gro_frags); - -static struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) -{ - if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { - if (skb_cow(skb, skb_headroom(skb)) < 0) - skb = NULL; - if (skb) { - /* Lifted from Gleb's VLAN code... */ - memmove(skb->data - ETH_HLEN, - skb->data - VLAN_ETH_HLEN, 12); - skb->mac_header += VLAN_HLEN; - } - } + if (skb_cow(skb, skb_headroom(skb)) < 0) + return NULL; + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + skb_reset_mac_len(skb); return skb; } @@ -161,7 +163,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb) skb_pull_rcsum(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); - skb = vlan_check_reorder_header(skb); + skb = vlan_reorder_header(skb); if (unlikely(!skb)) goto err_free; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f247f5bff88d..9d40a071d038 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -20,6 +20,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> @@ -55,7 +57,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) return arp_find(veth->h_dest, skb); #endif default: - pr_debug("%s: unable to resolve type %X addresses.\n", + pr_debug("%s: unable to resolve type %X addresses\n", dev->name, ntohs(veth->h_vlan_encapsulated_proto)); memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); @@ -165,7 +167,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; - u64_stats_update_begin(&stats->syncp); + u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); } @@ -528,7 +530,11 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_TX_OFFLOADS; + dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | + NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM | + NETIF_F_ALL_FCOE; + dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; @@ -586,9 +592,13 @@ static void vlan_dev_uninit(struct net_device *dev) static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + u32 old_features = features; features &= real_dev->features; features &= real_dev->vlan_features; + + features |= old_features & NETIF_F_SOFT_FEATURES; + if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM; features |= NETIF_F_LLTX; @@ -685,7 +695,7 @@ void vlan_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags |= IFF_802_1Q_VLAN; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index d940c49d168a..d34b6daf8930 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -17,6 +17,8 @@ * Jan 20, 1998 Ben Greear Initial Version *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -155,7 +157,7 @@ int __net_init vlan_proc_init(struct net *net) return 0; err: - pr_err("%s: can't create entry in proc filesystem!\n", __func__); + pr_err("can't create entry in proc filesystem!\n"); vlan_proc_cleanup(net); return -ENOBUFS; } @@ -229,7 +231,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&net->dev_base_head); diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 7ed75c7bd5d1..d9ea09b11cf8 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -3,8 +3,8 @@ # menuconfig NET_9P - depends on NET && EXPERIMENTAL - tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + depends on NET + tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for Plan 9 resource sharing via the 9P2000 protocol. @@ -16,8 +16,8 @@ menuconfig NET_9P if NET_9P config NET_9P_VIRTIO - depends on EXPERIMENTAL && VIRTIO - tristate "9P Virtio Transport (Experimental)" + depends on VIRTIO + tristate "9P Virtio Transport" help This builds support for a transports between guest partitions and a host partition. diff --git a/net/9p/client.c b/net/9p/client.c index ceab943dfc49..0505a03c374c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -72,29 +72,25 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) EXPORT_SYMBOL(p9_is_proto_dotu); /* Interpret mount option for protocol version */ -static int get_protocol_version(const substring_t *name) +static int get_protocol_version(char *s) { int version = -EINVAL; - if (!strncmp("9p2000", name->from, name->to-name->from)) { + if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); - } else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { + } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); - } else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { + } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); - } else { - P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. ", - name->from); - } + } else + printk(KERN_INFO "9p: Unknown protocol version %s.\n", s); + return version; } -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); - /** * parse_options - parse mount options into client structure * @opts: options string passed from mount @@ -109,6 +105,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *p; substring_t args[MAX_OPT_ARGS]; int option; + char *s; int ret = 0; clnt->proto_version = p9_proto_2000u; @@ -144,22 +141,41 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); - if(clnt->trans_mod == NULL) { + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; P9_DPRINTK(P9_DEBUG_ERROR, - "Could not find request transport: %s\n", - (char *) &args[0]); + "problem allocating copy of trans arg\n"); + goto free_and_return; + } + clnt->trans_mod = v9fs_get_trans_by_name(s); + if (clnt->trans_mod == NULL) { + printk(KERN_INFO + "9p: Could not find " + "request transport: %s\n", s); ret = -EINVAL; + kfree(s); goto free_and_return; } + kfree(s); break; case Opt_legacy: clnt->proto_version = p9_proto_legacy; break; case Opt_version: - ret = get_protocol_version(&args[0]); - if (ret == -EINVAL) + s = match_strdup(&args[0]); + if (!s) { + ret = -ENOMEM; + P9_DPRINTK(P9_DEBUG_ERROR, + "problem allocating copy of version arg\n"); goto free_and_return; + } + ret = get_protocol_version(s); + if (ret == -EINVAL) { + kfree(s); + goto free_and_return; + } + kfree(s); clnt->proto_version = ret; break; default: @@ -283,7 +299,8 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) * buffer to read the data into */ tag++; - BUG_ON(tag >= c->max_tag); + if(tag >= c->max_tag) + return NULL; row = tag / P9_ROW_MAXTAG; col = tag % P9_ROW_MAXTAG; @@ -307,12 +324,13 @@ static int p9_tag_init(struct p9_client *c) c->tagpool = p9_idpool_create(); if (IS_ERR(c->tagpool)) { err = PTR_ERR(c->tagpool); - c->tagpool = NULL; goto error; } - - p9_idpool_get(c->tagpool); /* reserve tag 0 */ - + err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ + if (err < 0) { + p9_idpool_destroy(c->tagpool); + goto error; + } c->max_tag = 0; error: return err; @@ -518,12 +536,15 @@ out_err: return err; } +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + /** * p9_client_flush - flush (cancel) a request * @c: client state * @oldreq: request to cancel * - * This sents a flush for a particular requests and links + * This sents a flush for a particular request and links * the flush request to the original request. The current * code only supports a single flush request although the protocol * allows for multiple flush requests to be sent for a single request. @@ -748,7 +769,7 @@ static int p9_client_version(struct p9_client *c) err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version); if (err) { P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } @@ -789,11 +810,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); - p9_tag_init(clnt); + err = p9_tag_init(clnt); + if (err < 0) + goto free_client; err = parse_opts(options, clnt); if (err < 0) - goto free_client; + goto destroy_tagpool; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); @@ -802,13 +825,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, "No transport defined or default transport\n"); - goto free_client; + goto destroy_tagpool; } clnt->fidpool = p9_idpool_create(); if (IS_ERR(clnt->fidpool)) { err = PTR_ERR(clnt->fidpool); - clnt->fidpool = NULL; goto put_trans; } @@ -819,8 +841,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (err) goto destroy_fidpool; - if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) - clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; + if (clnt->msize > clnt->trans_mod->maxsize) + clnt->msize = clnt->trans_mod->maxsize; err = p9_client_version(clnt); if (err) @@ -834,6 +856,8 @@ destroy_fidpool: p9_idpool_destroy(clnt->fidpool); put_trans: v9fs_put_trans(clnt->trans_mod); +destroy_tagpool: + p9_idpool_destroy(clnt->tagpool); free_client: kfree(clnt); return ERR_PTR(err); @@ -907,7 +931,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto error; } @@ -967,7 +991,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1034,7 +1058,7 @@ int p9_client_open(struct p9_fid *fid, int mode) err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1077,7 +1101,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1122,7 +1146,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1161,7 +1185,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1245,9 +1269,11 @@ int p9_client_clunk(struct p9_fid *fid) P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); p9_free_req(clnt, req); - p9_fid_destroy(fid); - error: + /* + * Fid is not valid even after a failed clunk + */ + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1277,6 +1303,29 @@ error: } EXPORT_SYMBOL(p9_client_remove); +int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +{ + int err = 0; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", + dfid->fid, name, flags); + + clnt = dfid->clnt; + req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + P9_DPRINTK(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_unlinkat); + int p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, u32 count) @@ -1298,7 +1347,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, if (count < rsize) rsize = count; - /* Don't bother zerocopy form small IO (< 1024) */ + /* Don't bother zerocopy for small IO (< 1024) */ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, @@ -1314,11 +1363,12 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + P9_DUMP_PKT(1, req->rc); if (!req->tc->pbuf_size) { if (data) { @@ -1382,7 +1432,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1422,7 +1472,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto error; } @@ -1473,7 +1523,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto error; } @@ -1621,7 +1671,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto error; } @@ -1639,7 +1689,8 @@ error: } EXPORT_SYMBOL(p9_client_statfs); -int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +int p9_client_rename(struct p9_fid *fid, + struct p9_fid *newdirfid, const char *name) { int err; struct p9_req_t *req; @@ -1666,6 +1717,36 @@ error: } EXPORT_SYMBOL(p9_client_rename); +int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, + struct p9_fid *newdirfid, const char *new_name) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + err = 0; + clnt = olddirfid->clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" + " newdirfid %d new name %s\n", olddirfid->fid, old_name, + newdirfid->fid, new_name); + + req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, + old_name, newdirfid->fid, new_name); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", + newdirfid->fid, new_name); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_renameat); + /* * An xattrwalk without @attr_name gives the fid for the lisxattr namespace */ @@ -1697,7 +1778,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, } err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); p9_free_req(clnt, req); goto clunk_fid; } @@ -1776,7 +1857,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto free_and_error; } @@ -1813,7 +1894,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, @@ -1844,7 +1925,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, @@ -1879,7 +1960,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) err = p9pdu_readf(req->rc, clnt->proto_version, "b", status); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); @@ -1912,7 +1993,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " @@ -1940,7 +2021,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target) err = p9pdu_readf(req->rc, clnt->proto_version, "s", target); if (err) { - p9pdu_dump(1, req->rc); + P9_DUMP_PKT(1, req->rc); goto error; } P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); diff --git a/net/9p/mod.c b/net/9p/mod.c index cf8a4128cd5c..2664d1292291 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -80,14 +80,14 @@ EXPORT_SYMBOL(v9fs_unregister_trans); * @name: string identifying transport * */ -struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(char *s) { struct p9_trans_module *t, *found = NULL; spin_lock(&v9fs_trans_lock); list_for_each_entry(t, &v9fs_trans_list, list) - if (strncmp(t->name, name->from, name->to-name->from) == 0 && + if (strcmp(t->name, s) == 0 && try_module_get(t->owner)) { found = t; break; @@ -139,7 +139,7 @@ void v9fs_put_trans(struct p9_trans_module *m) } /** - * v9fs_init - Initialize module + * init_p9 - Initialize module * */ static int __init init_p9(void) @@ -154,7 +154,7 @@ static int __init init_p9(void) } /** - * v9fs_init - shutdown module + * exit_p9 - shutdown module * */ diff --git a/net/9p/protocol.c b/net/9p/protocol.c index a873277cb996..df58375ea6b3 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -44,30 +44,24 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9pdu_dump(int way, struct p9_fcall *pdu) { - int i, n; - u8 *data = pdu->sdata; - int datalen = pdu->size; - char buf[255]; - int buflen = 255; - - i = n = 0; - if (datalen > (buflen-16)) - datalen = buflen-16; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; + int len = pdu->size; + + if ((p9_debug_level & P9_DEBUG_VPKT) != P9_DEBUG_VPKT) { + if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) { + if (len > 32) + len = 32; + } else { + /* shouldn't happen */ + return; + } } - n += scnprintf(buf + n, buflen - n, "\n"); if (way) - P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); + print_hex_dump_bytes("[9P] ", DUMP_PREFIX_OFFSET, pdu->sdata, + len); else - P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); + print_hex_dump_bytes("]9P[ ", DUMP_PREFIX_OFFSET, pdu->sdata, + len); } #else void @@ -610,7 +604,7 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + P9_DUMP_PKT(0, &fake_pdu); } return ret; @@ -632,11 +626,7 @@ int p9pdu_finalize(struct p9_fcall *pdu) err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) - p9pdu_dump(0, pdu); -#endif - + P9_DUMP_PKT(0, pdu); P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); @@ -669,7 +659,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); - p9pdu_dump(1, &fake_pdu); + P9_DUMP_PKT(1, &fake_pdu); goto out; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 4a9084395d35..fdfdb5747f63 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -916,8 +916,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); sin_server.sin_port = htons(opts.port); - err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, + SOCK_STREAM, IPPROTO_TCP, &csocket, 1); if (err) { P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); return err; @@ -954,7 +954,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) sun_server.sun_family = PF_UNIX; strcpy(sun_server.sun_path, addr); - err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, + SOCK_STREAM, 0, &csocket, 1); if (err < 0) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); return err; diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 844a7a5607e3..159c50f1c6bf 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -589,7 +589,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 244e70742183..175b5135bdcf 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -367,7 +367,7 @@ req_retry_pinned: in += inp; } else { in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, - client->msize); + req->rc->capacity); } err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); @@ -592,7 +592,7 @@ static struct p9_trans_module p9_virtio_trans = { .close = p9_virtio_close, .request = p9_virtio_request, .cancel = p9_virtio_cancel, - .maxsize = PAGE_SIZE*16, + .maxsize = PAGE_SIZE*VIRTQUEUE_NUM, .pref = P9_TRANS_PREF_PAYLOAD_SEP, .def = 0, .owner = THIS_MODULE, diff --git a/net/9p/util.c b/net/9p/util.c index da6af81e59d9..9c1c9348ac35 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -93,7 +93,7 @@ int p9_idpool_get(struct p9_idpool *p) retry: if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return 0; + return -1; spin_lock_irqsave(&p->lock, flags); diff --git a/net/TUNABLE b/net/TUNABLE deleted file mode 100644 index 9913211f07a7..000000000000 --- a/net/TUNABLE +++ /dev/null @@ -1,50 +0,0 @@ -The following parameters should be tunable at compile time. Some of them -exist as sysctls too. - -This is far from complete - -Item Description ----------------------------------------------------------------------------- -MAX_LINKS Maximum number of netlink minor devices. (1-32) -RIF_TABLE_SIZE Token ring RIF cache size (tunable) -AARP_HASH_SIZE Size of Appletalk hash table (tunable) -AX25_DEF_T1 AX.25 parameters. These are all tunable via -AX25_DEF_T2 SIOCAX25SETPARMS -AX25_DEF_T3 T1-T3,N2 have the meanings in the specification -AX25_DEF_N2 -AX25_DEF_AXDEFMODE 8 = normal 128 is PE1CHL extended -AX25_DEF_IPDEFMODE 'D' - datagram 'V' - virtual connection -AX25_DEF_BACKOFF 'E'xponential 'L'inear -AX25_DEF_NETROM Allow netrom 1=Y -AX25_DF_TEXT Allow PID=Text 1=Y -AX25_DEF_WINDOW Window for normal mode -AX25_DEF_EWINDOW Window for PE1CHL mode -AX25_DEF_DIGI 1 for inband 2 for cross band 3 for both -AX25_DEF_CONMODE Allow connected modes 1=Yes -AX25_ROUTE_MAX AX.25 route cache size - no currently tunable -Unnamed (16) Number of protocol hash slots (tunable) -DEV_NUMBUFFS Number of priority levels (not easily tunable) -Unnamed (300) Maximum packet backlog queue (tunable) -MAX_IOVEC Maximum number of iovecs in a message (tunable) -MIN_WINDOW Offered minimum window (tunable) -MAX_WINDOW Offered maximum window (tunable) -MAX_HEADER Largest physical header (tunable) -MAX_ADDR_LEN Largest physical address (tunable) -SOCK_ARRAY_SIZE IP socket array hash size (tunable) -IP_MAX_MEMBERSHIPS Largest number of groups per socket (BSD style) (tunable) -16 Hard coded constant for amount of room allowed for - cache align and faster forwarding (tunable) -IP_FRAG_TIME Time we hold a fragment for. (tunable) -PORT_MASQ_BEGIN First port reserved for masquerade (tunable) -PORT_MASQ_END Last port used for masquerade (tunable) -MASQUERADE_EXPIRE_TCP_FIN Time we keep a masquerade for after a FIN -MASQUERADE_EXPIRE_UDP Time we keep a UDP masquerade for (tunable) -MAXVIFS Maximum mrouted vifs (1-32) -MFC_LINES Lines in the multicast router cache (tunable) - -NetROM parameters are tunable via an ioctl passing a struct - -4000 Size a Unix domain socket malloc falls back to - (tunable) should be 8K - a bit for 8K machines like - the ALPHA - diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 50dce7981321..1acc69576df8 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -779,87 +779,87 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, } switch (function) { - case AARP_REPLY: - if (!unresolved_count) /* Speed up */ - break; - - /* Find the entry. */ - a = __aarp_find_entry(unresolved[hash], dev, &sa); - if (!a || dev != a->dev) - break; + case AARP_REPLY: + if (!unresolved_count) /* Speed up */ + break; - /* We can fill one in - this is good. */ - memcpy(a->hwaddr, ea->hw_src, ETH_ALEN); - __aarp_resolved(&unresolved[hash], a, hash); - if (!unresolved_count) - mod_timer(&aarp_timer, - jiffies + sysctl_aarp_expiry_time); + /* Find the entry. */ + a = __aarp_find_entry(unresolved[hash], dev, &sa); + if (!a || dev != a->dev) break; - case AARP_REQUEST: - case AARP_PROBE: + /* We can fill one in - this is good. */ + memcpy(a->hwaddr, ea->hw_src, ETH_ALEN); + __aarp_resolved(&unresolved[hash], a, hash); + if (!unresolved_count) + mod_timer(&aarp_timer, + jiffies + sysctl_aarp_expiry_time); + break; + + case AARP_REQUEST: + case AARP_PROBE: + + /* + * If it is my address set ma to my address and reply. + * We can treat probe and request the same. Probe + * simply means we shouldn't cache the querying host, + * as in a probe they are proposing an address not + * using one. + * + * Support for proxy-AARP added. We check if the + * address is one of our proxies before we toss the + * packet out. + */ + + sa.s_node = ea->pa_dst_node; + sa.s_net = ea->pa_dst_net; + + /* See if we have a matching proxy. */ + ma = __aarp_proxy_find(dev, &sa); + if (!ma) + ma = &ifa->address; + else { /* We need to make a copy of the entry. */ + da.s_node = sa.s_node; + da.s_net = sa.s_net; + ma = &da; + } + if (function == AARP_PROBE) { /* - * If it is my address set ma to my address and reply. - * We can treat probe and request the same. Probe - * simply means we shouldn't cache the querying host, - * as in a probe they are proposing an address not - * using one. - * - * Support for proxy-AARP added. We check if the - * address is one of our proxies before we toss the - * packet out. + * A probe implies someone trying to get an + * address. So as a precaution flush any + * entries we have for this address. */ + a = __aarp_find_entry(resolved[sa.s_node % + (AARP_HASH_SIZE - 1)], + skb->dev, &sa); - sa.s_node = ea->pa_dst_node; - sa.s_net = ea->pa_dst_net; - - /* See if we have a matching proxy. */ - ma = __aarp_proxy_find(dev, &sa); - if (!ma) - ma = &ifa->address; - else { /* We need to make a copy of the entry. */ - da.s_node = sa.s_node; - da.s_net = sa.s_net; - ma = &da; - } - - if (function == AARP_PROBE) { - /* - * A probe implies someone trying to get an - * address. So as a precaution flush any - * entries we have for this address. - */ - a = __aarp_find_entry(resolved[sa.s_node % - (AARP_HASH_SIZE - 1)], - skb->dev, &sa); - - /* - * Make it expire next tick - that avoids us - * getting into a probe/flush/learn/probe/ - * flush/learn cycle during probing of a slow - * to respond host addr. - */ - if (a) { - a->expires_at = jiffies - 1; - mod_timer(&aarp_timer, jiffies + - sysctl_aarp_tick_time); - } + /* + * Make it expire next tick - that avoids us + * getting into a probe/flush/learn/probe/ + * flush/learn cycle during probing of a slow + * to respond host addr. + */ + if (a) { + a->expires_at = jiffies - 1; + mod_timer(&aarp_timer, jiffies + + sysctl_aarp_tick_time); } + } - if (sa.s_node != ma->s_node) - break; + if (sa.s_node != ma->s_node) + break; - if (sa.s_net && ma->s_net && sa.s_net != ma->s_net) - break; + if (sa.s_net && ma->s_net && sa.s_net != ma->s_net) + break; - sa.s_node = ea->pa_src_node; - sa.s_net = ea->pa_src_net; + sa.s_node = ea->pa_src_node; + sa.s_net = ea->pa_src_net; - /* aarp_my_address has found the address to use for us. - */ - aarp_send_reply(dev, ma, &sa, ea->hw_src); - break; + /* aarp_my_address has found the address to use for us. + */ + aarp_send_reply(dev, ma, &sa, ea->hw_src); + break; } unlock: diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 956a5302002a..b1fe7c35e8d1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -684,192 +684,192 @@ static int atif_ioctl(int cmd, void __user *arg) atif = atalk_find_dev(dev); switch (cmd) { - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - if (dev->type != ARPHRD_ETHER && - dev->type != ARPHRD_LOOPBACK && - dev->type != ARPHRD_LOCALTLK && - dev->type != ARPHRD_PPP) - return -EPROTONOSUPPORT; - - nr = (struct atalk_netrange *)&sa->sat_zero[0]; - add_route = 1; - - /* - * if this is a point-to-point iface, and we already - * have an iface for this AppleTalk address, then we - * should not add a route - */ - if ((dev->flags & IFF_POINTOPOINT) && - atalk_find_interface(sa->sat_addr.s_net, - sa->sat_addr.s_node)) { - printk(KERN_DEBUG "AppleTalk: point-to-point " - "interface added with " - "existing address\n"); - add_route = 0; - } + case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + if (dev->type != ARPHRD_ETHER && + dev->type != ARPHRD_LOOPBACK && + dev->type != ARPHRD_LOCALTLK && + dev->type != ARPHRD_PPP) + return -EPROTONOSUPPORT; + + nr = (struct atalk_netrange *)&sa->sat_zero[0]; + add_route = 1; - /* - * Phase 1 is fine on LocalTalk but we don't do - * EtherTalk phase 1. Anyone wanting to add it go ahead. - */ - if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) - return -EPROTONOSUPPORT; - if (sa->sat_addr.s_node == ATADDR_BCAST || - sa->sat_addr.s_node == 254) - return -EINVAL; - if (atif) { - /* Already setting address */ - if (atif->status & ATIF_PROBE) - return -EBUSY; - - atif->address.s_net = sa->sat_addr.s_net; - atif->address.s_node = sa->sat_addr.s_node; - atrtr_device_down(dev); /* Flush old routes */ - } else { - atif = atif_add_device(dev, &sa->sat_addr); - if (!atif) - return -ENOMEM; - } - atif->nets = *nr; - - /* - * Check if the chosen address is used. If so we - * error and atalkd will try another. - */ - - if (!(dev->flags & IFF_LOOPBACK) && - !(dev->flags & IFF_POINTOPOINT) && - atif_probe_device(atif) < 0) { - atif_drop_device(dev); - return -EADDRINUSE; - } - - /* Hey it worked - add the direct routes */ - sa = (struct sockaddr_at *)&rtdef.rt_gateway; - sa->sat_family = AF_APPLETALK; - sa->sat_addr.s_net = atif->address.s_net; - sa->sat_addr.s_node = atif->address.s_node; - sa = (struct sockaddr_at *)&rtdef.rt_dst; - rtdef.rt_flags = RTF_UP; - sa->sat_family = AF_APPLETALK; - sa->sat_addr.s_node = ATADDR_ANYNODE; - if (dev->flags & IFF_LOOPBACK || - dev->flags & IFF_POINTOPOINT) - rtdef.rt_flags |= RTF_HOST; - - /* Routerless initial state */ - if (nr->nr_firstnet == htons(0) && - nr->nr_lastnet == htons(0xFFFE)) { - sa->sat_addr.s_net = atif->address.s_net; - atrtr_create(&rtdef, dev); - atrtr_set_default(dev); - } else { - limit = ntohs(nr->nr_lastnet); - if (limit - ntohs(nr->nr_firstnet) > 4096) { - printk(KERN_WARNING "Too many routes/" - "iface.\n"); - return -EINVAL; - } - if (add_route) - for (ct = ntohs(nr->nr_firstnet); - ct <= limit; ct++) { - sa->sat_addr.s_net = htons(ct); - atrtr_create(&rtdef, dev); - } - } - dev_mc_add_global(dev, aarp_mcast); - return 0; + /* + * if this is a point-to-point iface, and we already + * have an iface for this AppleTalk address, then we + * should not add a route + */ + if ((dev->flags & IFF_POINTOPOINT) && + atalk_find_interface(sa->sat_addr.s_net, + sa->sat_addr.s_node)) { + printk(KERN_DEBUG "AppleTalk: point-to-point " + "interface added with " + "existing address\n"); + add_route = 0; + } - case SIOCGIFADDR: + /* + * Phase 1 is fine on LocalTalk but we don't do + * EtherTalk phase 1. Anyone wanting to add it go ahead. + */ + if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) + return -EPROTONOSUPPORT; + if (sa->sat_addr.s_node == ATADDR_BCAST || + sa->sat_addr.s_node == 254) + return -EINVAL; + if (atif) { + /* Already setting address */ + if (atif->status & ATIF_PROBE) + return -EBUSY; + + atif->address.s_net = sa->sat_addr.s_net; + atif->address.s_node = sa->sat_addr.s_node; + atrtr_device_down(dev); /* Flush old routes */ + } else { + atif = atif_add_device(dev, &sa->sat_addr); if (!atif) - return -EADDRNOTAVAIL; + return -ENOMEM; + } + atif->nets = *nr; - sa->sat_family = AF_APPLETALK; - sa->sat_addr = atif->address; - break; + /* + * Check if the chosen address is used. If so we + * error and atalkd will try another. + */ - case SIOCGIFBRDADDR: - if (!atif) - return -EADDRNOTAVAIL; + if (!(dev->flags & IFF_LOOPBACK) && + !(dev->flags & IFF_POINTOPOINT) && + atif_probe_device(atif) < 0) { + atif_drop_device(dev); + return -EADDRINUSE; + } - sa->sat_family = AF_APPLETALK; + /* Hey it worked - add the direct routes */ + sa = (struct sockaddr_at *)&rtdef.rt_gateway; + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_net = atif->address.s_net; + sa->sat_addr.s_node = atif->address.s_node; + sa = (struct sockaddr_at *)&rtdef.rt_dst; + rtdef.rt_flags = RTF_UP; + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_node = ATADDR_ANYNODE; + if (dev->flags & IFF_LOOPBACK || + dev->flags & IFF_POINTOPOINT) + rtdef.rt_flags |= RTF_HOST; + + /* Routerless initial state */ + if (nr->nr_firstnet == htons(0) && + nr->nr_lastnet == htons(0xFFFE)) { sa->sat_addr.s_net = atif->address.s_net; - sa->sat_addr.s_node = ATADDR_BCAST; - break; - - case SIOCATALKDIFADDR: - case SIOCDIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) + atrtr_create(&rtdef, dev); + atrtr_set_default(dev); + } else { + limit = ntohs(nr->nr_lastnet); + if (limit - ntohs(nr->nr_firstnet) > 4096) { + printk(KERN_WARNING "Too many routes/" + "iface.\n"); return -EINVAL; - atalk_dev_down(dev); - break; - - case SIOCSARP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - /* - * for now, we only support proxy AARP on ELAP; - * we should be able to do it for LocalTalk, too. - */ - if (dev->type != ARPHRD_ETHER) - return -EPROTONOSUPPORT; - - /* - * atif points to the current interface on this network; - * we aren't concerned about its current status (at - * least for now), but it has all the settings about - * the network we're going to probe. Consequently, it - * must exist. - */ - if (!atif) - return -EADDRNOTAVAIL; + } + if (add_route) + for (ct = ntohs(nr->nr_firstnet); + ct <= limit; ct++) { + sa->sat_addr.s_net = htons(ct); + atrtr_create(&rtdef, dev); + } + } + dev_mc_add_global(dev, aarp_mcast); + return 0; + + case SIOCGIFADDR: + if (!atif) + return -EADDRNOTAVAIL; + + sa->sat_family = AF_APPLETALK; + sa->sat_addr = atif->address; + break; + + case SIOCGIFBRDADDR: + if (!atif) + return -EADDRNOTAVAIL; + + sa->sat_family = AF_APPLETALK; + sa->sat_addr.s_net = atif->address.s_net; + sa->sat_addr.s_node = ATADDR_BCAST; + break; + + case SIOCATALKDIFADDR: + case SIOCDIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + atalk_dev_down(dev); + break; - nr = (struct atalk_netrange *)&(atif->nets); - /* - * Phase 1 is fine on Localtalk but we don't do - * Ethertalk phase 1. Anyone wanting to add it go ahead. - */ - if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) - return -EPROTONOSUPPORT; + case SIOCSARP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + /* + * for now, we only support proxy AARP on ELAP; + * we should be able to do it for LocalTalk, too. + */ + if (dev->type != ARPHRD_ETHER) + return -EPROTONOSUPPORT; - if (sa->sat_addr.s_node == ATADDR_BCAST || - sa->sat_addr.s_node == 254) - return -EINVAL; + /* + * atif points to the current interface on this network; + * we aren't concerned about its current status (at + * least for now), but it has all the settings about + * the network we're going to probe. Consequently, it + * must exist. + */ + if (!atif) + return -EADDRNOTAVAIL; - /* - * Check if the chosen address is used. If so we - * error and ATCP will try another. - */ - if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0) - return -EADDRINUSE; + nr = (struct atalk_netrange *)&(atif->nets); + /* + * Phase 1 is fine on Localtalk but we don't do + * Ethertalk phase 1. Anyone wanting to add it go ahead. + */ + if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2) + return -EPROTONOSUPPORT; - /* - * We now have an address on the local network, and - * the AARP code will defend it for us until we take it - * down. We don't set up any routes right now, because - * ATCP will install them manually via SIOCADDRT. - */ - break; + if (sa->sat_addr.s_node == ATADDR_BCAST || + sa->sat_addr.s_node == 254) + return -EINVAL; - case SIOCDARP: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (sa->sat_family != AF_APPLETALK) - return -EINVAL; - if (!atif) - return -EADDRNOTAVAIL; + /* + * Check if the chosen address is used. If so we + * error and ATCP will try another. + */ + if (atif_proxy_probe_device(atif, &(sa->sat_addr)) < 0) + return -EADDRINUSE; - /* give to aarp module to remove proxy entry */ - aarp_proxy_remove(atif->dev, &(sa->sat_addr)); - return 0; + /* + * We now have an address on the local network, and + * the AARP code will defend it for us until we take it + * down. We don't set up any routes right now, because + * ATCP will install them manually via SIOCADDRT. + */ + break; + + case SIOCDARP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sa->sat_family != AF_APPLETALK) + return -EINVAL; + if (!atif) + return -EADDRNOTAVAIL; + + /* give to aarp module to remove proxy entry */ + aarp_proxy_remove(atif->dev, &(sa->sat_addr)); + return 0; } return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0; @@ -884,25 +884,25 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) return -EFAULT; switch (cmd) { - case SIOCDELRT: - if (rt.rt_dst.sa_family != AF_APPLETALK) - return -EINVAL; - return atrtr_delete(&((struct sockaddr_at *) - &rt.rt_dst)->sat_addr); - - case SIOCADDRT: { - struct net_device *dev = NULL; - if (rt.rt_dev) { - char name[IFNAMSIZ]; - if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) - return -EFAULT; - name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(&init_net, name); - if (!dev) - return -ENODEV; - } - return atrtr_create(&rt, dev); + case SIOCDELRT: + if (rt.rt_dst.sa_family != AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *) + &rt.rt_dst)->sat_addr); + + case SIOCADDRT: { + struct net_device *dev = NULL; + if (rt.rt_dev) { + char name[IFNAMSIZ]; + if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; + dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; } + return atrtr_create(&rt, dev); + } } return -EINVAL; } diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index fc63526d8695..f41f02656ff4 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -9,7 +9,7 @@ #include <linux/sonet.h> #include <linux/bitops.h> #include <linux/errno.h> -#include <asm/atomic.h> +#include <linux/atomic.h> int atm_charge(struct atm_vcc *vcc, int truesize) { diff --git a/net/atm/clip.c b/net/atm/clip.c index 1d4be60e1390..852394072fa1 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -37,7 +37,7 @@ #include <linux/uaccess.h> #include <asm/byteorder.h> /* for htons etc. */ #include <asm/system.h> /* save/restore_flags */ -#include <asm/atomic.h> +#include <linux/atomic.h> #include "common.h" #include "resources.h" @@ -271,10 +271,8 @@ static const struct neigh_ops clip_neigh_ops = { .family = AF_INET, .solicit = clip_neigh_solicit, .error_report = clip_neigh_error, - .output = dev_queue_xmit, - .connected_output = dev_queue_xmit, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, + .output = neigh_direct_output, + .connected_output = neigh_direct_output, }; static int clip_constructor(struct neighbour *neigh) @@ -364,33 +362,37 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct clip_priv *clip_priv = PRIV(dev); + struct dst_entry *dst = skb_dst(skb); struct atmarp_entry *entry; + struct neighbour *n; struct atm_vcc *vcc; int old; unsigned long flags; pr_debug("(skb %p)\n", skb); - if (!skb_dst(skb)) { + if (!dst) { pr_err("skb_dst(skb) == NULL\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - if (!skb_dst(skb)->neighbour) { + n = dst_get_neighbour(dst); + if (!n) { #if 0 - skb_dst(skb)->neighbour = clip_find_neighbour(skb_dst(skb), 1); - if (!skb_dst(skb)->neighbour) { + n = clip_find_neighbour(skb_dst(skb), 1); + if (!n) { dev_kfree_skb(skb); /* lost that one */ dev->stats.tx_dropped++; return 0; } + dst_set_neighbour(dst, n); #endif pr_err("NO NEIGHBOUR !\n"); dev_kfree_skb(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; } - entry = NEIGH2ENTRY(skb_dst(skb)->neighbour); + entry = NEIGH2ENTRY(n); if (!entry->vccs) { if (time_after(jiffies, entry->expires)) { /* should be resolved */ @@ -407,7 +409,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, } pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - pr_debug("using neighbour %p, vcc %p\n", skb_dst(skb)->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", n, vcc); if (entry->vccs->encap) { void *here; diff --git a/net/atm/common.c b/net/atm/common.c index 22b963d06a10..14ff9fe39989 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -23,7 +23,7 @@ #include <linux/uaccess.h> #include <linux/poll.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "resources.h" /* atm_find_dev */ #include "common.h" /* prototypes */ diff --git a/net/atm/lec.c b/net/atm/lec.c index 25073b6ef474..215c9fad7cdf 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1171,7 +1171,7 @@ static int __init lane_module_init(void) #endif register_atm_ioctl(&lane_ioctl_ops); - pr_info("lec.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("lec.c: initialized\n"); return 0; } @@ -1335,7 +1335,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #include <linux/types.h> #include <linux/timer.h> #include <linux/param.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/inetdevice.h> #include <net/route.h> diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 644cdf071642..aa972409f093 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1005,7 +1005,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, struct mpoa_client *mpc; struct lec_priv *priv; - dev = (struct net_device *)dev_ptr; + dev = dev_ptr; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1482,7 +1482,7 @@ static __init int atm_mpoa_init(void) if (mpc_proc_init() != 0) pr_info("failed to initialize /proc/mpoa\n"); - pr_info("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); + pr_info("mpc.c: initialized\n"); return 0; } diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index e9aced0ec56b..db4a11c61d15 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -37,6 +37,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/atm.h> diff --git a/net/atm/proc.c b/net/atm/proc.c index be3afdefec58..0d020de8d233 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -27,7 +27,7 @@ #include <net/atmclip.h> #include <linux/uaccess.h> #include <linux/param.h> /* for HZ */ -#include <asm/atomic.h> +#include <linux/atomic.h> #include "resources.h" #include "common.h" /* atm_proc_init prototype */ #include "signaling.h" /* to get sigd - ugly too */ diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 6c051ad833eb..2b68d068eaf3 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -5,6 +5,7 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" depends on NET + select CRC16 default n ---help--- diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index a8c32030527c..69467fe71ff2 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -20,28 +20,26 @@ */ #include "main.h" +#include "translation-table.h" #include "aggregation.h" #include "send.h" #include "routing.h" #include "hard-interface.h" -/* calculate the size of the tt information for a given packet */ -static int tt_len(struct batman_packet *batman_packet) -{ - return batman_packet->num_tt * ETH_ALEN; -} - /* return true if new_packet can be aggregated with forw_packet */ -static bool can_aggregate_with(struct batman_packet *new_batman_packet, +static bool can_aggregate_with(const struct batman_packet *new_batman_packet, + struct bat_priv *bat_priv, int packet_len, unsigned long send_time, bool directlink, - struct hard_iface *if_incoming, - struct forw_packet *forw_packet) + const struct hard_iface *if_incoming, + const struct forw_packet *forw_packet) { struct batman_packet *batman_packet = (struct batman_packet *)forw_packet->skb->data; int aggregated_bytes = forw_packet->packet_len + packet_len; + struct hard_iface *primary_if = NULL; + bool res = false; /** * we can aggregate the current packet to this aggregated packet @@ -66,6 +64,10 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, * packet */ + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + /* packets without direct link flag and high TTL * are flooded through the net */ if ((!directlink) && @@ -75,8 +77,10 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, /* own packets originating non-primary * interfaces leave only that interface */ ((!forw_packet->own) || - (forw_packet->if_incoming->if_num == 0))) - return true; + (forw_packet->if_incoming == primary_if))) { + res = true; + goto out; + } /* if the incoming packet is sent via this one * interface only - we still can aggregate */ @@ -89,16 +93,22 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, * (= secondary interface packets in general) */ (batman_packet->flags & DIRECTLINK || (forw_packet->own && - forw_packet->if_incoming->if_num != 0))) - return true; + forw_packet->if_incoming != primary_if))) { + res = true; + goto out; + } } - return false; +out: + if (primary_if) + hardif_free_ref(primary_if); + return res; } /* create a new aggregated packet and add this packet to it */ -static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, - unsigned long send_time, bool direct_link, +static void new_aggregated_packet(const unsigned char *packet_buff, + int packet_len, unsigned long send_time, + bool direct_link, struct hard_iface *if_incoming, int own_packet) { @@ -118,7 +128,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, } } - forw_packet_aggr = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); if (!forw_packet_aggr) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); @@ -150,7 +160,7 @@ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, forw_packet_aggr->own = own_packet; forw_packet_aggr->if_incoming = if_incoming; forw_packet_aggr->num_packets = 0; - forw_packet_aggr->direct_link_flags = 0; + forw_packet_aggr->direct_link_flags = NO_FLAGS; forw_packet_aggr->send_time = send_time; /* save packet direct link flag status */ @@ -176,8 +186,7 @@ out: /* aggregate a new packet into the existing aggregation */ static void aggregate(struct forw_packet *forw_packet_aggr, - unsigned char *packet_buff, - int packet_len, + const unsigned char *packet_buff, int packet_len, bool direct_link) { unsigned char *skb_buff; @@ -195,7 +204,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr, void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, char own_packet, + struct hard_iface *if_incoming, int own_packet, unsigned long send_time) { /** @@ -215,6 +224,7 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv, hlist_for_each_entry(forw_packet_pos, tmp_node, &bat_priv->forw_bat_list, list) { if (can_aggregate_with(batman_packet, + bat_priv, packet_len, send_time, direct_link, @@ -253,8 +263,9 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv, } /* unpack the aggregated packets and process them one by one */ -void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct hard_iface *if_incoming) +void receive_aggr_bat_packet(const struct ethhdr *ethhdr, + unsigned char *packet_buff, int packet_len, + struct hard_iface *if_incoming) { struct batman_packet *batman_packet; int buff_pos = 0; @@ -263,18 +274,20 @@ void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, batman_packet = (struct batman_packet *)packet_buff; do { - /* network to host order for our 32bit seqno, and the - orig_interval. */ + /* network to host order for our 32bit seqno and the + orig_interval */ batman_packet->seqno = ntohl(batman_packet->seqno); + batman_packet->tt_crc = ntohs(batman_packet->tt_crc); tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN; - receive_bat_packet(ethhdr, batman_packet, - tt_buff, tt_len(batman_packet), - if_incoming); - buff_pos += BAT_PACKET_LEN + tt_len(batman_packet); + receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming); + + buff_pos += BAT_PACKET_LEN + + tt_len(batman_packet->tt_num_changes); + batman_packet = (struct batman_packet *) (packet_buff + buff_pos); } while (aggregated_packet(buff_pos, packet_len, - batman_packet->num_tt)); + batman_packet->tt_num_changes)); } diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 7e6d72fbf540..216337bb841f 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -25,9 +25,11 @@ #include "main.h" /* is there another aggregated packet here? */ -static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt) +static inline int aggregated_packet(int buff_pos, int packet_len, + int tt_num_changes) { - int next_buff_pos = buff_pos + BAT_PACKET_LEN + (num_tt * ETH_ALEN); + int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes * + sizeof(struct tt_change)); return (next_buff_pos <= packet_len) && (next_buff_pos <= MAX_AGGREGATION_BYTES); @@ -35,9 +37,10 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_tt) void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct hard_iface *if_incoming, char own_packet, + struct hard_iface *if_incoming, int own_packet, unsigned long send_time); -void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct hard_iface *if_incoming); +void receive_aggr_bat_packet(const struct ethhdr *ethhdr, + unsigned char *packet_buff, int packet_len, + struct hard_iface *if_incoming); #endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index abaeec5f6247..d0af9bf69e46 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -50,7 +50,8 @@ static void emit_log_char(struct debug_log *debug_log, char c) debug_log->log_start = debug_log->log_end - log_buff_len; } -static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) +__printf(2, 3) +static int fdebug_log(struct debug_log *debug_log, const char *fmt, ...) { va_list args; static char debug_log_buf[256]; @@ -74,14 +75,14 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) return 0; } -int debug_log(struct bat_priv *bat_priv, char *fmt, ...) +int debug_log(struct bat_priv *bat_priv, const char *fmt, ...) { va_list args; char tmp_log_buf[256]; va_start(args, fmt); vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); - fdebug_log(bat_priv->debug_log, "[%10u] %s", + fdebug_log(bat_priv->debug_log, "[%10lu] %s", (jiffies / HZ), tmp_log_buf); va_end(args); @@ -114,7 +115,7 @@ static ssize_t log_read(struct file *file, char __user *buf, !(debug_log->log_end - debug_log->log_start)) return -EAGAIN; - if ((!buf) || (count < 0)) + if (!buf) return -EINVAL; if (count == 0) @@ -184,7 +185,7 @@ static int debug_log_setup(struct bat_priv *bat_priv) if (!bat_priv->debug_dir) goto err; - bat_priv->debug_log = kzalloc(sizeof(struct debug_log), GFP_ATOMIC); + bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); if (!bat_priv->debug_log) goto err; diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index 497a0700cc3c..cd15deba60a1 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -28,9 +28,31 @@ #include "gateway_client.h" #include "vis.h" -#define to_dev(obj) container_of(obj, struct device, kobj) -#define kobj_to_netdev(obj) to_net_dev(to_dev(obj->parent)) -#define kobj_to_batpriv(obj) netdev_priv(kobj_to_netdev(obj)) +static struct net_device *kobj_to_netdev(struct kobject *obj) +{ + struct device *dev = container_of(obj->parent, struct device, kobj); + return to_net_dev(dev); +} + +static struct bat_priv *kobj_to_batpriv(struct kobject *obj) +{ + struct net_device *net_dev = kobj_to_netdev(obj); + return netdev_priv(net_dev); +} + +#define UEV_TYPE_VAR "BATTYPE=" +#define UEV_ACTION_VAR "BATACTION=" +#define UEV_DATA_VAR "BATDATA=" + +static char *uev_action_str[] = { + "add", + "del", + "change" +}; + +static char *uev_type_str[] = { + "gw" +}; /* Use this, if you have customized show and store functions */ #define BAT_ATTR(_name, _mode, _show, _store) \ @@ -96,7 +118,7 @@ ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \ static int store_bool_attr(char *buff, size_t count, struct net_device *net_dev, - char *attr_name, atomic_t *attr) + const char *attr_name, atomic_t *attr) { int enabled = -1; @@ -138,16 +160,15 @@ static inline ssize_t __store_bool_attr(char *buff, size_t count, { int ret; - ret = store_bool_attr(buff, count, net_dev, (char *)attr->name, - attr_store); + ret = store_bool_attr(buff, count, net_dev, attr->name, attr_store); if (post_func && ret) post_func(net_dev); return ret; } -static int store_uint_attr(char *buff, size_t count, - struct net_device *net_dev, char *attr_name, +static int store_uint_attr(const char *buff, size_t count, + struct net_device *net_dev, const char *attr_name, unsigned int min, unsigned int max, atomic_t *attr) { unsigned long uint_val; @@ -183,15 +204,15 @@ static int store_uint_attr(char *buff, size_t count, return count; } -static inline ssize_t __store_uint_attr(char *buff, size_t count, +static inline ssize_t __store_uint_attr(const char *buff, size_t count, int min, int max, void (*post_func)(struct net_device *), - struct attribute *attr, + const struct attribute *attr, atomic_t *attr_store, struct net_device *net_dev) { int ret; - ret = store_uint_attr(buff, count, net_dev, (char *)attr->name, + ret = store_uint_attr(buff, count, net_dev, attr->name, min, max, attr_store); if (post_func && ret) post_func(net_dev); @@ -368,7 +389,7 @@ BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE, static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth, store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_DEBUG -BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 3, NULL); +BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 7, NULL); #endif static struct bat_attribute *mesh_attrs[] = { @@ -594,3 +615,60 @@ void sysfs_del_hardif(struct kobject **hardif_obj) kobject_put(*hardif_obj); *hardif_obj = NULL; } + +int throw_uevent(struct bat_priv *bat_priv, enum uev_type type, + enum uev_action action, const char *data) +{ + int ret = -1; + struct hard_iface *primary_if = NULL; + struct kobject *bat_kobj; + char *uevent_env[4] = { NULL, NULL, NULL, NULL }; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + bat_kobj = &primary_if->soft_iface->dev.kobj; + + uevent_env[0] = kmalloc(strlen(UEV_TYPE_VAR) + + strlen(uev_type_str[type]) + 1, + GFP_ATOMIC); + if (!uevent_env[0]) + goto out; + + sprintf(uevent_env[0], "%s%s", UEV_TYPE_VAR, uev_type_str[type]); + + uevent_env[1] = kmalloc(strlen(UEV_ACTION_VAR) + + strlen(uev_action_str[action]) + 1, + GFP_ATOMIC); + if (!uevent_env[1]) + goto out; + + sprintf(uevent_env[1], "%s%s", UEV_ACTION_VAR, uev_action_str[action]); + + /* If the event is DEL, ignore the data field */ + if (action != UEV_DEL) { + uevent_env[2] = kmalloc(strlen(UEV_DATA_VAR) + + strlen(data) + 1, GFP_ATOMIC); + if (!uevent_env[2]) + goto out; + + sprintf(uevent_env[2], "%s%s", UEV_DATA_VAR, data); + } + + ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env); +out: + kfree(uevent_env[0]); + kfree(uevent_env[1]); + kfree(uevent_env[2]); + + if (primary_if) + hardif_free_ref(primary_if); + + if (ret) + bat_dbg(DBG_BATMAN, bat_priv, "Impossible to send " + "uevent for (%s,%s,%s) event (err: %d)\n", + uev_type_str[type], uev_action_str[action], + (action == UEV_DEL ? "NULL" : data), ret); + return ret; +} diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h index 02f1fa7aadfa..a3f75a723c56 100644 --- a/net/batman-adv/bat_sysfs.h +++ b/net/batman-adv/bat_sysfs.h @@ -38,5 +38,7 @@ int sysfs_add_meshif(struct net_device *dev); void sysfs_del_meshif(struct net_device *dev); int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev); void sysfs_del_hardif(struct kobject **hardif_obj); +int throw_uevent(struct bat_priv *bat_priv, enum uev_type type, + enum uev_action action, const char *data); #endif /* _NET_BATMAN_ADV_SYSFS_H_ */ diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index ad2ca925b3e0..c1f4bfc09cc3 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -26,8 +26,8 @@ /* returns true if the corresponding bit in the given seq_bits indicates true * and curr_seqno is within range of last_seqno */ -uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, - uint32_t curr_seqno) +int get_bit_status(const unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno) { int32_t diff, word_offset, word_num; @@ -127,10 +127,10 @@ static void bit_reset_window(unsigned long *seq_bits) * 1 if the window was moved (either new or very old) * 0 if the window was not moved/shifted. */ -char bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int8_t set_mark) +int bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int set_mark) { - struct bat_priv *bat_priv = (struct bat_priv *)priv; + struct bat_priv *bat_priv = priv; /* sequence number is slightly older. We already got a sequence number * higher than this one, so we just mark it. */ @@ -190,7 +190,7 @@ char bit_get_packet(void *priv, unsigned long *seq_bits, /* count the hamming weight, how many good packets did we receive? just count * the 1's. */ -int bit_packet_count(unsigned long *seq_bits) +int bit_packet_count(const unsigned long *seq_bits) { int i, hamming = 0; diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 769c246d1fc1..9c04422aeb07 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -26,8 +26,8 @@ /* returns true if the corresponding bit in the given seq_bits indicates true * and curr_seqno is within range of last_seqno */ -uint8_t get_bit_status(unsigned long *seq_bits, uint32_t last_seqno, - uint32_t curr_seqno); +int get_bit_status(const unsigned long *seq_bits, uint32_t last_seqno, + uint32_t curr_seqno); /* turn corresponding bit on, so we can remember that we got the packet */ void bit_mark(unsigned long *seq_bits, int32_t n); @@ -35,10 +35,10 @@ void bit_mark(unsigned long *seq_bits, int32_t n); /* receive and process one packet, returns 1 if received seq_num is considered * new, 0 if old */ -char bit_get_packet(void *priv, unsigned long *seq_bits, - int32_t seq_num_diff, int8_t set_mark); +int bit_get_packet(void *priv, unsigned long *seq_bits, + int32_t seq_num_diff, int set_mark); /* count the hamming weight, how many good packets did we receive? */ -int bit_packet_count(unsigned long *seq_bits); +int bit_packet_count(const unsigned long *seq_bits); #endif /* _NET_BATMAN_ADV_BITARRAY_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 61605a0f3f39..056180ef9e1a 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,15 +20,22 @@ */ #include "main.h" +#include "bat_sysfs.h" #include "gateway_client.h" #include "gateway_common.h" #include "hard-interface.h" #include "originator.h" +#include "routing.h" #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/udp.h> #include <linux/if_vlan.h> +/* This is the offset of the options field in a dhcp packet starting at + * the beginning of the dhcp header */ +#define DHCP_OPTIONS_OFFSET 240 +#define DHCP_REQUEST 3 + static void gw_node_free_ref(struct gw_node *gw_node) { if (atomic_dec_and_test(&gw_node->refcount)) @@ -86,7 +93,7 @@ static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) new_gw_node = NULL; - curr_gw_node = bat_priv->curr_gw; + curr_gw_node = rcu_dereference_protected(bat_priv->curr_gw, 1); rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); if (curr_gw_node) @@ -97,40 +104,19 @@ static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) void gw_deselect(struct bat_priv *bat_priv) { - gw_select(bat_priv, NULL); + atomic_set(&bat_priv->gw_reselect, 1); } -void gw_election(struct bat_priv *bat_priv) +static struct gw_node *gw_get_best_gw_node(struct bat_priv *bat_priv) { - struct hlist_node *node; - struct gw_node *gw_node, *curr_gw = NULL, *curr_gw_tmp = NULL; struct neigh_node *router; - uint8_t max_tq = 0; + struct hlist_node *node; + struct gw_node *gw_node, *curr_gw = NULL; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; + uint8_t max_tq = 0; int down, up; - /** - * The batman daemon checks here if we already passed a full originator - * cycle in order to make sure we don't choose the first gateway we - * hear about. This check is based on the daemon's uptime which we - * don't have. - **/ - if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) - return; - - curr_gw = gw_get_selected_gw_node(bat_priv); - if (curr_gw) - goto out; - rcu_read_lock(); - if (hlist_empty(&bat_priv->gw_list)) { - bat_dbg(DBG_BATMAN, bat_priv, - "Removing selected gateway - " - "no gateway in range\n"); - gw_deselect(bat_priv); - goto unlock; - } - hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) { if (gw_node->deleted) continue; @@ -139,6 +125,9 @@ void gw_election(struct bat_priv *bat_priv) if (!router) continue; + if (!atomic_inc_not_zero(&gw_node->refcount)) + goto next; + switch (atomic_read(&bat_priv->gw_sel_class)) { case 1: /* fast connection */ gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, @@ -151,8 +140,12 @@ void gw_election(struct bat_priv *bat_priv) if ((tmp_gw_factor > max_gw_factor) || ((tmp_gw_factor == max_gw_factor) && - (router->tq_avg > max_tq))) - curr_gw_tmp = gw_node; + (router->tq_avg > max_tq))) { + if (curr_gw) + gw_node_free_ref(curr_gw); + curr_gw = gw_node; + atomic_inc(&curr_gw->refcount); + } break; default: /** @@ -163,8 +156,12 @@ void gw_election(struct bat_priv *bat_priv) * soon as a better gateway appears which has * $routing_class more tq points) **/ - if (router->tq_avg > max_tq) - curr_gw_tmp = gw_node; + if (router->tq_avg > max_tq) { + if (curr_gw) + gw_node_free_ref(curr_gw); + curr_gw = gw_node; + atomic_inc(&curr_gw->refcount); + } break; } @@ -174,42 +171,81 @@ void gw_election(struct bat_priv *bat_priv) if (tmp_gw_factor > max_gw_factor) max_gw_factor = tmp_gw_factor; + gw_node_free_ref(gw_node); + +next: neigh_node_free_ref(router); } + rcu_read_unlock(); - if (curr_gw != curr_gw_tmp) { - router = orig_node_get_router(curr_gw_tmp->orig_node); - if (!router) - goto unlock; + return curr_gw; +} - if ((curr_gw) && (!curr_gw_tmp)) - bat_dbg(DBG_BATMAN, bat_priv, - "Removing selected gateway - " - "no gateway in range\n"); - else if ((!curr_gw) && (curr_gw_tmp)) - bat_dbg(DBG_BATMAN, bat_priv, - "Adding route to gateway %pM " - "(gw_flags: %i, tq: %i)\n", - curr_gw_tmp->orig_node->orig, - curr_gw_tmp->orig_node->gw_flags, - router->tq_avg); - else - bat_dbg(DBG_BATMAN, bat_priv, - "Changing route to gateway %pM " - "(gw_flags: %i, tq: %i)\n", - curr_gw_tmp->orig_node->orig, - curr_gw_tmp->orig_node->gw_flags, - router->tq_avg); +void gw_election(struct bat_priv *bat_priv) +{ + struct gw_node *curr_gw = NULL, *next_gw = NULL; + struct neigh_node *router = NULL; + char gw_addr[18] = { '\0' }; - neigh_node_free_ref(router); - gw_select(bat_priv, curr_gw_tmp); + /** + * The batman daemon checks here if we already passed a full originator + * cycle in order to make sure we don't choose the first gateway we + * hear about. This check is based on the daemon's uptime which we + * don't have. + **/ + if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) + goto out; + + if (!atomic_dec_not_zero(&bat_priv->gw_reselect)) + goto out; + + curr_gw = gw_get_selected_gw_node(bat_priv); + + next_gw = gw_get_best_gw_node(bat_priv); + + if (curr_gw == next_gw) + goto out; + + if (next_gw) { + sprintf(gw_addr, "%pM", next_gw->orig_node->orig); + + router = orig_node_get_router(next_gw->orig_node); + if (!router) { + gw_deselect(bat_priv); + goto out; + } } -unlock: - rcu_read_unlock(); + if ((curr_gw) && (!next_gw)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Removing selected gateway - no gateway in range\n"); + throw_uevent(bat_priv, UEV_GW, UEV_DEL, NULL); + } else if ((!curr_gw) && (next_gw)) { + bat_dbg(DBG_BATMAN, bat_priv, + "Adding route to gateway %pM (gw_flags: %i, tq: %i)\n", + next_gw->orig_node->orig, + next_gw->orig_node->gw_flags, + router->tq_avg); + throw_uevent(bat_priv, UEV_GW, UEV_ADD, gw_addr); + } else { + bat_dbg(DBG_BATMAN, bat_priv, + "Changing route to gateway %pM " + "(gw_flags: %i, tq: %i)\n", + next_gw->orig_node->orig, + next_gw->orig_node->gw_flags, + router->tq_avg); + throw_uevent(bat_priv, UEV_GW, UEV_CHANGE, gw_addr); + } + + gw_select(bat_priv, next_gw); + out: if (curr_gw) gw_node_free_ref(curr_gw); + if (next_gw) + gw_node_free_ref(next_gw); + if (router) + neigh_node_free_ref(router); } void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) @@ -273,11 +309,10 @@ static void gw_node_add(struct bat_priv *bat_priv, struct gw_node *gw_node; int down, up; - gw_node = kmalloc(sizeof(struct gw_node), GFP_ATOMIC); + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); if (!gw_node) return; - memset(gw_node, 0, sizeof(struct gw_node)); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); @@ -323,7 +358,7 @@ void gw_node_update(struct bat_priv *bat_priv, gw_node->deleted = 0; - if (new_gwflags == 0) { + if (new_gwflags == NO_FLAGS) { gw_node->deleted = jiffies; bat_dbg(DBG_BATMAN, bat_priv, "Gateway %pM removed from gateway list\n", @@ -336,7 +371,7 @@ void gw_node_update(struct bat_priv *bat_priv, goto unlock; } - if (new_gwflags == 0) + if (new_gwflags == NO_FLAGS) goto unlock; gw_node_add(bat_priv, orig_node, new_gwflags); @@ -353,7 +388,7 @@ unlock: void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node) { - return gw_node_update(bat_priv, orig_node, 0); + gw_node_update(bat_priv, orig_node, 0); } void gw_node_purge(struct bat_priv *bat_priv) @@ -361,7 +396,7 @@ void gw_node_purge(struct bat_priv *bat_priv) struct gw_node *gw_node, *curr_gw; struct hlist_node *node, *node_tmp; unsigned long timeout = 2 * PURGE_TIMEOUT * HZ; - char do_deselect = 0; + int do_deselect = 0; curr_gw = gw_get_selected_gw_node(bat_priv); @@ -394,8 +429,8 @@ void gw_node_purge(struct bat_priv *bat_priv) /** * fails if orig_node has no router */ -static int _write_buffer_text(struct bat_priv *bat_priv, - struct seq_file *seq, struct gw_node *gw_node) +static int _write_buffer_text(struct bat_priv *bat_priv, struct seq_file *seq, + const struct gw_node *gw_node) { struct gw_node *curr_gw; struct neigh_node *router; @@ -452,10 +487,9 @@ int gw_client_seq_print_text(struct seq_file *seq, void *offset) } seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... " - "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", + "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", "Gateway", "#", TQ_MAX_VALUE, "Nexthop", - "outgoingIF", SOURCE_VERSION, REVISION_VERSION_STR, - primary_if->net_dev->name, + "outgoingIF", SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name); rcu_read_lock(); @@ -480,14 +514,75 @@ out: return ret; } -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) +static bool is_type_dhcprequest(struct sk_buff *skb, int header_len) +{ + int ret = false; + unsigned char *p; + int pkt_len; + + if (skb_linearize(skb) < 0) + goto out; + + pkt_len = skb_headlen(skb); + + if (pkt_len < header_len + DHCP_OPTIONS_OFFSET + 1) + goto out; + + p = skb->data + header_len + DHCP_OPTIONS_OFFSET; + pkt_len -= header_len + DHCP_OPTIONS_OFFSET + 1; + + /* Access the dhcp option lists. Each entry is made up by: + * - octect 1: option type + * - octect 2: option data len (only if type != 255 and 0) + * - octect 3: option data */ + while (*p != 255 && !ret) { + /* p now points to the first octect: option type */ + if (*p == 53) { + /* type 53 is the message type option. + * Jump the len octect and go to the data octect */ + if (pkt_len < 2) + goto out; + p += 2; + + /* check if the message type is what we need */ + if (*p == DHCP_REQUEST) + ret = true; + break; + } else if (*p == 0) { + /* option type 0 (padding), just go forward */ + if (pkt_len < 1) + goto out; + pkt_len--; + p++; + } else { + /* This is any other option. So we get the length... */ + if (pkt_len < 1) + goto out; + pkt_len--; + p++; + + /* ...and then we jump over the data */ + if (pkt_len < *p) + goto out; + pkt_len -= *p; + p += (*p); + } + } +out: + return ret; +} + +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, + struct orig_node *old_gw) { struct ethhdr *ethhdr; struct iphdr *iphdr; struct ipv6hdr *ipv6hdr; struct udphdr *udphdr; struct gw_node *curr_gw; + struct neigh_node *neigh_curr = NULL, *neigh_old = NULL; unsigned int header_len = 0; + int ret = 1; if (atomic_read(&bat_priv->gw_mode) == GW_MODE_OFF) return 0; @@ -509,7 +604,7 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) /* check for ip header */ switch (ntohs(ethhdr->h_proto)) { case ETH_P_IP: - if (!pskb_may_pull(skb, header_len + sizeof(struct iphdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*iphdr))) return 0; iphdr = (struct iphdr *)(skb->data + header_len); header_len += iphdr->ihl * 4; @@ -520,10 +615,10 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) break; case ETH_P_IPV6: - if (!pskb_may_pull(skb, header_len + sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*ipv6hdr))) return 0; ipv6hdr = (struct ipv6hdr *)(skb->data + header_len); - header_len += sizeof(struct ipv6hdr); + header_len += sizeof(*ipv6hdr); /* check for udp header */ if (ipv6hdr->nexthdr != IPPROTO_UDP) @@ -534,10 +629,10 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) return 0; } - if (!pskb_may_pull(skb, header_len + sizeof(struct udphdr))) + if (!pskb_may_pull(skb, header_len + sizeof(*udphdr))) return 0; udphdr = (struct udphdr *)(skb->data + header_len); - header_len += sizeof(struct udphdr); + header_len += sizeof(*udphdr); /* check for bootp port */ if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && @@ -555,7 +650,30 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) if (!curr_gw) return 0; + /* If old_gw != NULL then this packet is unicast. + * So, at this point we have to check the message type: if it is a + * DHCPREQUEST we have to decide whether to drop it or not */ + if (old_gw && curr_gw->orig_node != old_gw) { + if (is_type_dhcprequest(skb, header_len)) { + /* If the dhcp packet has been sent to a different gw, + * we have to evaluate whether the old gw is still + * reliable enough */ + neigh_curr = find_router(bat_priv, curr_gw->orig_node, + NULL); + neigh_old = find_router(bat_priv, old_gw, NULL); + if (!neigh_curr || !neigh_old) + goto free_neigh; + if (neigh_curr->tq_avg - neigh_old->tq_avg < + GW_THRESHOLD) + ret = -1; + } + } +free_neigh: + if (neigh_old) + neigh_node_free_ref(neigh_old); + if (neigh_curr) + neigh_node_free_ref(neigh_curr); if (curr_gw) gw_node_free_ref(curr_gw); - return 1; + return ret; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 1ce8c6066da1..b9b983c07feb 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -31,6 +31,7 @@ void gw_node_update(struct bat_priv *bat_priv, void gw_node_delete(struct bat_priv *bat_priv, struct orig_node *orig_node); void gw_node_purge(struct bat_priv *bat_priv); int gw_client_seq_print_text(struct seq_file *seq, void *offset); -int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb); +int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb, + struct orig_node *old_gw); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 50d3a59a3d73..18661af0bc3b 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -61,9 +61,9 @@ static void kbit_to_gw_bandwidth(int down, int up, long *gw_srv_class) /* returns the up and downspeeds in kbit, calculated from the class */ void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) { - char sbit = (gw_srv_class & 0x80) >> 7; - char dpart = (gw_srv_class & 0x78) >> 3; - char upart = (gw_srv_class & 0x07); + int sbit = (gw_srv_class & 0x80) >> 7; + int dpart = (gw_srv_class & 0x78) >> 3; + int upart = (gw_srv_class & 0x07); if (!gw_srv_class) { *down = 0; @@ -76,10 +76,11 @@ void gw_bandwidth_to_kbit(uint8_t gw_srv_class, int *down, int *up) } static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, - long *up, long *down) + int *up, int *down) { int ret, multi = 1; char *slash_ptr, *tmp_ptr; + long ldown, lup; slash_ptr = strchr(buff, '/'); if (slash_ptr) @@ -96,7 +97,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtoul(buff, 10, down); + ret = strict_strtol(buff, 10, &ldown); if (ret) { bat_err(net_dev, "Download speed of gateway mode invalid: %s\n", @@ -104,7 +105,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *down *= multi; + *down = ldown * multi; /* we also got some upload info */ if (slash_ptr) { @@ -121,7 +122,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, *tmp_ptr = '\0'; } - ret = strict_strtoul(slash_ptr + 1, 10, up); + ret = strict_strtol(slash_ptr + 1, 10, &lup); if (ret) { bat_err(net_dev, "Upload speed of gateway mode invalid: " @@ -129,7 +130,7 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, return false; } - *up *= multi; + *up = lup * multi; } return true; @@ -138,7 +139,8 @@ static bool parse_gw_bandwidth(struct net_device *net_dev, char *buff, ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) { struct bat_priv *bat_priv = netdev_priv(net_dev); - long gw_bandwidth_tmp = 0, up = 0, down = 0; + long gw_bandwidth_tmp = 0; + int up = 0, down = 0; bool ret; ret = parse_gw_bandwidth(net_dev, buff, &up, &down); @@ -158,12 +160,11 @@ ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) * speeds, hence we need to calculate it back to show the number * that is going to be propagated **/ - gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, - (int *)&down, (int *)&up); + gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, &down, &up); gw_deselect(bat_priv); bat_info(net_dev, "Changing gateway bandwidth from: '%i' to: '%ld' " - "(propagating: %ld%s/%ld%s)\n", + "(propagating: %d%s/%d%s)\n", atomic_read(&bat_priv->gw_bandwidth), gw_bandwidth_tmp, (down > 2048 ? down / 1024 : down), (down > 2048 ? "MBit" : "KBit"), diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dfbfccc9fe40..db7aacf1e095 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -46,7 +46,7 @@ void hardif_free_rcu(struct rcu_head *rcu) kfree(hard_iface); } -struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev) +struct hard_iface *hardif_get_by_netdev(const struct net_device *net_dev) { struct hard_iface *hard_iface; @@ -64,7 +64,7 @@ out: return hard_iface; } -static int is_valid_iface(struct net_device *net_dev) +static int is_valid_iface(const struct net_device *net_dev) { if (net_dev->flags & IFF_LOOPBACK) return 0; @@ -86,7 +86,7 @@ static int is_valid_iface(struct net_device *net_dev) return 1; } -static struct hard_iface *hardif_get_active(struct net_device *soft_iface) +static struct hard_iface *hardif_get_active(const struct net_device *soft_iface) { struct hard_iface *hard_iface; @@ -138,7 +138,7 @@ static void primary_if_select(struct bat_priv *bat_priv, if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) new_hard_iface = NULL; - curr_hard_iface = bat_priv->primary_if; + curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); rcu_assign_pointer(bat_priv->primary_if, new_hard_iface); if (curr_hard_iface) @@ -152,15 +152,9 @@ static void primary_if_select(struct bat_priv *bat_priv, batman_packet->ttl = TTL; primary_if_update_addr(bat_priv); - - /*** - * hacky trick to make sure that we send the TT information via - * our new primary interface - */ - atomic_set(&bat_priv->tt_local_changed, 1); } -static bool hardif_is_iface_up(struct hard_iface *hard_iface) +static bool hardif_is_iface_up(const struct hard_iface *hard_iface) { if (hard_iface->net_dev->flags & IFF_UP) return true; @@ -176,9 +170,9 @@ static void update_mac_addresses(struct hard_iface *hard_iface) hard_iface->net_dev->dev_addr, ETH_ALEN); } -static void check_known_mac_addr(struct net_device *net_dev) +static void check_known_mac_addr(const struct net_device *net_dev) { - struct hard_iface *hard_iface; + const struct hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &hardif_list, list) { @@ -204,8 +198,8 @@ static void check_known_mac_addr(struct net_device *net_dev) int hardif_min_mtu(struct net_device *soft_iface) { - struct bat_priv *bat_priv = netdev_priv(soft_iface); - struct hard_iface *hard_iface; + const struct bat_priv *bat_priv = netdev_priv(soft_iface); + const struct hard_iface *hard_iface; /* allow big frames if all devices are capable to do so * (have MTU > 1500 + BAT_HEADER_LEN) */ int min_mtu = ETH_DATA_LEN; @@ -285,7 +279,8 @@ static void hardif_deactivate_interface(struct hard_iface *hard_iface) update_min_mtu(hard_iface->soft_iface); } -int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) +int hardif_enable_interface(struct hard_iface *hard_iface, + const char *iface_name) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; @@ -336,10 +331,11 @@ int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->packet_type = BAT_PACKET; batman_packet->version = COMPAT_VERSION; - batman_packet->flags = 0; + batman_packet->flags = NO_FLAGS; batman_packet->ttl = 2; batman_packet->tq = TQ_MAX_VALUE; - batman_packet->num_tt = 0; + batman_packet->tt_num_changes = 0; + batman_packet->ttvn = 0; hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; @@ -458,7 +454,7 @@ static struct hard_iface *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC); + hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) { pr_err("Can't add interface (%s): out of memory\n", net_dev->name); @@ -522,7 +518,7 @@ void hardif_remove_interfaces(void) static int hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = (struct net_device *)ptr; + struct net_device *net_dev = ptr; struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); struct hard_iface *primary_if = NULL; struct bat_priv *bat_priv; @@ -567,7 +563,7 @@ static int hard_if_event(struct notifier_block *this, break; default: break; - }; + } hardif_put: hardif_free_ref(hard_iface); @@ -658,6 +654,14 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, case BAT_VIS: ret = recv_vis_packet(skb, hard_iface); break; + /* Translation table query (request or response) */ + case BAT_TT_QUERY: + ret = recv_tt_query(skb, hard_iface); + break; + /* Roaming advertisement */ + case BAT_ROAM_ADV: + ret = recv_roam_adv(skb, hard_iface); + break; default: ret = NET_RX_DROP; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 64265991460b..442eacbc9e3a 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -22,17 +22,21 @@ #ifndef _NET_BATMAN_ADV_HARD_INTERFACE_H_ #define _NET_BATMAN_ADV_HARD_INTERFACE_H_ -#define IF_NOT_IN_USE 0 -#define IF_TO_BE_REMOVED 1 -#define IF_INACTIVE 2 -#define IF_ACTIVE 3 -#define IF_TO_BE_ACTIVATED 4 -#define IF_I_WANT_YOU 5 +enum hard_if_state { + IF_NOT_IN_USE, + IF_TO_BE_REMOVED, + IF_INACTIVE, + IF_ACTIVE, + IF_TO_BE_ACTIVATED, + IF_I_WANT_YOU +}; extern struct notifier_block hard_if_notifier; -struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev); -int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name); +struct hard_iface* +hardif_get_by_netdev(const struct net_device *net_dev); +int hardif_enable_interface(struct hard_iface *hard_iface, + const char *iface_name); void hardif_disable_interface(struct hard_iface *hard_iface); void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index c5213d8f2cca..2a172505f513 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -46,15 +46,16 @@ struct hashtable_t *hash_new(int size) { struct hashtable_t *hash; - hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC); + hash = kmalloc(sizeof(*hash), GFP_ATOMIC); if (!hash) return NULL; - hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); + hash->table = kmalloc(sizeof(*hash->table) * size, GFP_ATOMIC); if (!hash->table) goto free_hash; - hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC); + hash->list_locks = kmalloc(sizeof(*hash->list_locks) * size, + GFP_ATOMIC); if (!hash->list_locks) goto free_table; diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 434822b27473..dd5c9fd7a905 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -28,12 +28,12 @@ * compare 2 element datas for their keys, * return 0 if same and not 0 if not * same */ -typedef int (*hashdata_compare_cb)(struct hlist_node *, void *); +typedef int (*hashdata_compare_cb)(const struct hlist_node *, const void *); /* the hashfunction, should return an index * based on the key in the data of the first * argument and the size the second */ -typedef int (*hashdata_choose_cb)(void *, int); +typedef int (*hashdata_choose_cb)(const void *, int); typedef void (*hashdata_free_cb)(struct hlist_node *, void *); struct hashtable_t { @@ -80,7 +80,7 @@ static inline void hash_delete(struct hashtable_t *hash, static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, - void *data, struct hlist_node *data_node) + const void *data, struct hlist_node *data_node) { int index; struct hlist_head *head; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index fa22ba2bb832..ac3520e057c0 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -46,7 +46,7 @@ static int bat_socket_open(struct inode *inode, struct file *file) nonseekable_open(inode, file); - socket_client = kmalloc(sizeof(struct socket_client), GFP_KERNEL); + socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); if (!socket_client) return -ENOMEM; @@ -310,7 +310,7 @@ static void bat_socket_add_packet(struct socket_client *socket_client, { struct socket_packet *socket_packet; - socket_packet = kmalloc(sizeof(struct socket_packet), GFP_ATOMIC); + socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC); if (!socket_packet) return; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0a7cee0076f4..b0f9068ade57 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -58,9 +58,8 @@ static int __init batman_init(void) register_netdevice_notifier(&hard_if_notifier); - pr_info("B.A.T.M.A.N. advanced %s%s (compatibility version %i) " - "loaded\n", SOURCE_VERSION, REVISION_VERSION_STR, - COMPAT_VERSION); + pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) " + "loaded\n", SOURCE_VERSION, COMPAT_VERSION); return 0; } @@ -84,8 +83,10 @@ int mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); - spin_lock_init(&bat_priv->tt_lhash_lock); - spin_lock_init(&bat_priv->tt_ghash_lock); + spin_lock_init(&bat_priv->tt_changes_list_lock); + spin_lock_init(&bat_priv->tt_req_list_lock); + spin_lock_init(&bat_priv->tt_roam_list_lock); + spin_lock_init(&bat_priv->tt_buff_lock); spin_lock_init(&bat_priv->gw_list_lock); spin_lock_init(&bat_priv->vis_hash_lock); spin_lock_init(&bat_priv->vis_list_lock); @@ -96,14 +97,14 @@ int mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); INIT_HLIST_HEAD(&bat_priv->gw_list); INIT_HLIST_HEAD(&bat_priv->softif_neigh_vids); + INIT_LIST_HEAD(&bat_priv->tt_changes_list); + INIT_LIST_HEAD(&bat_priv->tt_req_list); + INIT_LIST_HEAD(&bat_priv->tt_roam_list); if (originator_init(bat_priv) < 1) goto err; - if (tt_local_init(bat_priv) < 1) - goto err; - - if (tt_global_init(bat_priv) < 1) + if (tt_init(bat_priv) < 1) goto err; tt_local_add(soft_iface, soft_iface->dev_addr); @@ -111,6 +112,7 @@ int mesh_init(struct net_device *soft_iface) if (vis_init(bat_priv) < 1) goto err; + atomic_set(&bat_priv->gw_reselect, 0); atomic_set(&bat_priv->mesh_state, MESH_ACTIVE); goto end; @@ -137,8 +139,7 @@ void mesh_free(struct net_device *soft_iface) gw_node_purge(bat_priv); originator_free(bat_priv); - tt_local_free(bat_priv); - tt_global_free(bat_priv); + tt_free(bat_priv); softif_neigh_purge(bat_priv); @@ -155,9 +156,9 @@ void dec_module_count(void) module_put(THIS_MODULE); } -int is_my_mac(uint8_t *addr) +int is_my_mac(const uint8_t *addr) { - struct hard_iface *hard_iface; + const struct hard_iface *hard_iface; rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &hardif_list, list) { @@ -182,8 +183,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_SUPPORTED_DEVICE(DRIVER_DEVICE); -#ifdef REVISION_VERSION -MODULE_VERSION(SOURCE_VERSION "-" REVISION_VERSION); -#else MODULE_VERSION(SOURCE_VERSION); -#endif diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 148b49e02642..a6df61a6933b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -27,8 +27,9 @@ #define DRIVER_DESC "B.A.T.M.A.N. advanced" #define DRIVER_DEVICE "batman-adv" -#define SOURCE_VERSION "next" - +#ifndef SOURCE_VERSION +#define SOURCE_VERSION "2011.3.0" +#endif /* B.A.T.M.A.N. parameters */ @@ -42,15 +43,25 @@ * -> TODO: check influence on TQ_LOCAL_WINDOW_SIZE */ #define PURGE_TIMEOUT 200 #define TT_LOCAL_TIMEOUT 3600 /* in seconds */ - +#define TT_CLIENT_ROAM_TIMEOUT 600 /* sliding packet range of received originator messages in squence numbers * (should be a multiple of our word size) */ #define TQ_LOCAL_WINDOW_SIZE 64 +#define TT_REQUEST_TIMEOUT 3 /* seconds we have to keep pending tt_req */ + #define TQ_GLOBAL_WINDOW_SIZE 5 #define TQ_LOCAL_BIDRECT_SEND_MINIMUM 1 #define TQ_LOCAL_BIDRECT_RECV_MINIMUM 1 #define TQ_TOTAL_BIDRECT_LIMIT 1 +#define TT_OGM_APPEND_MAX 3 /* number of OGMs sent with the last tt diff */ + +#define ROAMING_MAX_TIME 20 /* Time in which a client can roam at most + * ROAMING_MAX_COUNT times */ +#define ROAMING_MAX_COUNT 5 + +#define NO_FLAGS 0 + #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ @@ -72,13 +83,27 @@ #define RESET_PROTECTION_MS 30000 #define EXPECTED_SEQNO_RANGE 65536 -#define MESH_INACTIVE 0 -#define MESH_ACTIVE 1 -#define MESH_DEACTIVATING 2 +enum mesh_state { + MESH_INACTIVE, + MESH_ACTIVE, + MESH_DEACTIVATING +}; #define BCAST_QUEUE_LEN 256 #define BATMAN_QUEUE_LEN 256 +enum uev_action { + UEV_ADD = 0, + UEV_DEL, + UEV_CHANGE +}; + +enum uev_type { + UEV_GW = 0 +}; + +#define GW_THRESHOLD 50 + /* * Debug Messages */ @@ -89,10 +114,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* all messages related to routing / flooding / broadcasting / etc */ -#define DBG_BATMAN 1 -/* route or tt entry added / changed / deleted */ -#define DBG_ROUTES 2 -#define DBG_ALL 3 +enum dbg_level { + DBG_BATMAN = 1 << 0, + DBG_ROUTES = 1 << 1, /* route added / changed / deleted */ + DBG_TT = 1 << 2, /* translation table operations */ + DBG_ALL = 7 +}; /* @@ -118,12 +145,6 @@ #include <linux/seq_file.h> #include "types.h" -#ifndef REVISION_VERSION -#define REVISION_VERSION_STR "" -#else -#define REVISION_VERSION_STR " "REVISION_VERSION -#endif - extern struct list_head hardif_list; extern unsigned char broadcast_addr[]; @@ -133,10 +154,10 @@ int mesh_init(struct net_device *soft_iface); void mesh_free(struct net_device *soft_iface); void inc_module_count(void); void dec_module_count(void); -int is_my_mac(uint8_t *addr); +int is_my_mac(const uint8_t *addr); #ifdef CONFIG_BATMAN_ADV_DEBUG -int debug_log(struct bat_priv *bat_priv, char *fmt, ...); +int debug_log(struct bat_priv *bat_priv, const char *fmt, ...) __printf(2, 3); #define bat_dbg(type, bat_priv, fmt, arg...) \ do { \ @@ -145,9 +166,10 @@ int debug_log(struct bat_priv *bat_priv, char *fmt, ...); } \ while (0) #else /* !CONFIG_BATMAN_ADV_DEBUG */ -static inline void bat_dbg(char type __always_unused, +__printf(3, 4) +static inline void bat_dbg(int type __always_unused, struct bat_priv *bat_priv __always_unused, - char *fmt __always_unused, ...) + const char *fmt __always_unused, ...) { } #endif @@ -172,11 +194,32 @@ static inline void bat_dbg(char type __always_unused, * * note: can't use compare_ether_addr() as it requires aligned memory */ -static inline int compare_eth(void *data1, void *data2) + +static inline int compare_eth(const void *data1, const void *data2) { return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } + #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) +/* Returns the smallest signed integer in two's complement with the sizeof x */ +#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) + +/* Checks if a sequence number x is a predecessor/successor of y. + * they handle overflows/underflows and can correctly check for a + * predecessor/successor unless the variable sequence number has grown by + * more then 2**(bitwidth(x)-1)-1. + * This means that for a uint8_t with the maximum value 255, it would think: + * - when adding nothing - it is neither a predecessor nor a successor + * - before adding more than 127 to the starting value - it is a predecessor, + * - when adding 128 - it is neither a predecessor nor a successor, + * - after adding more than 127 to the starting value - it is a successor */ +#define seq_before(x, y) ({typeof(x) _d1 = (x); \ + typeof(y) _d2 = (y); \ + typeof(x) _dummy = (_d1 - _d2); \ + (void) (&_d1 == &_d2); \ + _dummy > smallest_signed_int(_dummy); }) +#define seq_after(x, y) seq_before(y, x) + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 40a30bbcd147..f3c3f620d195 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -37,6 +37,14 @@ static void start_purge_timer(struct bat_priv *bat_priv) queue_delayed_work(bat_event_workqueue, &bat_priv->orig_work, 1 * HZ); } +/* returns 1 if they are the same originator */ +static int compare_orig(const struct hlist_node *node, const void *data2) +{ + const void *data1 = container_of(node, struct orig_node, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + int originator_init(struct bat_priv *bat_priv) { if (bat_priv->orig_hash) @@ -77,7 +85,7 @@ struct neigh_node *orig_node_get_router(struct orig_node *orig_node) struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, + const uint8_t *neigh, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -86,7 +94,7 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, bat_dbg(DBG_BATMAN, bat_priv, "Creating new last-hop neighbor of originator\n"); - neigh_node = kzalloc(sizeof(struct neigh_node), GFP_ATOMIC); + neigh_node = kzalloc(sizeof(*neigh_node), GFP_ATOMIC); if (!neigh_node) return NULL; @@ -137,6 +145,7 @@ static void orig_node_free_rcu(struct rcu_head *rcu) tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); + kfree(orig_node->tt_buff); kfree(orig_node->bcast_own); kfree(orig_node->bcast_own_sum); kfree(orig_node); @@ -183,7 +192,7 @@ void originator_free(struct bat_priv *bat_priv) /* this function finds or creates an originator entry for the given * address if it does not exits */ -struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) +struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr) { struct orig_node *orig_node; int size; @@ -196,7 +205,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) bat_dbg(DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); - orig_node = kzalloc(sizeof(struct orig_node), GFP_ATOMIC); + orig_node = kzalloc(sizeof(*orig_node), GFP_ATOMIC); if (!orig_node) return NULL; @@ -205,14 +214,20 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) spin_lock_init(&orig_node->ogm_cnt_lock); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); + spin_lock_init(&orig_node->tt_buff_lock); /* extra reference for return */ atomic_set(&orig_node->refcount, 2); + orig_node->tt_poss_change = false; orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; + orig_node->tt_crc = 0; + atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; + orig_node->tt_buff_len = 0; + atomic_set(&orig_node->tt_size, 0); orig_node->bcast_seqno_reset = jiffies - 1 - msecs_to_jiffies(RESET_PROTECTION_MS); orig_node->batman_seqno_reset = jiffies - 1 @@ -322,9 +337,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, if (purge_orig_neighbors(bat_priv, orig_node, &best_neigh_node)) { update_routes(bat_priv, orig_node, - best_neigh_node, - orig_node->tt_buff, - orig_node->tt_buff_len); + best_neigh_node); } } @@ -419,9 +432,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) goto out; } - seq_printf(seq, "[B.A.T.M.A.N. adv %s%s, MainIF/MAC: %s/%pM (%s)]\n", - SOURCE_VERSION, REVISION_VERSION_STR, - primary_if->net_dev->name, + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", + SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name); seq_printf(seq, " %-15s %s (%s/%i) %17s [%10s]: %20s ...\n", "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", @@ -559,7 +571,7 @@ static int orig_node_del_if(struct orig_node *orig_node, memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); /* copy second part */ - memcpy(data_ptr + del_if_num * chunk_size, + memcpy((char *)data_ptr + del_if_num * chunk_size, orig_node->bcast_own + ((del_if_num + 1) * chunk_size), (max_if_num - del_if_num) * chunk_size); @@ -579,7 +591,7 @@ free_bcast_own: memcpy(data_ptr, orig_node->bcast_own_sum, del_if_num * sizeof(uint8_t)); - memcpy(data_ptr + del_if_num * sizeof(uint8_t), + memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t), orig_node->bcast_own_sum + ((del_if_num + 1) * sizeof(uint8_t)), (max_if_num - del_if_num) * sizeof(uint8_t)); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index e1d641f27aa9..cfc1f60a96a1 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -28,10 +28,10 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); void orig_node_free_ref(struct orig_node *orig_node); -struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); +struct orig_node *get_orig_node(struct bat_priv *bat_priv, const uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, + const uint8_t *neigh, struct hard_iface *if_incoming); void neigh_node_free_ref(struct neigh_node *neigh_node); struct neigh_node *orig_node_get_router(struct orig_node *orig_node); @@ -40,19 +40,11 @@ int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num); int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num); -/* returns 1 if they are the same originator */ -static inline int compare_orig(struct hlist_node *node, void *data2) -{ - void *data1 = container_of(node, struct orig_node, hash_entry); - - return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); -} - /* hashfunction to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static inline int choose_orig(void *data, int32_t size) +static inline int choose_orig(const void *data, int32_t size) { - unsigned char *key = data; + const unsigned char *key = data; uint32_t hash = 0; size_t i; @@ -70,7 +62,7 @@ static inline int choose_orig(void *data, int32_t size) } static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->orig_hash; struct hlist_head *head; diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index eda99650e9f8..b76b4be10b92 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -24,46 +24,84 @@ #define ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ -#define BAT_PACKET 0x01 -#define BAT_ICMP 0x02 -#define BAT_UNICAST 0x03 -#define BAT_BCAST 0x04 -#define BAT_VIS 0x05 -#define BAT_UNICAST_FRAG 0x06 +enum bat_packettype { + BAT_PACKET = 0x01, + BAT_ICMP = 0x02, + BAT_UNICAST = 0x03, + BAT_BCAST = 0x04, + BAT_VIS = 0x05, + BAT_UNICAST_FRAG = 0x06, + BAT_TT_QUERY = 0x07, + BAT_ROAM_ADV = 0x08 +}; /* this file is included by batctl which needs these defines */ -#define COMPAT_VERSION 12 -#define DIRECTLINK 0x40 -#define VIS_SERVER 0x20 -#define PRIMARIES_FIRST_HOP 0x10 +#define COMPAT_VERSION 14 + +enum batman_flags { + PRIMARIES_FIRST_HOP = 1 << 4, + VIS_SERVER = 1 << 5, + DIRECTLINK = 1 << 6 +}; /* ICMP message types */ -#define ECHO_REPLY 0 -#define DESTINATION_UNREACHABLE 3 -#define ECHO_REQUEST 8 -#define TTL_EXCEEDED 11 -#define PARAMETER_PROBLEM 12 +enum icmp_packettype { + ECHO_REPLY = 0, + DESTINATION_UNREACHABLE = 3, + ECHO_REQUEST = 8, + TTL_EXCEEDED = 11, + PARAMETER_PROBLEM = 12 +}; /* vis defines */ -#define VIS_TYPE_SERVER_SYNC 0 -#define VIS_TYPE_CLIENT_UPDATE 1 +enum vis_packettype { + VIS_TYPE_SERVER_SYNC = 0, + VIS_TYPE_CLIENT_UPDATE = 1 +}; /* fragmentation defines */ -#define UNI_FRAG_HEAD 0x01 -#define UNI_FRAG_LARGETAIL 0x02 +enum unicast_frag_flags { + UNI_FRAG_HEAD = 1 << 0, + UNI_FRAG_LARGETAIL = 1 << 1 +}; + +/* TT_QUERY subtypes */ +#define TT_QUERY_TYPE_MASK 0x3 + +enum tt_query_packettype { + TT_REQUEST = 0, + TT_RESPONSE = 1 +}; + +/* TT_QUERY flags */ +enum tt_query_flags { + TT_FULL_TABLE = 1 << 2 +}; + +/* TT_CLIENT flags. + * Flags from 1 to 1 << 7 are sent on the wire, while flags from 1 << 8 to + * 1 << 15 are used for local computation only */ +enum tt_client_flags { + TT_CLIENT_DEL = 1 << 0, + TT_CLIENT_ROAM = 1 << 1, + TT_CLIENT_NOPURGE = 1 << 8, + TT_CLIENT_NEW = 1 << 9, + TT_CLIENT_PENDING = 1 << 10 +}; struct batman_packet { uint8_t packet_type; uint8_t version; /* batman version field */ + uint8_t ttl; uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */ - uint8_t tq; uint32_t seqno; uint8_t orig[6]; uint8_t prev_sender[6]; - uint8_t ttl; - uint8_t num_tt; uint8_t gw_flags; /* flags related to gateway class */ - uint8_t align; + uint8_t tq; + uint8_t tt_num_changes; + uint8_t ttvn; /* translation table version number */ + uint16_t tt_crc; } __packed; #define BAT_PACKET_LEN sizeof(struct batman_packet) @@ -71,12 +109,13 @@ struct batman_packet { struct icmp_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t msg_type; /* see ICMP message types above */ uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[6]; uint8_t orig[6]; uint16_t seqno; uint8_t uid; + uint8_t reserved; } __packed; #define BAT_RR_LEN 16 @@ -86,8 +125,8 @@ struct icmp_packet { struct icmp_packet_rr { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t msg_type; /* see ICMP message types above */ uint8_t ttl; + uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[6]; uint8_t orig[6]; uint16_t seqno; @@ -99,16 +138,19 @@ struct icmp_packet_rr { struct unicast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t dest[6]; uint8_t ttl; + uint8_t ttvn; /* destination translation table version number */ + uint8_t dest[6]; } __packed; struct unicast_frag_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t dest[6]; uint8_t ttl; + uint8_t ttvn; /* destination translation table version number */ + uint8_t dest[6]; uint8_t flags; + uint8_t align; uint8_t orig[6]; uint16_t seqno; } __packed; @@ -116,21 +158,61 @@ struct unicast_frag_packet { struct bcast_packet { uint8_t packet_type; uint8_t version; /* batman version field */ - uint8_t orig[6]; uint8_t ttl; + uint8_t reserved; uint32_t seqno; + uint8_t orig[6]; } __packed; struct vis_packet { uint8_t packet_type; uint8_t version; /* batman version field */ + uint8_t ttl; /* TTL */ uint8_t vis_type; /* which type of vis-participant sent this? */ - uint8_t entries; /* number of entries behind this struct */ uint32_t seqno; /* sequence number */ - uint8_t ttl; /* TTL */ + uint8_t entries; /* number of entries behind this struct */ + uint8_t reserved; uint8_t vis_orig[6]; /* originator that announces its neighbors */ uint8_t target_orig[6]; /* who should receive this packet */ uint8_t sender_orig[6]; /* who sent or rebroadcasted this packet */ } __packed; +struct tt_query_packet { + uint8_t packet_type; + uint8_t version; /* batman version field */ + uint8_t ttl; + /* the flag field is a combination of: + * - TT_REQUEST or TT_RESPONSE + * - TT_FULL_TABLE */ + uint8_t flags; + uint8_t dst[ETH_ALEN]; + uint8_t src[ETH_ALEN]; + /* the ttvn field is: + * if TT_REQUEST: ttvn that triggered the + * request + * if TT_RESPONSE: new ttvn for the src + * orig_node */ + uint8_t ttvn; + /* tt_data field is: + * if TT_REQUEST: crc associated with the + * ttvn + * if TT_RESPONSE: table_size */ + uint16_t tt_data; +} __packed; + +struct roam_adv_packet { + uint8_t packet_type; + uint8_t version; + uint8_t ttl; + uint8_t reserved; + uint8_t dst[ETH_ALEN]; + uint8_t src[ETH_ALEN]; + uint8_t client[ETH_ALEN]; +} __packed; + +struct tt_change { + uint8_t flags; + uint8_t addr[ETH_ALEN]; +} __packed; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index 5bb6a619afee..f1ccfa76ce8a 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -28,9 +28,9 @@ void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value) *lq_index = (*lq_index + 1) % TQ_GLOBAL_WINDOW_SIZE; } -uint8_t ring_buffer_avg(uint8_t lq_recv[]) +uint8_t ring_buffer_avg(const uint8_t lq_recv[]) { - uint8_t *ptr; + const uint8_t *ptr; uint16_t count = 0, i = 0, sum = 0; ptr = lq_recv; diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index 0395b2741864..7cdfe62b657c 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -23,6 +23,6 @@ #define _NET_BATMAN_ADV_RING_BUFFER_H_ void ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, uint8_t value); -uint8_t ring_buffer_avg(uint8_t lq_recv[]); +uint8_t ring_buffer_avg(const uint8_t lq_recv[]); #endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index bb1c3ec7e3ff..0f32c818874d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -64,28 +64,69 @@ void slide_own_bcast_window(struct hard_iface *hard_iface) } } -static void update_TT(struct bat_priv *bat_priv, struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len) +static void update_transtable(struct bat_priv *bat_priv, + struct orig_node *orig_node, + const unsigned char *tt_buff, + uint8_t tt_num_changes, uint8_t ttvn, + uint16_t tt_crc) { - if ((tt_buff_len != orig_node->tt_buff_len) || - ((tt_buff_len > 0) && - (orig_node->tt_buff_len > 0) && - (memcmp(orig_node->tt_buff, tt_buff, tt_buff_len) != 0))) { - - if (orig_node->tt_buff_len > 0) - tt_global_del_orig(bat_priv, orig_node, - "originator changed tt"); - - if ((tt_buff_len > 0) && (tt_buff)) - tt_global_add_orig(bat_priv, orig_node, - tt_buff, tt_buff_len); + uint8_t orig_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + bool full_table = true; + + /* the ttvn increased by one -> we can apply the attached changes */ + if (ttvn - orig_ttvn == 1) { + /* the OGM could not contain the changes because they were too + * many to fit in one frame or because they have already been + * sent TT_OGM_APPEND_MAX times. In this case send a tt + * request */ + if (!tt_num_changes) { + full_table = false; + goto request_table; + } + + tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, + (struct tt_change *)tt_buff); + + /* Even if we received the crc into the OGM, we prefer + * to recompute it to spot any possible inconsistency + * in the global table */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + + /* The ttvn alone is not enough to guarantee consistency + * because a single value could repesent different states + * (due to the wrap around). Thus a node has to check whether + * the resulting table (after applying the changes) is still + * consistent or not. E.g. a node could disconnect while its + * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case + * checking the CRC value is mandatory to detect the + * inconsistency */ + if (orig_node->tt_crc != tt_crc) + goto request_table; + + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; + } else { + /* if we missed more than one change or our tables are not + * in sync anymore -> request fresh tt data */ + if (ttvn != orig_ttvn || orig_node->tt_crc != tt_crc) { +request_table: + bat_dbg(DBG_TT, bat_priv, "TT inconsistency for %pM. " + "Need to retrieve the correct information " + "(ttvn: %u last_ttvn: %u crc: %u last_crc: " + "%u num_changes: %u)\n", orig_node->orig, ttvn, + orig_ttvn, tt_crc, orig_node->tt_crc, + tt_num_changes); + send_tt_request(bat_priv, orig_node, ttvn, tt_crc, + full_table); + return; + } } } static void update_route(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, - unsigned char *tt_buff, int tt_buff_len) + struct neigh_node *neigh_node) { struct neigh_node *curr_router; @@ -93,11 +134,10 @@ static void update_route(struct bat_priv *bat_priv, /* route deleted */ if ((curr_router) && (!neigh_node)) { - bat_dbg(DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); tt_global_del_orig(bat_priv, orig_node, - "originator timed out"); + "Deleted route towards originator"); /* route added */ } else if ((!curr_router) && (neigh_node)) { @@ -105,11 +145,8 @@ static void update_route(struct bat_priv *bat_priv, bat_dbg(DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); - tt_global_add_orig(bat_priv, orig_node, - tt_buff, tt_buff_len); - /* route changed */ - } else { + } else if (neigh_node && curr_router) { bat_dbg(DBG_ROUTES, bat_priv, "Changing route towards: %pM " "(now via %pM - was via %pM)\n", @@ -133,10 +170,8 @@ static void update_route(struct bat_priv *bat_priv, neigh_node_free_ref(curr_router); } - void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, unsigned char *tt_buff, - int tt_buff_len) + struct neigh_node *neigh_node) { struct neigh_node *router = NULL; @@ -146,11 +181,7 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, router = orig_node_get_router(orig_node); if (router != neigh_node) - update_route(bat_priv, orig_node, neigh_node, - tt_buff, tt_buff_len); - /* may be just TT changed */ - else - update_TT(bat_priv, orig_node, tt_buff, tt_buff_len); + update_route(bat_priv, orig_node, neigh_node); out: if (router) @@ -165,7 +196,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node = NULL, *tmp_neigh_node; struct hlist_node *node; - unsigned char total_count; + uint8_t total_count; uint8_t orig_eq_count, neigh_rq_count, tq_own; int tq_asym_penalty, ret = 0; @@ -348,9 +379,9 @@ out: } /* copy primary address for bonding */ -static void bonding_save_primary(struct orig_node *orig_node, +static void bonding_save_primary(const struct orig_node *orig_node, struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet) + const struct batman_packet *batman_packet) { if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) return; @@ -358,19 +389,16 @@ static void bonding_save_primary(struct orig_node *orig_node, memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); } -static void update_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - struct ethhdr *ethhdr, - struct batman_packet *batman_packet, +static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const struct ethhdr *ethhdr, + const struct batman_packet *batman_packet, struct hard_iface *if_incoming, - unsigned char *tt_buff, int tt_buff_len, - char is_duplicate) + const unsigned char *tt_buff, int is_duplicate) { struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct neigh_node *router = NULL; struct orig_node *orig_node_tmp; struct hlist_node *node; - int tmp_tt_buff_len; uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " @@ -435,9 +463,6 @@ static void update_orig(struct bat_priv *bat_priv, bonding_candidate_add(orig_node, neigh_node); - tmp_tt_buff_len = (tt_buff_len > batman_packet->num_tt * ETH_ALEN ? - batman_packet->num_tt * ETH_ALEN : tt_buff_len); - /* if this neighbor already is our next hop there is nothing * to change */ router = orig_node_get_router(orig_node); @@ -467,15 +492,19 @@ static void update_orig(struct bat_priv *bat_priv, goto update_tt; } - update_routes(bat_priv, orig_node, neigh_node, - tt_buff, tmp_tt_buff_len); - goto update_gw; + update_routes(bat_priv, orig_node, neigh_node); update_tt: - update_routes(bat_priv, orig_node, router, - tt_buff, tmp_tt_buff_len); + /* I have to check for transtable changes only if the OGM has been + * sent through a primary interface */ + if (((batman_packet->orig != ethhdr->h_source) && + (batman_packet->ttl > 2)) || + (batman_packet->flags & PRIMARIES_FIRST_HOP)) + update_transtable(bat_priv, orig_node, tt_buff, + batman_packet->tt_num_changes, + batman_packet->ttvn, + batman_packet->tt_crc); -update_gw: if (orig_node->gw_flags != batman_packet->gw_flags) gw_node_update(bat_priv, orig_node, batman_packet->gw_flags); @@ -531,15 +560,15 @@ static int window_protected(struct bat_priv *bat_priv, * -1 the packet is old and has been received while the seqno window * was protected. Caller should drop it. */ -static char count_real_packets(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - struct hard_iface *if_incoming) +static int count_real_packets(const struct ethhdr *ethhdr, + const struct batman_packet *batman_packet, + const struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct orig_node *orig_node; struct neigh_node *tmp_neigh_node; struct hlist_node *node; - char is_duplicate = 0; + int is_duplicate = 0; int32_t seq_diff; int need_update = 0; int set_mark, ret = -1; @@ -595,9 +624,9 @@ out: return ret; } -void receive_bat_packet(struct ethhdr *ethhdr, +void receive_bat_packet(const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - unsigned char *tt_buff, int tt_buff_len, + const unsigned char *tt_buff, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -605,10 +634,10 @@ void receive_bat_packet(struct ethhdr *ethhdr, struct orig_node *orig_neigh_node, *orig_node; struct neigh_node *router = NULL, *router_router = NULL; struct neigh_node *orig_neigh_router = NULL; - char has_directlink_flag; - char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - char is_broadcast = 0, is_bidirectional, is_single_hop_neigh; - char is_duplicate; + int has_directlink_flag; + int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; + int is_broadcast = 0, is_bidirectional, is_single_hop_neigh; + int is_duplicate; uint32_t if_incoming_seqno; /* Silently drop when the batman packet is actually not a @@ -636,12 +665,14 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Received BATMAN packet via NB: %pM, IF: %s [%pM] " - "(from OG: %pM, via prev OG: %pM, seqno %d, tq %d, " - "TTL %d, V %d, IDF %d)\n", + "(from OG: %pM, via prev OG: %pM, seqno %d, ttvn %u, " + "crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batman_packet->orig, batman_packet->prev_sender, batman_packet->seqno, - batman_packet->tq, batman_packet->ttl, batman_packet->version, + batman_packet->ttvn, batman_packet->tt_crc, + batman_packet->tt_num_changes, batman_packet->tq, + batman_packet->ttl, batman_packet->version, has_directlink_flag); rcu_read_lock(); @@ -664,7 +695,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - if (compare_eth(ethhdr->h_source, broadcast_addr)) + if (is_broadcast_ether_addr(ethhdr->h_source)) is_broadcast = 1; } rcu_read_unlock(); @@ -701,17 +732,16 @@ void receive_bat_packet(struct ethhdr *ethhdr, /* neighbor has to indicate direct link and it has to * come via the corresponding interface */ - /* if received seqno equals last send seqno save new - * seqno for bidirectional check */ + /* save packet seqno for bidirectional check */ if (has_directlink_flag && compare_eth(if_incoming->net_dev->dev_addr, - batman_packet->orig) && - (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { + batman_packet->orig)) { offset = if_incoming->if_num * NUM_WORDS; spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); word = &(orig_neigh_node->bcast_own[offset]); - bit_mark(word, 0); + bit_mark(word, + if_incoming_seqno - batman_packet->seqno - 2); orig_neigh_node->bcast_own_sum[if_incoming->if_num] = bit_packet_count(word); spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); @@ -794,14 +824,14 @@ void receive_bat_packet(struct ethhdr *ethhdr, ((orig_node->last_real_seqno == batman_packet->seqno) && (orig_node->last_ttl - 3 <= batman_packet->ttl)))) update_orig(bat_priv, orig_node, ethhdr, batman_packet, - if_incoming, tt_buff, tt_buff_len, is_duplicate); + if_incoming, tt_buff, is_duplicate); /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { /* mark direct link on incoming interface */ schedule_forward_packet(orig_node, ethhdr, batman_packet, - 1, tt_buff_len, if_incoming); + 1, if_incoming); bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " "rebroadcast neighbor packet with direct link flag\n"); @@ -824,7 +854,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); schedule_forward_packet(orig_node, ethhdr, batman_packet, - 0, tt_buff_len, if_incoming); + 0, if_incoming); out_neigh: if ((orig_neigh_node) && (!is_single_hop_neigh)) @@ -1077,7 +1107,7 @@ out: * This method rotates the bonding list and increases the * returned router's refcount. */ static struct neigh_node *find_bond_router(struct orig_node *primary_orig, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct neigh_node *tmp_neigh_node; struct neigh_node *router = NULL, *first_candidate = NULL; @@ -1128,7 +1158,7 @@ out: * * Increases the returned router's refcount */ static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct neigh_node *tmp_neigh_node; struct neigh_node *router = NULL, *first_candidate = NULL; @@ -1171,12 +1201,124 @@ static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig, return router; } +int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct tt_query_packet *tt_query; + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct tt_query_packet)))) + goto out; + + /* I could need to modify it */ + if (skb_cow(skb, sizeof(struct tt_query_packet)) < 0) + goto out; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + goto out; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + goto out; + + tt_query = (struct tt_query_packet *)skb->data; + + tt_query->tt_data = ntohs(tt_query->tt_data); + + switch (tt_query->flags & TT_QUERY_TYPE_MASK) { + case TT_REQUEST: + /* If we cannot provide an answer the tt_request is + * forwarded */ + if (!send_tt_response(bat_priv, tt_query)) { + bat_dbg(DBG_TT, bat_priv, + "Routing TT_REQUEST to %pM [%c]\n", + tt_query->dst, + (tt_query->flags & TT_FULL_TABLE ? 'F' : '.')); + tt_query->tt_data = htons(tt_query->tt_data); + return route_unicast_packet(skb, recv_if); + } + break; + case TT_RESPONSE: + /* packet needs to be linearised to access the TT changes */ + if (skb_linearize(skb) < 0) + goto out; + + if (is_my_mac(tt_query->dst)) + handle_tt_response(bat_priv, tt_query); + else { + bat_dbg(DBG_TT, bat_priv, + "Routing TT_RESPONSE to %pM [%c]\n", + tt_query->dst, + (tt_query->flags & TT_FULL_TABLE ? 'F' : '.')); + tt_query->tt_data = htons(tt_query->tt_data); + return route_unicast_packet(skb, recv_if); + } + break; + } + +out: + /* returning NET_RX_DROP will make the caller function kfree the skb */ + return NET_RX_DROP; +} + +int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) +{ + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct roam_adv_packet *roam_adv_packet; + struct orig_node *orig_node; + struct ethhdr *ethhdr; + + /* drop packet if it has not necessary minimum size */ + if (unlikely(!pskb_may_pull(skb, sizeof(struct roam_adv_packet)))) + goto out; + + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* packet with unicast indication but broadcast recipient */ + if (is_broadcast_ether_addr(ethhdr->h_dest)) + goto out; + + /* packet with broadcast sender address */ + if (is_broadcast_ether_addr(ethhdr->h_source)) + goto out; + + roam_adv_packet = (struct roam_adv_packet *)skb->data; + + if (!is_my_mac(roam_adv_packet->dst)) + return route_unicast_packet(skb, recv_if); + + orig_node = orig_hash_find(bat_priv, roam_adv_packet->src); + if (!orig_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Received ROAMING_ADV from %pM " + "(client %pM)\n", roam_adv_packet->src, + roam_adv_packet->client); + + tt_global_add(bat_priv, orig_node, roam_adv_packet->client, + atomic_read(&orig_node->last_ttvn) + 1, true); + + /* Roaming phase starts: I have new information but the ttvn has not + * been incremented yet. This flag will make me check all the incoming + * packets for the correct destination. */ + bat_priv->tt_poss_change = true; + + orig_node_free_ref(orig_node); +out: + /* returning NET_RX_DROP will make the caller function kfree the skb */ + return NET_RX_DROP; +} + /* find a suitable router for this originator, and use * bonding if possible. increases the found neighbors * refcount.*/ struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct hard_iface *recv_if) + const struct hard_iface *recv_if) { struct orig_node *primary_orig_node; struct orig_node *router_orig; @@ -1240,6 +1382,9 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, router = find_ifalter_router(primary_orig_node, recv_if); return_router: + if (router && router->if_incoming->if_status != IF_ACTIVE) + goto err_unlock; + rcu_read_unlock(); return router; err_unlock: @@ -1354,14 +1499,84 @@ out: return ret; } +static int check_unicast_ttvn(struct bat_priv *bat_priv, + struct sk_buff *skb) { + uint8_t curr_ttvn; + struct orig_node *orig_node; + struct ethhdr *ethhdr; + struct hard_iface *primary_if; + struct unicast_packet *unicast_packet; + bool tt_poss_change; + + /* I could need to modify it */ + if (skb_cow(skb, sizeof(struct unicast_packet)) < 0) + return 0; + + unicast_packet = (struct unicast_packet *)skb->data; + + if (is_my_mac(unicast_packet->dest)) { + tt_poss_change = bat_priv->tt_poss_change; + curr_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + } else { + orig_node = orig_hash_find(bat_priv, unicast_packet->dest); + + if (!orig_node) + return 0; + + curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + tt_poss_change = orig_node->tt_poss_change; + orig_node_free_ref(orig_node); + } + + /* Check whether I have to reroute the packet */ + if (seq_before(unicast_packet->ttvn, curr_ttvn) || tt_poss_change) { + /* Linearize the skb before accessing it */ + if (skb_linearize(skb) < 0) + return 0; + + ethhdr = (struct ethhdr *)(skb->data + + sizeof(struct unicast_packet)); + orig_node = transtable_search(bat_priv, ethhdr->h_dest); + + if (!orig_node) { + if (!is_my_client(bat_priv, ethhdr->h_dest)) + return 0; + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + return 0; + memcpy(unicast_packet->dest, + primary_if->net_dev->dev_addr, ETH_ALEN); + hardif_free_ref(primary_if); + } else { + memcpy(unicast_packet->dest, orig_node->orig, + ETH_ALEN); + curr_ttvn = (uint8_t) + atomic_read(&orig_node->last_ttvn); + orig_node_free_ref(orig_node); + } + + bat_dbg(DBG_ROUTES, bat_priv, "TTVN mismatch (old_ttvn %u " + "new_ttvn %u)! Rerouting unicast packet (for %pM) to " + "%pM\n", unicast_packet->ttvn, curr_ttvn, + ethhdr->h_dest, unicast_packet->dest); + + unicast_packet->ttvn = curr_ttvn; + } + return 1; +} + int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { + struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_packet *unicast_packet; - int hdr_size = sizeof(struct unicast_packet); + int hdr_size = sizeof(*unicast_packet); if (check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; + if (!check_unicast_ttvn(bat_priv, skb)) + return NET_RX_DROP; + unicast_packet = (struct unicast_packet *)skb->data; /* packet for me */ @@ -1377,13 +1592,16 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_frag_packet *unicast_packet; - int hdr_size = sizeof(struct unicast_frag_packet); + int hdr_size = sizeof(*unicast_packet); struct sk_buff *new_skb = NULL; int ret; if (check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; + if (!check_unicast_ttvn(bat_priv, skb)) + return NET_RX_DROP; + unicast_packet = (struct unicast_frag_packet *)skb->data; /* packet for me */ @@ -1413,7 +1631,7 @@ int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if) struct orig_node *orig_node = NULL; struct bcast_packet *bcast_packet; struct ethhdr *ethhdr; - int hdr_size = sizeof(struct bcast_packet); + int hdr_size = sizeof(*bcast_packet); int ret = NET_RX_DROP; int32_t seq_diff; @@ -1471,7 +1689,7 @@ int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if) spin_unlock_bh(&orig_node->bcast_seqno_lock); /* rebroadcast packet */ - add_bcast_packet_to_list(bat_priv, skb); + add_bcast_packet_to_list(bat_priv, skb, 1); /* broadcast for me */ interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); @@ -1491,7 +1709,7 @@ int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if) struct vis_packet *vis_packet; struct ethhdr *ethhdr; struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - int hdr_size = sizeof(struct vis_packet); + int hdr_size = sizeof(*vis_packet); /* keep skb linear */ if (skb_linearize(skb) < 0) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 870f29842b28..fb14e9579b19 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -23,13 +23,12 @@ #define _NET_BATMAN_ADV_ROUTING_H_ void slide_own_bcast_window(struct hard_iface *hard_iface); -void receive_bat_packet(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - unsigned char *tt_buff, int tt_buff_len, - struct hard_iface *if_incoming); +void receive_bat_packet(const struct ethhdr *ethhdr, + struct batman_packet *batman_packet, + const unsigned char *tt_buff, + struct hard_iface *if_incoming); void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct neigh_node *neigh_node, unsigned char *tt_buff, - int tt_buff_len); + struct neigh_node *neigh_node); int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); @@ -37,9 +36,11 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct hard_iface *recv_if); + const struct hard_iface *recv_if); void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 33779278f1b2..58d14472068c 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -33,14 +33,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work); /* apply hop penalty for a normal link */ -static uint8_t hop_penalty(const uint8_t tq, struct bat_priv *bat_priv) +static uint8_t hop_penalty(uint8_t tq, const struct bat_priv *bat_priv) { int hop_penalty = atomic_read(&bat_priv->hop_penalty); return (tq * (TQ_MAX_VALUE - hop_penalty)) / (TQ_MAX_VALUE); } /* when do we schedule our own packet to be sent */ -static unsigned long own_send_time(struct bat_priv *bat_priv) +static unsigned long own_send_time(const struct bat_priv *bat_priv) { return jiffies + msecs_to_jiffies( atomic_read(&bat_priv->orig_interval) - @@ -55,9 +55,8 @@ static unsigned long forward_send_time(void) /* send out an already prepared packet to the given address via the * specified batman interface */ -int send_skb_packet(struct sk_buff *skb, - struct hard_iface *hard_iface, - uint8_t *dst_addr) +int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, + const uint8_t *dst_addr) { struct ethhdr *ethhdr; @@ -74,7 +73,7 @@ int send_skb_packet(struct sk_buff *skb, } /* push to the ethernet header. */ - if (my_skb_head_push(skb, sizeof(struct ethhdr)) < 0) + if (my_skb_head_push(skb, sizeof(*ethhdr)) < 0) goto send_skb_err; skb_reset_mac_header(skb); @@ -121,7 +120,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* adjust all flags and log packets */ while (aggregated_packet(buff_pos, forw_packet->packet_len, - batman_packet->num_tt)) { + batman_packet->tt_num_changes)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -136,17 +135,17 @@ static void send_packet_to_if(struct forw_packet *forw_packet, "Forwarding")); bat_dbg(DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %d, TQ %d, TTL %d," - " IDF %s) on interface %s [%pM]\n", + " IDF %s, hvn %d) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), batman_packet->orig, ntohl(batman_packet->seqno), batman_packet->tq, batman_packet->ttl, (batman_packet->flags & DIRECTLINK ? "on" : "off"), - hard_iface->net_dev->name, + batman_packet->ttvn, hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); - buff_pos += sizeof(struct batman_packet) + - (batman_packet->num_tt * ETH_ALEN); + buff_pos += sizeof(*batman_packet) + + tt_len(batman_packet->tt_num_changes); packet_num++; batman_packet = (struct batman_packet *) (forw_packet->skb->data + buff_pos); @@ -164,26 +163,31 @@ static void send_packet(struct forw_packet *forw_packet) struct hard_iface *hard_iface; struct net_device *soft_iface; struct bat_priv *bat_priv; + struct hard_iface *primary_if = NULL; struct batman_packet *batman_packet = (struct batman_packet *)(forw_packet->skb->data); - unsigned char directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); + int directlink = (batman_packet->flags & DIRECTLINK ? 1 : 0); if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not " "specified\n"); - return; + goto out; } soft_iface = forw_packet->if_incoming->soft_iface; bat_priv = netdev_priv(soft_iface); if (forw_packet->if_incoming->if_status != IF_ACTIVE) - return; + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; /* multihomed peer assumed */ /* non-primary OGMs are only broadcasted on their interface */ if ((directlink && (batman_packet->ttl == 1)) || - (forw_packet->own && (forw_packet->if_incoming->if_num > 0))) { + (forw_packet->own && (forw_packet->if_incoming != primary_if))) { /* FIXME: what about aggregated packets ? */ bat_dbg(DBG_BATMAN, bat_priv, @@ -200,7 +204,7 @@ static void send_packet(struct forw_packet *forw_packet) broadcast_addr); forw_packet->skb = NULL; - return; + goto out; } /* broadcast on every interface */ @@ -212,28 +216,24 @@ static void send_packet(struct forw_packet *forw_packet) send_packet_to_if(forw_packet, hard_iface); } rcu_read_unlock(); + +out: + if (primary_if) + hardif_free_ref(primary_if); } -static void rebuild_batman_packet(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) +static void realloc_packet_buffer(struct hard_iface *hard_iface, + int new_len) { - int new_len; unsigned char *new_buff; struct batman_packet *batman_packet; - new_len = sizeof(struct batman_packet) + - (bat_priv->num_local_tt * ETH_ALEN); new_buff = kmalloc(new_len, GFP_ATOMIC); /* keep old buffer if kmalloc should fail */ if (new_buff) { memcpy(new_buff, hard_iface->packet_buff, - sizeof(struct batman_packet)); - batman_packet = (struct batman_packet *)new_buff; - - batman_packet->num_tt = tt_local_fill_buffer(bat_priv, - new_buff + sizeof(struct batman_packet), - new_len - sizeof(struct batman_packet)); + sizeof(*batman_packet)); kfree(hard_iface->packet_buff); hard_iface->packet_buff = new_buff; @@ -241,6 +241,46 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, } } +/* when calling this function (hard_iface == primary_if) has to be true */ +static void prepare_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) +{ + int new_len; + struct batman_packet *batman_packet; + + new_len = BAT_PACKET_LEN + + tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); + + /* if we have too many changes for one packet don't send any + * and wait for the tt table request which will be fragmented */ + if (new_len > hard_iface->soft_iface->mtu) + new_len = BAT_PACKET_LEN; + + realloc_packet_buffer(hard_iface, new_len); + batman_packet = (struct batman_packet *)hard_iface->packet_buff; + + atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); + + /* reset the sending counter */ + atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); + + batman_packet->tt_num_changes = tt_changes_fill_buffer(bat_priv, + hard_iface->packet_buff + BAT_PACKET_LEN, + hard_iface->packet_len - BAT_PACKET_LEN); + +} + +static void reset_packet_buffer(struct bat_priv *bat_priv, + struct hard_iface *hard_iface) +{ + struct batman_packet *batman_packet; + + realloc_packet_buffer(hard_iface, BAT_PACKET_LEN); + + batman_packet = (struct batman_packet *)hard_iface->packet_buff; + batman_packet->tt_num_changes = 0; +} + void schedule_own_packet(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -266,14 +306,21 @@ void schedule_own_packet(struct hard_iface *hard_iface) if (hard_iface->if_status == IF_TO_BE_ACTIVATED) hard_iface->if_status = IF_ACTIVE; - /* if local tt has changed and interface is a primary interface */ - if ((atomic_read(&bat_priv->tt_local_changed)) && - (hard_iface == primary_if)) - rebuild_batman_packet(bat_priv, hard_iface); + if (hard_iface == primary_if) { + /* if at least one change happened */ + if (atomic_read(&bat_priv->tt_local_changes) > 0) { + tt_commit_changes(bat_priv); + prepare_packet_buffer(bat_priv, hard_iface); + } + + /* if the changes have been sent enough times */ + if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) + reset_packet_buffer(bat_priv, hard_iface); + } /** * NOTE: packet_buff might just have been re-allocated in - * rebuild_batman_packet() + * prepare_packet_buffer() or in reset_packet_buffer() */ batman_packet = (struct batman_packet *)hard_iface->packet_buff; @@ -281,6 +328,9 @@ void schedule_own_packet(struct hard_iface *hard_iface) batman_packet->seqno = htonl((uint32_t)atomic_read(&hard_iface->seqno)); + batman_packet->ttvn = atomic_read(&bat_priv->ttvn); + batman_packet->tt_crc = htons((uint16_t)atomic_read(&bat_priv->tt_crc)); + if (vis_server == VIS_TYPE_SERVER_SYNC) batman_packet->flags |= VIS_SERVER; else @@ -291,7 +341,7 @@ void schedule_own_packet(struct hard_iface *hard_iface) batman_packet->gw_flags = (uint8_t)atomic_read(&bat_priv->gw_bandwidth); else - batman_packet->gw_flags = 0; + batman_packet->gw_flags = NO_FLAGS; atomic_inc(&hard_iface->seqno); @@ -307,15 +357,16 @@ void schedule_own_packet(struct hard_iface *hard_iface) } void schedule_forward_packet(struct orig_node *orig_node, - struct ethhdr *ethhdr, + const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - uint8_t directlink, int tt_buff_len, + int directlink, struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *router; - unsigned char in_tq, in_ttl, tq_avg = 0; + uint8_t in_tq, in_ttl, tq_avg = 0; unsigned long send_time; + uint8_t tt_num_changes; if (batman_packet->ttl <= 1) { bat_dbg(DBG_BATMAN, bat_priv, "ttl exceeded\n"); @@ -326,6 +377,7 @@ void schedule_forward_packet(struct orig_node *orig_node, in_tq = batman_packet->tq; in_ttl = batman_packet->ttl; + tt_num_changes = batman_packet->tt_num_changes; batman_packet->ttl--; memcpy(batman_packet->prev_sender, ethhdr->h_source, ETH_ALEN); @@ -358,6 +410,7 @@ void schedule_forward_packet(struct orig_node *orig_node, batman_packet->ttl); batman_packet->seqno = htonl(batman_packet->seqno); + batman_packet->tt_crc = htons(batman_packet->tt_crc); /* switch of primaries first hop flag when forwarding */ batman_packet->flags &= ~PRIMARIES_FIRST_HOP; @@ -369,7 +422,7 @@ void schedule_forward_packet(struct orig_node *orig_node, send_time = forward_send_time(); add_bat_packet_to_list(bat_priv, (unsigned char *)batman_packet, - sizeof(struct batman_packet) + tt_buff_len, + sizeof(*batman_packet) + tt_len(tt_num_changes), if_incoming, 0, send_time); } @@ -408,11 +461,13 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv, * * The skb is not consumed, so the caller should make sure that the * skb is freed. */ -int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) +int add_bcast_packet_to_list(struct bat_priv *bat_priv, + const struct sk_buff *skb, unsigned long delay) { struct hard_iface *primary_if = NULL; struct forw_packet *forw_packet; struct bcast_packet *bcast_packet; + struct sk_buff *newskb; if (!atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { bat_dbg(DBG_BATMAN, bat_priv, "bcast packet queue full\n"); @@ -423,28 +478,28 @@ int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb) if (!primary_if) goto out_and_inc; - forw_packet = kmalloc(sizeof(struct forw_packet), GFP_ATOMIC); + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); if (!forw_packet) goto out_and_inc; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) + newskb = skb_copy(skb, GFP_ATOMIC); + if (!newskb) goto packet_free; /* as we have a copy now, it is safe to decrease the TTL */ - bcast_packet = (struct bcast_packet *)skb->data; + bcast_packet = (struct bcast_packet *)newskb->data; bcast_packet->ttl--; - skb_reset_mac_header(skb); + skb_reset_mac_header(newskb); - forw_packet->skb = skb; + forw_packet->skb = newskb; forw_packet->if_incoming = primary_if; /* how often did we send the bcast packet ? */ forw_packet->num_packets = 0; - _add_bcast_packet_to_list(bat_priv, forw_packet, 1); + _add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; packet_free: @@ -537,7 +592,7 @@ out: } void purge_outstanding_packets(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) + const struct hard_iface *hard_iface) { struct forw_packet *forw_packet; struct hlist_node *tmp_node, *safe_tmp_node; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 247172d71e4b..1f2d1e877663 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -22,18 +22,18 @@ #ifndef _NET_BATMAN_ADV_SEND_H_ #define _NET_BATMAN_ADV_SEND_H_ -int send_skb_packet(struct sk_buff *skb, - struct hard_iface *hard_iface, - uint8_t *dst_addr); +int send_skb_packet(struct sk_buff *skb, struct hard_iface *hard_iface, + const uint8_t *dst_addr); void schedule_own_packet(struct hard_iface *hard_iface); void schedule_forward_packet(struct orig_node *orig_node, - struct ethhdr *ethhdr, + const struct ethhdr *ethhdr, struct batman_packet *batman_packet, - uint8_t directlink, int tt_buff_len, + int directlink, struct hard_iface *if_outgoing); -int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); +int add_bcast_packet_to_list(struct bat_priv *bat_priv, + const struct sk_buff *skb, unsigned long delay); void send_outstanding_bat_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, - struct hard_iface *hard_iface); + const struct hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index d5aa60999e83..3e2f91ffa4e2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -30,6 +30,7 @@ #include "gateway_common.h" #include "gateway_client.h" #include "bat_sysfs.h" +#include "originator.h" #include <linux/slab.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> @@ -123,8 +124,7 @@ static struct softif_neigh_vid *softif_neigh_vid_get(struct bat_priv *bat_priv, goto out; } - softif_neigh_vid = kzalloc(sizeof(struct softif_neigh_vid), - GFP_ATOMIC); + softif_neigh_vid = kzalloc(sizeof(*softif_neigh_vid), GFP_ATOMIC); if (!softif_neigh_vid) goto out; @@ -146,7 +146,7 @@ out: } static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, - uint8_t *addr, short vid) + const uint8_t *addr, short vid) { struct softif_neigh_vid *softif_neigh_vid; struct softif_neigh *softif_neigh = NULL; @@ -170,7 +170,7 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, goto unlock; } - softif_neigh = kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); + softif_neigh = kzalloc(sizeof(*softif_neigh), GFP_ATOMIC); if (!softif_neigh) goto unlock; @@ -242,7 +242,8 @@ static void softif_neigh_vid_select(struct bat_priv *bat_priv, if (new_neigh && !atomic_inc_not_zero(&new_neigh->refcount)) new_neigh = NULL; - curr_neigh = softif_neigh_vid->softif_neigh; + curr_neigh = rcu_dereference_protected(softif_neigh_vid->softif_neigh, + 1); rcu_assign_pointer(softif_neigh_vid->softif_neigh, new_neigh); if ((curr_neigh) && (!new_neigh)) @@ -380,7 +381,7 @@ void softif_neigh_purge(struct bat_priv *bat_priv) struct softif_neigh *softif_neigh, *curr_softif_neigh; struct softif_neigh_vid *softif_neigh_vid; struct hlist_node *node, *node_tmp, *node_tmp2; - char do_deselect; + int do_deselect; rcu_read_lock(); hlist_for_each_entry_rcu(softif_neigh_vid, node, @@ -534,7 +535,7 @@ static int interface_set_mac_addr(struct net_device *dev, void *p) /* only modify transtable if it has been initialised before */ if (atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) { tt_local_remove(bat_priv, dev->dev_addr, - "mac address changed"); + "mac address changed", false); tt_local_add(dev, addr->sa_data); } @@ -553,7 +554,7 @@ static int interface_change_mtu(struct net_device *dev, int new_mtu) return 0; } -int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) +static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct bat_priv *bat_priv = netdev_priv(soft_iface); @@ -561,6 +562,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) struct bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; struct softif_neigh *curr_softif_neigh = NULL; + struct orig_node *orig_node = NULL; int data_len = skb->len, ret; short vid = -1; bool do_bcast = false; @@ -592,11 +594,13 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) if (curr_softif_neigh) goto dropped; - /* TODO: check this for locks */ + /* Register the client MAC in the transtable */ tt_local_add(soft_iface, ethhdr->h_source); - if (is_multicast_ether_addr(ethhdr->h_dest)) { - ret = gw_is_target(bat_priv, skb); + orig_node = transtable_search(bat_priv, ethhdr->h_dest); + if (is_multicast_ether_addr(ethhdr->h_dest) || + (orig_node && orig_node->gw_flags)) { + ret = gw_is_target(bat_priv, skb, orig_node); if (ret < 0) goto dropped; @@ -611,7 +615,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) if (!primary_if) goto dropped; - if (my_skb_head_push(skb, sizeof(struct bcast_packet)) < 0) + if (my_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; bcast_packet = (struct bcast_packet *)skb->data; @@ -630,7 +634,7 @@ int interface_tx(struct sk_buff *skb, struct net_device *soft_iface) bcast_packet->seqno = htonl(atomic_inc_return(&bat_priv->bcast_seqno)); - add_bcast_packet_to_list(bat_priv, skb); + add_bcast_packet_to_list(bat_priv, skb, 1); /* a copy is stored in the bcast list, therefore removing * the original skb. */ @@ -656,6 +660,8 @@ end: softif_neigh_free_ref(curr_softif_neigh); if (primary_if) hardif_free_ref(primary_if); + if (orig_node) + orig_node_free_ref(orig_node); return NETDEV_TX_OK; } @@ -744,7 +750,6 @@ out: return; } -#ifdef HAVE_NET_DEVICE_OPS static const struct net_device_ops bat_netdev_ops = { .ndo_open = interface_open, .ndo_stop = interface_release, @@ -754,7 +759,6 @@ static const struct net_device_ops bat_netdev_ops = { .ndo_start_xmit = interface_tx, .ndo_validate_addr = eth_validate_addr }; -#endif static void interface_setup(struct net_device *dev) { @@ -763,16 +767,7 @@ static void interface_setup(struct net_device *dev) ether_setup(dev); -#ifdef HAVE_NET_DEVICE_OPS dev->netdev_ops = &bat_netdev_ops; -#else - dev->open = interface_open; - dev->stop = interface_release; - dev->get_stats = interface_stats; - dev->set_mac_address = interface_set_mac_addr; - dev->change_mtu = interface_change_mtu; - dev->hard_start_xmit = interface_tx; -#endif dev->destructor = free_netdev; dev->tx_queue_len = 0; @@ -790,17 +785,16 @@ static void interface_setup(struct net_device *dev) SET_ETHTOOL_OPS(dev, &bat_ethtool_ops); - memset(priv, 0, sizeof(struct bat_priv)); + memset(priv, 0, sizeof(*priv)); } -struct net_device *softif_create(char *name) +struct net_device *softif_create(const char *name) { struct net_device *soft_iface; struct bat_priv *bat_priv; int ret; - soft_iface = alloc_netdev(sizeof(struct bat_priv) , name, - interface_setup); + soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup); if (!soft_iface) { pr_err("Unable to allocate the batman interface: %s\n", name); @@ -831,7 +825,13 @@ struct net_device *softif_create(char *name) atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); - atomic_set(&bat_priv->tt_local_changed, 0); + atomic_set(&bat_priv->ttvn, 0); + atomic_set(&bat_priv->tt_local_changes, 0); + atomic_set(&bat_priv->tt_ogm_append_cnt, 0); + + bat_priv->tt_buff = NULL; + bat_priv->tt_buff_len = 0; + bat_priv->tt_poss_change = false; bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; @@ -872,15 +872,10 @@ void softif_destroy(struct net_device *soft_iface) unregister_netdevice(soft_iface); } -int softif_is_valid(struct net_device *net_dev) +int softif_is_valid(const struct net_device *net_dev) { -#ifdef HAVE_NET_DEVICE_OPS if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) return 1; -#else - if (net_dev->hard_start_xmit == interface_tx) - return 1; -#endif return 0; } @@ -924,4 +919,3 @@ static u32 bat_get_link(struct net_device *dev) { return 1; } - diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 4789b6f2a0b3..001546fc96f1 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -25,12 +25,11 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len); int softif_neigh_seq_print_text(struct seq_file *seq, void *offset); void softif_neigh_purge(struct bat_priv *bat_priv); -int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); void interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size); -struct net_device *softif_create(char *name); +struct net_device *softif_create(const char *name); void softif_destroy(struct net_device *soft_iface); -int softif_is_valid(struct net_device *net_dev); +int softif_is_valid(const struct net_device *net_dev); #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7b729660cbfd..fb6931d00cd7 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -23,38 +23,45 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" +#include "send.h" #include "hash.h" #include "originator.h" +#include "routing.h" -static void tt_local_purge(struct work_struct *work); -static void _tt_global_del_orig(struct bat_priv *bat_priv, - struct tt_global_entry *tt_global_entry, - char *message); +#include <linux/crc16.h> + +static void _tt_global_del(struct bat_priv *bat_priv, + struct tt_global_entry *tt_global_entry, + const char *message); +static void tt_purge(struct work_struct *work); /* returns 1 if they are the same mac addr */ -static int compare_ltt(struct hlist_node *node, void *data2) +static int compare_ltt(const struct hlist_node *node, const void *data2) { - void *data1 = container_of(node, struct tt_local_entry, hash_entry); + const void *data1 = container_of(node, struct tt_local_entry, + hash_entry); return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } /* returns 1 if they are the same mac addr */ -static int compare_gtt(struct hlist_node *node, void *data2) +static int compare_gtt(const struct hlist_node *node, const void *data2) { - void *data1 = container_of(node, struct tt_global_entry, hash_entry); + const void *data1 = container_of(node, struct tt_global_entry, + hash_entry); return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } -static void tt_local_start_timer(struct bat_priv *bat_priv) +static void tt_start_timer(struct bat_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->tt_work, tt_local_purge); - queue_delayed_work(bat_event_workqueue, &bat_priv->tt_work, 10 * HZ); + INIT_DELAYED_WORK(&bat_priv->tt_work, tt_purge); + queue_delayed_work(bat_event_workqueue, &bat_priv->tt_work, + msecs_to_jiffies(5000)); } static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->tt_local_hash; struct hlist_head *head; @@ -73,6 +80,9 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, if (!compare_eth(tt_local_entry, data)) continue; + if (!atomic_inc_not_zero(&tt_local_entry->refcount)) + continue; + tt_local_entry_tmp = tt_local_entry; break; } @@ -82,7 +92,7 @@ static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv, } static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->tt_global_hash; struct hlist_head *head; @@ -102,6 +112,9 @@ static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, if (!compare_eth(tt_global_entry, data)) continue; + if (!atomic_inc_not_zero(&tt_global_entry->refcount)) + continue; + tt_global_entry_tmp = tt_global_entry; break; } @@ -110,7 +123,54 @@ static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv, return tt_global_entry_tmp; } -int tt_local_init(struct bat_priv *bat_priv) +static bool is_out_of_time(unsigned long starting_time, unsigned long timeout) +{ + unsigned long deadline; + deadline = starting_time + msecs_to_jiffies(timeout); + + return time_after(jiffies, deadline); +} + +static void tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry) +{ + if (atomic_dec_and_test(&tt_local_entry->refcount)) + kfree_rcu(tt_local_entry, rcu); +} + +static void tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry) +{ + if (atomic_dec_and_test(&tt_global_entry->refcount)) + kfree_rcu(tt_global_entry, rcu); +} + +static void tt_local_event(struct bat_priv *bat_priv, const uint8_t *addr, + uint8_t flags) +{ + struct tt_change_node *tt_change_node; + + tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + + if (!tt_change_node) + return; + + tt_change_node->change.flags = flags; + memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + + spin_lock_bh(&bat_priv->tt_changes_list_lock); + /* track the change in the OGMinterval list */ + list_add_tail(&tt_change_node->list, &bat_priv->tt_changes_list); + atomic_inc(&bat_priv->tt_local_changes); + spin_unlock_bh(&bat_priv->tt_changes_list_lock); + + atomic_set(&bat_priv->tt_ogm_append_cnt, 0); +} + +int tt_len(int changes_num) +{ + return changes_num * sizeof(struct tt_change); +} + +static int tt_local_init(struct bat_priv *bat_priv) { if (bat_priv->tt_local_hash) return 1; @@ -120,116 +180,114 @@ int tt_local_init(struct bat_priv *bat_priv) if (!bat_priv->tt_local_hash) return 0; - atomic_set(&bat_priv->tt_local_changed, 0); - tt_local_start_timer(bat_priv); - return 1; } -void tt_local_add(struct net_device *soft_iface, uint8_t *addr) +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr) { struct bat_priv *bat_priv = netdev_priv(soft_iface); - struct tt_local_entry *tt_local_entry; - struct tt_global_entry *tt_global_entry; - int required_bytes; + struct tt_local_entry *tt_local_entry = NULL; + struct tt_global_entry *tt_global_entry = NULL; - spin_lock_bh(&bat_priv->tt_lhash_lock); tt_local_entry = tt_local_hash_find(bat_priv, addr); - spin_unlock_bh(&bat_priv->tt_lhash_lock); if (tt_local_entry) { tt_local_entry->last_seen = jiffies; - return; - } - - /* only announce as many hosts as possible in the batman-packet and - space in batman_packet->num_tt That also should give a limit to - MAC-flooding. */ - required_bytes = (bat_priv->num_local_tt + 1) * ETH_ALEN; - required_bytes += BAT_PACKET_LEN; - - if ((required_bytes > ETH_DATA_LEN) || - (atomic_read(&bat_priv->aggregated_ogms) && - required_bytes > MAX_AGGREGATION_BYTES) || - (bat_priv->num_local_tt + 1 > 255)) { - bat_dbg(DBG_ROUTES, bat_priv, - "Can't add new local tt entry (%pM): " - "number of local tt entries exceeds packet size\n", - addr); - return; + goto out; } - bat_dbg(DBG_ROUTES, bat_priv, - "Creating new local tt entry: %pM\n", addr); - - tt_local_entry = kmalloc(sizeof(struct tt_local_entry), GFP_ATOMIC); + tt_local_entry = kmalloc(sizeof(*tt_local_entry), GFP_ATOMIC); if (!tt_local_entry) - return; + goto out; + + bat_dbg(DBG_TT, bat_priv, + "Creating new local tt entry: %pM (ttvn: %d)\n", addr, + (uint8_t)atomic_read(&bat_priv->ttvn)); memcpy(tt_local_entry->addr, addr, ETH_ALEN); tt_local_entry->last_seen = jiffies; + tt_local_entry->flags = NO_FLAGS; + atomic_set(&tt_local_entry->refcount, 2); /* the batman interface mac address should never be purged */ if (compare_eth(addr, soft_iface->dev_addr)) - tt_local_entry->never_purge = 1; - else - tt_local_entry->never_purge = 0; + tt_local_entry->flags |= TT_CLIENT_NOPURGE; - spin_lock_bh(&bat_priv->tt_lhash_lock); + tt_local_event(bat_priv, addr, tt_local_entry->flags); + + /* The local entry has to be marked as NEW to avoid to send it in + * a full table response going out before the next ttvn increment + * (consistency check) */ + tt_local_entry->flags |= TT_CLIENT_NEW; hash_add(bat_priv->tt_local_hash, compare_ltt, choose_orig, tt_local_entry, &tt_local_entry->hash_entry); - bat_priv->num_local_tt++; - atomic_set(&bat_priv->tt_local_changed, 1); - - spin_unlock_bh(&bat_priv->tt_lhash_lock); /* remove address from global hash if present */ - spin_lock_bh(&bat_priv->tt_ghash_lock); - tt_global_entry = tt_global_hash_find(bat_priv, addr); + /* Check whether it is a roaming! */ + if (tt_global_entry) { + /* This node is probably going to update its tt table */ + tt_global_entry->orig_node->tt_poss_change = true; + /* The global entry has to be marked as PENDING and has to be + * kept for consistency purpose */ + tt_global_entry->flags |= TT_CLIENT_PENDING; + send_roam_adv(bat_priv, tt_global_entry->addr, + tt_global_entry->orig_node); + } +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); if (tt_global_entry) - _tt_global_del_orig(bat_priv, tt_global_entry, - "local tt received"); - - spin_unlock_bh(&bat_priv->tt_ghash_lock); + tt_global_entry_free_ref(tt_global_entry); } -int tt_local_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len) +int tt_changes_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len) { - struct hashtable_t *hash = bat_priv->tt_local_hash; - struct tt_local_entry *tt_local_entry; - struct hlist_node *node; - struct hlist_head *head; - int i, count = 0; + int count = 0, tot_changes = 0; + struct tt_change_node *entry, *safe; - spin_lock_bh(&bat_priv->tt_lhash_lock); + if (buff_len > 0) + tot_changes = buff_len / tt_len(1); - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tt_local_entry, node, - head, hash_entry) { - if (buff_len < (count + 1) * ETH_ALEN) - break; - - memcpy(buff + (count * ETH_ALEN), tt_local_entry->addr, - ETH_ALEN); + spin_lock_bh(&bat_priv->tt_changes_list_lock); + atomic_set(&bat_priv->tt_local_changes, 0); + list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list) { + if (count < tot_changes) { + memcpy(buff + tt_len(count), + &entry->change, sizeof(struct tt_change)); count++; } - rcu_read_unlock(); + list_del(&entry->list); + kfree(entry); } + spin_unlock_bh(&bat_priv->tt_changes_list_lock); + + /* Keep the buffer for possible tt_request */ + spin_lock_bh(&bat_priv->tt_buff_lock); + kfree(bat_priv->tt_buff); + bat_priv->tt_buff_len = 0; + bat_priv->tt_buff = NULL; + /* We check whether this new OGM has no changes due to size + * problems */ + if (buff_len > 0) { + /** + * if kmalloc() fails we will reply with the full table + * instead of providing the diff + */ + bat_priv->tt_buff = kmalloc(buff_len, GFP_ATOMIC); + if (bat_priv->tt_buff) { + memcpy(bat_priv->tt_buff, buff, buff_len); + bat_priv->tt_buff_len = buff_len; + } + } + spin_unlock_bh(&bat_priv->tt_buff_lock); - /* if we did not get all new local tts see you next time ;-) */ - if (count == bat_priv->num_local_tt) - atomic_set(&bat_priv->tt_local_changed, 0); - - spin_unlock_bh(&bat_priv->tt_lhash_lock); - return count; + return tot_changes; } int tt_local_seq_print_text(struct seq_file *seq, void *offset) @@ -261,10 +319,8 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) } seq_printf(seq, "Locally retrieved addresses (from %s) " - "announced via TT:\n", - net_dev->name); - - spin_lock_bh(&bat_priv->tt_lhash_lock); + "announced via TT (TTVN: %u):\n", + net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn)); buf_size = 1; /* Estimate length for: " * xx:xx:xx:xx:xx:xx\n" */ @@ -279,7 +335,6 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) buff = kmalloc(buf_size, GFP_ATOMIC); if (!buff) { - spin_unlock_bh(&bat_priv->tt_lhash_lock); ret = -ENOMEM; goto out; } @@ -299,8 +354,6 @@ int tt_local_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); - seq_printf(seq, "%s", buff); kfree(buff); out: @@ -309,92 +362,109 @@ out: return ret; } -static void _tt_local_del(struct hlist_node *node, void *arg) +static void tt_local_set_pending(struct bat_priv *bat_priv, + struct tt_local_entry *tt_local_entry, + uint16_t flags) { - struct bat_priv *bat_priv = (struct bat_priv *)arg; - void *data = container_of(node, struct tt_local_entry, hash_entry); + tt_local_event(bat_priv, tt_local_entry->addr, + tt_local_entry->flags | flags); - kfree(data); - bat_priv->num_local_tt--; - atomic_set(&bat_priv->tt_local_changed, 1); + /* The local client has to be merked as "pending to be removed" but has + * to be kept in the table in order to send it in an full tables + * response issued before the net ttvn increment (consistency check) */ + tt_local_entry->flags |= TT_CLIENT_PENDING; } -static void tt_local_del(struct bat_priv *bat_priv, - struct tt_local_entry *tt_local_entry, - char *message) +void tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr, + const char *message, bool roaming) { - bat_dbg(DBG_ROUTES, bat_priv, "Deleting local tt entry (%pM): %s\n", - tt_local_entry->addr, message); - - hash_remove(bat_priv->tt_local_hash, compare_ltt, choose_orig, - tt_local_entry->addr); - _tt_local_del(&tt_local_entry->hash_entry, bat_priv); -} - -void tt_local_remove(struct bat_priv *bat_priv, - uint8_t *addr, char *message) -{ - struct tt_local_entry *tt_local_entry; - - spin_lock_bh(&bat_priv->tt_lhash_lock); + struct tt_local_entry *tt_local_entry = NULL; tt_local_entry = tt_local_hash_find(bat_priv, addr); + if (!tt_local_entry) + goto out; - if (tt_local_entry) - tt_local_del(bat_priv, tt_local_entry, message); + tt_local_set_pending(bat_priv, tt_local_entry, TT_CLIENT_DEL | + (roaming ? TT_CLIENT_ROAM : NO_FLAGS)); - spin_unlock_bh(&bat_priv->tt_lhash_lock); + bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) pending to be removed: " + "%s\n", tt_local_entry->addr, message); +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); } -static void tt_local_purge(struct work_struct *work) +static void tt_local_purge(struct bat_priv *bat_priv) { - struct delayed_work *delayed_work = - container_of(work, struct delayed_work, work); - struct bat_priv *bat_priv = - container_of(delayed_work, struct bat_priv, tt_work); struct hashtable_t *hash = bat_priv->tt_local_hash; struct tt_local_entry *tt_local_entry; struct hlist_node *node, *node_tmp; struct hlist_head *head; - unsigned long timeout; + spinlock_t *list_lock; /* protects write access to the hash lists */ int i; - spin_lock_bh(&bat_priv->tt_lhash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, head, hash_entry) { - if (tt_local_entry->never_purge) + if (tt_local_entry->flags & TT_CLIENT_NOPURGE) continue; - timeout = tt_local_entry->last_seen; - timeout += TT_LOCAL_TIMEOUT * HZ; + /* entry already marked for deletion */ + if (tt_local_entry->flags & TT_CLIENT_PENDING) + continue; - if (time_before(jiffies, timeout)) + if (!is_out_of_time(tt_local_entry->last_seen, + TT_LOCAL_TIMEOUT * 1000)) continue; - tt_local_del(bat_priv, tt_local_entry, - "address timed out"); + tt_local_set_pending(bat_priv, tt_local_entry, + TT_CLIENT_DEL); + bat_dbg(DBG_TT, bat_priv, "Local tt entry (%pM) " + "pending to be removed: timed out\n", + tt_local_entry->addr); } + spin_unlock_bh(list_lock); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); - tt_local_start_timer(bat_priv); } -void tt_local_free(struct bat_priv *bat_priv) +static void tt_local_table_free(struct bat_priv *bat_priv) { + struct hashtable_t *hash; + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_local_entry *tt_local_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + int i; + if (!bat_priv->tt_local_hash) return; - cancel_delayed_work_sync(&bat_priv->tt_work); - hash_delete(bat_priv->tt_local_hash, _tt_local_del, bat_priv); + hash = bat_priv->tt_local_hash; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + head, hash_entry) { + hlist_del_rcu(node); + tt_local_entry_free_ref(tt_local_entry); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); + bat_priv->tt_local_hash = NULL; } -int tt_global_init(struct bat_priv *bat_priv) +static int tt_global_init(struct bat_priv *bat_priv) { if (bat_priv->tt_global_hash) return 1; @@ -407,74 +477,78 @@ int tt_global_init(struct bat_priv *bat_priv) return 1; } -void tt_global_add_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len) +static void tt_changes_list_free(struct bat_priv *bat_priv) { - struct tt_global_entry *tt_global_entry; - struct tt_local_entry *tt_local_entry; - int tt_buff_count = 0; - unsigned char *tt_ptr; + struct tt_change_node *entry, *safe; - while ((tt_buff_count + 1) * ETH_ALEN <= tt_buff_len) { - spin_lock_bh(&bat_priv->tt_ghash_lock); + spin_lock_bh(&bat_priv->tt_changes_list_lock); - tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN); - tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr); - - if (!tt_global_entry) { - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - tt_global_entry = - kmalloc(sizeof(struct tt_global_entry), - GFP_ATOMIC); - - if (!tt_global_entry) - break; + list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, + list) { + list_del(&entry->list); + kfree(entry); + } - memcpy(tt_global_entry->addr, tt_ptr, ETH_ALEN); + atomic_set(&bat_priv->tt_local_changes, 0); + spin_unlock_bh(&bat_priv->tt_changes_list_lock); +} - bat_dbg(DBG_ROUTES, bat_priv, - "Creating new global tt entry: " - "%pM (via %pM)\n", - tt_global_entry->addr, orig_node->orig); +/* caller must hold orig_node refcount */ +int tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_addr, uint8_t ttvn, bool roaming) +{ + struct tt_global_entry *tt_global_entry; + struct orig_node *orig_node_tmp; + int ret = 0; - spin_lock_bh(&bat_priv->tt_ghash_lock); - hash_add(bat_priv->tt_global_hash, compare_gtt, - choose_orig, tt_global_entry, - &tt_global_entry->hash_entry); + tt_global_entry = tt_global_hash_find(bat_priv, tt_addr); - } + if (!tt_global_entry) { + tt_global_entry = + kmalloc(sizeof(*tt_global_entry), + GFP_ATOMIC); + if (!tt_global_entry) + goto out; + memcpy(tt_global_entry->addr, tt_addr, ETH_ALEN); + /* Assign the new orig_node */ + atomic_inc(&orig_node->refcount); tt_global_entry->orig_node = orig_node; - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - /* remove address from local hash if present */ - spin_lock_bh(&bat_priv->tt_lhash_lock); - - tt_ptr = tt_buff + (tt_buff_count * ETH_ALEN); - tt_local_entry = tt_local_hash_find(bat_priv, tt_ptr); - - if (tt_local_entry) - tt_local_del(bat_priv, tt_local_entry, - "global tt received"); - - spin_unlock_bh(&bat_priv->tt_lhash_lock); - - tt_buff_count++; + tt_global_entry->ttvn = ttvn; + tt_global_entry->flags = NO_FLAGS; + tt_global_entry->roam_at = 0; + atomic_set(&tt_global_entry->refcount, 2); + + hash_add(bat_priv->tt_global_hash, compare_gtt, + choose_orig, tt_global_entry, + &tt_global_entry->hash_entry); + atomic_inc(&orig_node->tt_size); + } else { + if (tt_global_entry->orig_node != orig_node) { + atomic_dec(&tt_global_entry->orig_node->tt_size); + orig_node_tmp = tt_global_entry->orig_node; + atomic_inc(&orig_node->refcount); + tt_global_entry->orig_node = orig_node; + orig_node_free_ref(orig_node_tmp); + atomic_inc(&orig_node->tt_size); + } + tt_global_entry->ttvn = ttvn; + tt_global_entry->flags = NO_FLAGS; + tt_global_entry->roam_at = 0; } - /* initialize, and overwrite if malloc succeeds */ - orig_node->tt_buff = NULL; - orig_node->tt_buff_len = 0; + bat_dbg(DBG_TT, bat_priv, + "Creating new global tt entry: %pM (via %pM)\n", + tt_global_entry->addr, orig_node->orig); - if (tt_buff_len > 0) { - orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC); - if (orig_node->tt_buff) { - memcpy(orig_node->tt_buff, tt_buff, tt_buff_len); - orig_node->tt_buff_len = tt_buff_len; - } - } + /* remove address from local hash if present */ + tt_local_remove(bat_priv, tt_global_entry->addr, + "global tt received", roaming); + ret = 1; +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); + return ret; } int tt_global_seq_print_text(struct seq_file *seq, void *offset) @@ -508,26 +582,27 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", net_dev->name); - - spin_lock_bh(&bat_priv->tt_ghash_lock); + seq_printf(seq, " %-13s %s %-15s %s\n", + "Client", "(TTVN)", "Originator", "(Curr TTVN)"); buf_size = 1; - /* Estimate length for: " * xx:xx:xx:xx:xx:xx via xx:xx:xx:xx:xx:xx\n"*/ + /* Estimate length for: " * xx:xx:xx:xx:xx:xx (ttvn) via + * xx:xx:xx:xx:xx:xx (cur_ttvn)\n"*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); __hlist_for_each_rcu(node, head) - buf_size += 43; + buf_size += 59; rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); if (!buff) { - spin_unlock_bh(&bat_priv->tt_ghash_lock); ret = -ENOMEM; goto out; } + buff[0] = '\0'; pos = 0; @@ -537,16 +612,18 @@ int tt_global_seq_print_text(struct seq_file *seq, void *offset) rcu_read_lock(); hlist_for_each_entry_rcu(tt_global_entry, node, head, hash_entry) { - pos += snprintf(buff + pos, 44, - " * %pM via %pM\n", + pos += snprintf(buff + pos, 61, + " * %pM (%3u) via %pM (%3u)\n", tt_global_entry->addr, - tt_global_entry->orig_node->orig); + tt_global_entry->ttvn, + tt_global_entry->orig_node->orig, + (uint8_t) atomic_read( + &tt_global_entry->orig_node-> + last_ttvn)); } rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_ghash_lock); - seq_printf(seq, "%s", buff); kfree(buff); out: @@ -555,84 +632,1091 @@ out: return ret; } -static void _tt_global_del_orig(struct bat_priv *bat_priv, - struct tt_global_entry *tt_global_entry, - char *message) +static void _tt_global_del(struct bat_priv *bat_priv, + struct tt_global_entry *tt_global_entry, + const char *message) { - bat_dbg(DBG_ROUTES, bat_priv, + if (!tt_global_entry) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Deleting global tt entry %pM (via %pM): %s\n", tt_global_entry->addr, tt_global_entry->orig_node->orig, message); + atomic_dec(&tt_global_entry->orig_node->tt_size); + hash_remove(bat_priv->tt_global_hash, compare_gtt, choose_orig, tt_global_entry->addr); - kfree(tt_global_entry); +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); } -void tt_global_del_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, char *message) +void tt_global_del(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + const char *message, bool roaming) { - struct tt_global_entry *tt_global_entry; - int tt_buff_count = 0; - unsigned char *tt_ptr; + struct tt_global_entry *tt_global_entry = NULL; - if (orig_node->tt_buff_len == 0) - return; + tt_global_entry = tt_global_hash_find(bat_priv, addr); + if (!tt_global_entry) + goto out; - spin_lock_bh(&bat_priv->tt_ghash_lock); + if (tt_global_entry->orig_node == orig_node) { + if (roaming) { + tt_global_entry->flags |= TT_CLIENT_ROAM; + tt_global_entry->roam_at = jiffies; + goto out; + } + _tt_global_del(bat_priv, tt_global_entry, message); + } +out: + if (tt_global_entry) + tt_global_entry_free_ref(tt_global_entry); +} - while ((tt_buff_count + 1) * ETH_ALEN <= orig_node->tt_buff_len) { - tt_ptr = orig_node->tt_buff + (tt_buff_count * ETH_ALEN); - tt_global_entry = tt_global_hash_find(bat_priv, tt_ptr); +void tt_global_del_orig(struct bat_priv *bat_priv, + struct orig_node *orig_node, const char *message) +{ + struct tt_global_entry *tt_global_entry; + int i; + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct hlist_node *node, *safe; + struct hlist_head *head; + spinlock_t *list_lock; /* protects write access to the hash lists */ - if ((tt_global_entry) && - (tt_global_entry->orig_node == orig_node)) - _tt_global_del_orig(bat_priv, tt_global_entry, - message); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; - tt_buff_count++; + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, safe, + head, hash_entry) { + if (tt_global_entry->orig_node == orig_node) { + bat_dbg(DBG_TT, bat_priv, + "Deleting global tt entry %pM " + "(via %pM): originator time out\n", + tt_global_entry->addr, + tt_global_entry->orig_node->orig); + hlist_del_rcu(node); + tt_global_entry_free_ref(tt_global_entry); + } + } + spin_unlock_bh(list_lock); } - - spin_unlock_bh(&bat_priv->tt_ghash_lock); - - orig_node->tt_buff_len = 0; - kfree(orig_node->tt_buff); - orig_node->tt_buff = NULL; + atomic_set(&orig_node->tt_size, 0); } -static void tt_global_del(struct hlist_node *node, void *arg) +static void tt_global_roam_purge(struct bat_priv *bat_priv) { - void *data = container_of(node, struct tt_global_entry, hash_entry); + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_global_entry *tt_global_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + spinlock_t *list_lock; /* protects write access to the hash lists */ + int i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + head, hash_entry) { + if (!(tt_global_entry->flags & TT_CLIENT_ROAM)) + continue; + if (!is_out_of_time(tt_global_entry->roam_at, + TT_CLIENT_ROAM_TIMEOUT * 1000)) + continue; + + bat_dbg(DBG_TT, bat_priv, "Deleting global " + "tt entry (%pM): Roaming timeout\n", + tt_global_entry->addr); + atomic_dec(&tt_global_entry->orig_node->tt_size); + hlist_del_rcu(node); + tt_global_entry_free_ref(tt_global_entry); + } + spin_unlock_bh(list_lock); + } - kfree(data); } -void tt_global_free(struct bat_priv *bat_priv) +static void tt_global_table_free(struct bat_priv *bat_priv) { + struct hashtable_t *hash; + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct tt_global_entry *tt_global_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + int i; + if (!bat_priv->tt_global_hash) return; - hash_delete(bat_priv->tt_global_hash, tt_global_del, NULL); + hash = bat_priv->tt_global_hash; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_global_entry, node, node_tmp, + head, hash_entry) { + hlist_del_rcu(node); + tt_global_entry_free_ref(tt_global_entry); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); + bat_priv->tt_global_hash = NULL; } -struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) +struct orig_node *transtable_search(struct bat_priv *bat_priv, + const uint8_t *addr) { struct tt_global_entry *tt_global_entry; struct orig_node *orig_node = NULL; - spin_lock_bh(&bat_priv->tt_ghash_lock); tt_global_entry = tt_global_hash_find(bat_priv, addr); if (!tt_global_entry) goto out; if (!atomic_inc_not_zero(&tt_global_entry->orig_node->refcount)) - goto out; + goto free_tt; + + /* A global client marked as PENDING has already moved from that + * originator */ + if (tt_global_entry->flags & TT_CLIENT_PENDING) + goto free_tt; orig_node = tt_global_entry->orig_node; +free_tt: + tt_global_entry_free_ref(tt_global_entry); out: - spin_unlock_bh(&bat_priv->tt_ghash_lock); return orig_node; } + +/* Calculates the checksum of the local table of a given orig_node */ +uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node) +{ + uint16_t total = 0, total_one; + struct hashtable_t *hash = bat_priv->tt_global_hash; + struct tt_global_entry *tt_global_entry; + struct hlist_node *node; + struct hlist_head *head; + int i, j; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_global_entry, node, + head, hash_entry) { + if (compare_eth(tt_global_entry->orig_node, + orig_node)) { + /* Roaming clients are in the global table for + * consistency only. They don't have to be + * taken into account while computing the + * global crc */ + if (tt_global_entry->flags & TT_CLIENT_ROAM) + continue; + total_one = 0; + for (j = 0; j < ETH_ALEN; j++) + total_one = crc16_byte(total_one, + tt_global_entry->addr[j]); + total ^= total_one; + } + } + rcu_read_unlock(); + } + + return total; +} + +/* Calculates the checksum of the local table */ +uint16_t tt_local_crc(struct bat_priv *bat_priv) +{ + uint16_t total = 0, total_one; + struct hashtable_t *hash = bat_priv->tt_local_hash; + struct tt_local_entry *tt_local_entry; + struct hlist_node *node; + struct hlist_head *head; + int i, j; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_local_entry, node, + head, hash_entry) { + /* not yet committed clients have not to be taken into + * account while computing the CRC */ + if (tt_local_entry->flags & TT_CLIENT_NEW) + continue; + total_one = 0; + for (j = 0; j < ETH_ALEN; j++) + total_one = crc16_byte(total_one, + tt_local_entry->addr[j]); + total ^= total_one; + } + rcu_read_unlock(); + } + + return total; +} + +static void tt_req_list_free(struct bat_priv *bat_priv) +{ + struct tt_req_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + + list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + list_del(&node->list); + kfree(node); + } + + spin_unlock_bh(&bat_priv->tt_req_list_lock); +} + +void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes) +{ + uint16_t tt_buff_len = tt_len(tt_num_changes); + + /* Replace the old buffer only if I received something in the + * last OGM (the OGM could carry no changes) */ + spin_lock_bh(&orig_node->tt_buff_lock); + if (tt_buff_len > 0) { + kfree(orig_node->tt_buff); + orig_node->tt_buff_len = 0; + orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC); + if (orig_node->tt_buff) { + memcpy(orig_node->tt_buff, tt_buff, tt_buff_len); + orig_node->tt_buff_len = tt_buff_len; + } + } + spin_unlock_bh(&orig_node->tt_buff_lock); +} + +static void tt_req_purge(struct bat_priv *bat_priv) +{ + struct tt_req_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + if (is_out_of_time(node->issued_at, + TT_REQUEST_TIMEOUT * 1000)) { + list_del(&node->list); + kfree(node); + } + } + spin_unlock_bh(&bat_priv->tt_req_list_lock); +} + +/* returns the pointer to the new tt_req_node struct if no request + * has already been issued for this orig_node, NULL otherwise */ +static struct tt_req_node *new_tt_req_node(struct bat_priv *bat_priv, + struct orig_node *orig_node) +{ + struct tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; + + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry(tt_req_node_tmp, &bat_priv->tt_req_list, list) { + if (compare_eth(tt_req_node_tmp, orig_node) && + !is_out_of_time(tt_req_node_tmp->issued_at, + TT_REQUEST_TIMEOUT * 1000)) + goto unlock; + } + + tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + if (!tt_req_node) + goto unlock; + + memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN); + tt_req_node->issued_at = jiffies; + + list_add(&tt_req_node->list, &bat_priv->tt_req_list); +unlock: + spin_unlock_bh(&bat_priv->tt_req_list_lock); + return tt_req_node; +} + +/* data_ptr is useless here, but has to be kept to respect the prototype */ +static int tt_local_valid_entry(const void *entry_ptr, const void *data_ptr) +{ + const struct tt_local_entry *tt_local_entry = entry_ptr; + + if (tt_local_entry->flags & TT_CLIENT_NEW) + return 0; + return 1; +} + +static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr) +{ + const struct tt_global_entry *tt_global_entry = entry_ptr; + const struct orig_node *orig_node = data_ptr; + + if (tt_global_entry->flags & TT_CLIENT_ROAM) + return 0; + + return (tt_global_entry->orig_node == orig_node); +} + +static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, + struct hashtable_t *hash, + struct hard_iface *primary_if, + int (*valid_cb)(const void *, + const void *), + void *cb_data) +{ + struct tt_local_entry *tt_local_entry; + struct tt_query_packet *tt_response; + struct tt_change *tt_change; + struct hlist_node *node; + struct hlist_head *head; + struct sk_buff *skb = NULL; + uint16_t tt_tot, tt_count; + ssize_t tt_query_size = sizeof(struct tt_query_packet); + int i; + + if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { + tt_len = primary_if->soft_iface->mtu - tt_query_size; + tt_len -= tt_len % sizeof(struct tt_change); + } + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(tt_query_size + tt_len + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + tt_query_size + tt_len); + tt_response->ttvn = ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_change = (struct tt_change *)(skb->data + tt_query_size); + tt_count = 0; + + rcu_read_lock(); + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + hlist_for_each_entry_rcu(tt_local_entry, node, + head, hash_entry) { + if (tt_count == tt_tot) + break; + + if ((valid_cb) && (!valid_cb(tt_local_entry, cb_data))) + continue; + + memcpy(tt_change->addr, tt_local_entry->addr, ETH_ALEN); + tt_change->flags = NO_FLAGS; + + tt_count++; + tt_change++; + } + } + rcu_read_unlock(); + +out: + return skb; +} + +int send_tt_request(struct bat_priv *bat_priv, struct orig_node *dst_orig_node, + uint8_t ttvn, uint16_t tt_crc, bool full_table) +{ + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_request; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if; + struct tt_req_node *tt_req_node = NULL; + int ret = 1; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* The new tt_req will be issued only if I'm not waiting for a + * reply from the same orig_node yet */ + tt_req_node = new_tt_req_node(bat_priv, dst_orig_node); + if (!tt_req_node) + goto out; + + skb = dev_alloc_skb(sizeof(struct tt_query_packet) + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + + tt_request = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet)); + + tt_request->packet_type = BAT_TT_QUERY; + tt_request->version = COMPAT_VERSION; + memcpy(tt_request->src, primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(tt_request->dst, dst_orig_node->orig, ETH_ALEN); + tt_request->ttl = TTL; + tt_request->ttvn = ttvn; + tt_request->tt_data = tt_crc; + tt_request->flags = TT_REQUEST; + + if (full_table) + tt_request->flags |= TT_FULL_TABLE; + + neigh_node = orig_node_get_router(dst_orig_node); + if (!neigh_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, "Sending TT_REQUEST to %pM via %pM " + "[%c]\n", dst_orig_node->orig, neigh_node->addr, + (full_table ? 'F' : '.')); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = 0; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (ret) + kfree_skb(skb); + if (ret && tt_req_node) { + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_del(&tt_req_node->list); + spin_unlock_bh(&bat_priv->tt_req_list_lock); + kfree(tt_req_node); + } + return ret; +} + +static bool send_other_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + struct orig_node *req_dst_orig_node = NULL, *res_dst_orig_node = NULL; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if = NULL; + uint8_t orig_ttvn, req_ttvn, ttvn; + int ret = false; + unsigned char *tt_buff; + bool full_table; + uint16_t tt_len, tt_tot; + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_response; + + bat_dbg(DBG_TT, bat_priv, + "Received TT_REQUEST from %pM for " + "ttvn: %u (%pM) [%c]\n", tt_request->src, + tt_request->ttvn, tt_request->dst, + (tt_request->flags & TT_FULL_TABLE ? 'F' : '.')); + + /* Let's get the orig node of the REAL destination */ + req_dst_orig_node = get_orig_node(bat_priv, tt_request->dst); + if (!req_dst_orig_node) + goto out; + + res_dst_orig_node = get_orig_node(bat_priv, tt_request->src); + if (!res_dst_orig_node) + goto out; + + neigh_node = orig_node_get_router(res_dst_orig_node); + if (!neigh_node) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); + req_ttvn = tt_request->ttvn; + + /* I have not the requested data */ + if (orig_ttvn != req_ttvn || + tt_request->tt_data != req_dst_orig_node->tt_crc) + goto out; + + /* If it has explicitly been requested the full table */ + if (tt_request->flags & TT_FULL_TABLE || + !req_dst_orig_node->tt_buff) + full_table = true; + else + full_table = false; + + /* In this version, fragmentation is not implemented, then + * I'll send only one packet with as much TT entries as I can */ + if (!full_table) { + spin_lock_bh(&req_dst_orig_node->tt_buff_lock); + tt_len = req_dst_orig_node->tt_buff_len; + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(sizeof(struct tt_query_packet) + + tt_len + ETH_HLEN); + if (!skb) + goto unlock; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet) + tt_len); + tt_response->ttvn = req_ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_buff = skb->data + sizeof(struct tt_query_packet); + /* Copy the last orig_node's OGM buffer */ + memcpy(tt_buff, req_dst_orig_node->tt_buff, + req_dst_orig_node->tt_buff_len); + + spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); + } else { + tt_len = (uint16_t)atomic_read(&req_dst_orig_node->tt_size) * + sizeof(struct tt_change); + ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); + + skb = tt_response_fill_table(tt_len, ttvn, + bat_priv->tt_global_hash, + primary_if, tt_global_valid_entry, + req_dst_orig_node); + if (!skb) + goto out; + + tt_response = (struct tt_query_packet *)skb->data; + } + + tt_response->packet_type = BAT_TT_QUERY; + tt_response->version = COMPAT_VERSION; + tt_response->ttl = TTL; + memcpy(tt_response->src, req_dst_orig_node->orig, ETH_ALEN); + memcpy(tt_response->dst, tt_request->src, ETH_ALEN); + tt_response->flags = TT_RESPONSE; + + if (full_table) + tt_response->flags |= TT_FULL_TABLE; + + bat_dbg(DBG_TT, bat_priv, + "Sending TT_RESPONSE %pM via %pM for %pM (ttvn: %u)\n", + res_dst_orig_node->orig, neigh_node->addr, + req_dst_orig_node->orig, req_ttvn); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = true; + goto out; + +unlock: + spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); + +out: + if (res_dst_orig_node) + orig_node_free_ref(res_dst_orig_node); + if (req_dst_orig_node) + orig_node_free_ref(req_dst_orig_node); + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (!ret) + kfree_skb(skb); + return ret; + +} +static bool send_my_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; + struct hard_iface *primary_if = NULL; + uint8_t my_ttvn, req_ttvn, ttvn; + int ret = false; + unsigned char *tt_buff; + bool full_table; + uint16_t tt_len, tt_tot; + struct sk_buff *skb = NULL; + struct tt_query_packet *tt_response; + + bat_dbg(DBG_TT, bat_priv, + "Received TT_REQUEST from %pM for " + "ttvn: %u (me) [%c]\n", tt_request->src, + tt_request->ttvn, + (tt_request->flags & TT_FULL_TABLE ? 'F' : '.')); + + + my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + req_ttvn = tt_request->ttvn; + + orig_node = get_orig_node(bat_priv, tt_request->src); + if (!orig_node) + goto out; + + neigh_node = orig_node_get_router(orig_node); + if (!neigh_node) + goto out; + + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* If the full table has been explicitly requested or the gap + * is too big send the whole local translation table */ + if (tt_request->flags & TT_FULL_TABLE || my_ttvn != req_ttvn || + !bat_priv->tt_buff) + full_table = true; + else + full_table = false; + + /* In this version, fragmentation is not implemented, then + * I'll send only one packet with as much TT entries as I can */ + if (!full_table) { + spin_lock_bh(&bat_priv->tt_buff_lock); + tt_len = bat_priv->tt_buff_len; + tt_tot = tt_len / sizeof(struct tt_change); + + skb = dev_alloc_skb(sizeof(struct tt_query_packet) + + tt_len + ETH_HLEN); + if (!skb) + goto unlock; + + skb_reserve(skb, ETH_HLEN); + tt_response = (struct tt_query_packet *)skb_put(skb, + sizeof(struct tt_query_packet) + tt_len); + tt_response->ttvn = req_ttvn; + tt_response->tt_data = htons(tt_tot); + + tt_buff = skb->data + sizeof(struct tt_query_packet); + memcpy(tt_buff, bat_priv->tt_buff, + bat_priv->tt_buff_len); + spin_unlock_bh(&bat_priv->tt_buff_lock); + } else { + tt_len = (uint16_t)atomic_read(&bat_priv->num_local_tt) * + sizeof(struct tt_change); + ttvn = (uint8_t)atomic_read(&bat_priv->ttvn); + + skb = tt_response_fill_table(tt_len, ttvn, + bat_priv->tt_local_hash, + primary_if, tt_local_valid_entry, + NULL); + if (!skb) + goto out; + + tt_response = (struct tt_query_packet *)skb->data; + } + + tt_response->packet_type = BAT_TT_QUERY; + tt_response->version = COMPAT_VERSION; + tt_response->ttl = TTL; + memcpy(tt_response->src, primary_if->net_dev->dev_addr, ETH_ALEN); + memcpy(tt_response->dst, tt_request->src, ETH_ALEN); + tt_response->flags = TT_RESPONSE; + + if (full_table) + tt_response->flags |= TT_FULL_TABLE; + + bat_dbg(DBG_TT, bat_priv, + "Sending TT_RESPONSE to %pM via %pM [%c]\n", + orig_node->orig, neigh_node->addr, + (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = true; + goto out; + +unlock: + spin_unlock_bh(&bat_priv->tt_buff_lock); +out: + if (orig_node) + orig_node_free_ref(orig_node); + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (primary_if) + hardif_free_ref(primary_if); + if (!ret) + kfree_skb(skb); + /* This packet was for me, so it doesn't need to be re-routed */ + return true; +} + +bool send_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request) +{ + if (is_my_mac(tt_request->dst)) + return send_my_tt_response(bat_priv, tt_request); + else + return send_other_tt_response(bat_priv, tt_request); +} + +static void _tt_update_changes(struct bat_priv *bat_priv, + struct orig_node *orig_node, + struct tt_change *tt_change, + uint16_t tt_num_changes, uint8_t ttvn) +{ + int i; + + for (i = 0; i < tt_num_changes; i++) { + if ((tt_change + i)->flags & TT_CLIENT_DEL) + tt_global_del(bat_priv, orig_node, + (tt_change + i)->addr, + "tt removed by changes", + (tt_change + i)->flags & TT_CLIENT_ROAM); + else + if (!tt_global_add(bat_priv, orig_node, + (tt_change + i)->addr, ttvn, false)) + /* In case of problem while storing a + * global_entry, we stop the updating + * procedure without committing the + * ttvn change. This will avoid to send + * corrupted data on tt_request + */ + return; + } +} + +static void tt_fill_gtable(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response) +{ + struct orig_node *orig_node = NULL; + + orig_node = orig_hash_find(bat_priv, tt_response->src); + if (!orig_node) + goto out; + + /* Purge the old table first.. */ + tt_global_del_orig(bat_priv, orig_node, "Received full table"); + + _tt_update_changes(bat_priv, orig_node, + (struct tt_change *)(tt_response + 1), + tt_response->tt_data, tt_response->ttvn); + + spin_lock_bh(&orig_node->tt_buff_lock); + kfree(orig_node->tt_buff); + orig_node->tt_buff_len = 0; + orig_node->tt_buff = NULL; + spin_unlock_bh(&orig_node->tt_buff_lock); + + atomic_set(&orig_node->last_ttvn, tt_response->ttvn); + +out: + if (orig_node) + orig_node_free_ref(orig_node); +} + +void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change) +{ + _tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, + ttvn); + + tt_save_orig_buffer(bat_priv, orig_node, (unsigned char *)tt_change, + tt_num_changes); + atomic_set(&orig_node->last_ttvn, ttvn); +} + +bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr) +{ + struct tt_local_entry *tt_local_entry = NULL; + bool ret = false; + + tt_local_entry = tt_local_hash_find(bat_priv, addr); + if (!tt_local_entry) + goto out; + /* Check if the client has been logically deleted (but is kept for + * consistency purpose) */ + if (tt_local_entry->flags & TT_CLIENT_PENDING) + goto out; + ret = true; +out: + if (tt_local_entry) + tt_local_entry_free_ref(tt_local_entry); + return ret; +} + +void handle_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response) +{ + struct tt_req_node *node, *safe; + struct orig_node *orig_node = NULL; + + bat_dbg(DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for " + "ttvn %d t_size: %d [%c]\n", + tt_response->src, tt_response->ttvn, + tt_response->tt_data, + (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); + + orig_node = orig_hash_find(bat_priv, tt_response->src); + if (!orig_node) + goto out; + + if (tt_response->flags & TT_FULL_TABLE) + tt_fill_gtable(bat_priv, tt_response); + else + tt_update_changes(bat_priv, orig_node, tt_response->tt_data, + tt_response->ttvn, + (struct tt_change *)(tt_response + 1)); + + /* Delete the tt_req_node from pending tt_requests list */ + spin_lock_bh(&bat_priv->tt_req_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) { + if (!compare_eth(node->addr, tt_response->src)) + continue; + list_del(&node->list); + kfree(node); + } + spin_unlock_bh(&bat_priv->tt_req_list_lock); + + /* Recalculate the CRC for this orig_node and store it */ + orig_node->tt_crc = tt_global_crc(bat_priv, orig_node); + /* Roaming phase is over: tables are in sync again. I can + * unset the flag */ + orig_node->tt_poss_change = false; +out: + if (orig_node) + orig_node_free_ref(orig_node); +} + +int tt_init(struct bat_priv *bat_priv) +{ + if (!tt_local_init(bat_priv)) + return 0; + + if (!tt_global_init(bat_priv)) + return 0; + + tt_start_timer(bat_priv); + + return 1; +} + +static void tt_roam_list_free(struct bat_priv *bat_priv) +{ + struct tt_roam_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + + list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + list_del(&node->list); + kfree(node); + } + + spin_unlock_bh(&bat_priv->tt_roam_list_lock); +} + +static void tt_roam_purge(struct bat_priv *bat_priv) +{ + struct tt_roam_node *node, *safe; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) { + if (!is_out_of_time(node->first_time, + ROAMING_MAX_TIME * 1000)) + continue; + + list_del(&node->list); + kfree(node); + } + spin_unlock_bh(&bat_priv->tt_roam_list_lock); +} + +/* This function checks whether the client already reached the + * maximum number of possible roaming phases. In this case the ROAMING_ADV + * will not be sent. + * + * returns true if the ROAMING_ADV can be sent, false otherwise */ +static bool tt_check_roam_count(struct bat_priv *bat_priv, + uint8_t *client) +{ + struct tt_roam_node *tt_roam_node; + bool ret = false; + + spin_lock_bh(&bat_priv->tt_roam_list_lock); + /* The new tt_req will be issued only if I'm not waiting for a + * reply from the same orig_node yet */ + list_for_each_entry(tt_roam_node, &bat_priv->tt_roam_list, list) { + if (!compare_eth(tt_roam_node->addr, client)) + continue; + + if (is_out_of_time(tt_roam_node->first_time, + ROAMING_MAX_TIME * 1000)) + continue; + + if (!atomic_dec_not_zero(&tt_roam_node->counter)) + /* Sorry, you roamed too many times! */ + goto unlock; + ret = true; + break; + } + + if (!ret) { + tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + if (!tt_roam_node) + goto unlock; + + tt_roam_node->first_time = jiffies; + atomic_set(&tt_roam_node->counter, ROAMING_MAX_COUNT - 1); + memcpy(tt_roam_node->addr, client, ETH_ALEN); + + list_add(&tt_roam_node->list, &bat_priv->tt_roam_list); + ret = true; + } + +unlock: + spin_unlock_bh(&bat_priv->tt_roam_list_lock); + return ret; +} + +void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, + struct orig_node *orig_node) +{ + struct neigh_node *neigh_node = NULL; + struct sk_buff *skb = NULL; + struct roam_adv_packet *roam_adv_packet; + int ret = 1; + struct hard_iface *primary_if; + + /* before going on we have to check whether the client has + * already roamed to us too many times */ + if (!tt_check_roam_count(bat_priv, client)) + goto out; + + skb = dev_alloc_skb(sizeof(struct roam_adv_packet) + ETH_HLEN); + if (!skb) + goto out; + + skb_reserve(skb, ETH_HLEN); + + roam_adv_packet = (struct roam_adv_packet *)skb_put(skb, + sizeof(struct roam_adv_packet)); + + roam_adv_packet->packet_type = BAT_ROAM_ADV; + roam_adv_packet->version = COMPAT_VERSION; + roam_adv_packet->ttl = TTL; + primary_if = primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + memcpy(roam_adv_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN); + hardif_free_ref(primary_if); + memcpy(roam_adv_packet->dst, orig_node->orig, ETH_ALEN); + memcpy(roam_adv_packet->client, client, ETH_ALEN); + + neigh_node = orig_node_get_router(orig_node); + if (!neigh_node) + goto out; + + bat_dbg(DBG_TT, bat_priv, + "Sending ROAMING_ADV to %pM (client %pM) via %pM\n", + orig_node->orig, client, neigh_node->addr); + + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + ret = 0; + +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (ret) + kfree_skb(skb); + return; +} + +static void tt_purge(struct work_struct *work) +{ + struct delayed_work *delayed_work = + container_of(work, struct delayed_work, work); + struct bat_priv *bat_priv = + container_of(delayed_work, struct bat_priv, tt_work); + + tt_local_purge(bat_priv); + tt_global_roam_purge(bat_priv); + tt_req_purge(bat_priv); + tt_roam_purge(bat_priv); + + tt_start_timer(bat_priv); +} + +void tt_free(struct bat_priv *bat_priv) +{ + cancel_delayed_work_sync(&bat_priv->tt_work); + + tt_local_table_free(bat_priv); + tt_global_table_free(bat_priv); + tt_req_list_free(bat_priv); + tt_changes_list_free(bat_priv); + tt_roam_list_free(bat_priv); + + kfree(bat_priv->tt_buff); +} + +/* This function will reset the specified flags from all the entries in + * the given hash table and will increment num_local_tt for each involved + * entry */ +static void tt_local_reset_flags(struct bat_priv *bat_priv, uint16_t flags) +{ + int i; + struct hashtable_t *hash = bat_priv->tt_local_hash; + struct hlist_head *head; + struct hlist_node *node; + struct tt_local_entry *tt_local_entry; + + if (!hash) + return; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_local_entry, node, + head, hash_entry) { + tt_local_entry->flags &= ~flags; + atomic_inc(&bat_priv->num_local_tt); + } + rcu_read_unlock(); + } + +} + +/* Purge out all the tt local entries marked with TT_CLIENT_PENDING */ +static void tt_local_purge_pending_clients(struct bat_priv *bat_priv) +{ + struct hashtable_t *hash = bat_priv->tt_local_hash; + struct tt_local_entry *tt_local_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + spinlock_t *list_lock; /* protects write access to the hash lists */ + int i; + + if (!hash) + return; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(tt_local_entry, node, node_tmp, + head, hash_entry) { + if (!(tt_local_entry->flags & TT_CLIENT_PENDING)) + continue; + + bat_dbg(DBG_TT, bat_priv, "Deleting local tt entry " + "(%pM): pending\n", tt_local_entry->addr); + + atomic_dec(&bat_priv->num_local_tt); + hlist_del_rcu(node); + tt_local_entry_free_ref(tt_local_entry); + } + spin_unlock_bh(list_lock); + } + +} + +void tt_commit_changes(struct bat_priv *bat_priv) +{ + tt_local_reset_flags(bat_priv, TT_CLIENT_NEW); + tt_local_purge_pending_clients(bat_priv); + + /* Increment the TTVN only once per OGM interval */ + atomic_inc(&bat_priv->ttvn); + bat_priv->tt_poss_change = false; +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 46152c38cc95..d4122cba53b8 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,22 +22,45 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -int tt_local_init(struct bat_priv *bat_priv); -void tt_local_add(struct net_device *soft_iface, uint8_t *addr); +int tt_len(int changes_num); +int tt_changes_fill_buffer(struct bat_priv *bat_priv, + unsigned char *buff, int buff_len); +int tt_init(struct bat_priv *bat_priv); +void tt_local_add(struct net_device *soft_iface, const uint8_t *addr); void tt_local_remove(struct bat_priv *bat_priv, - uint8_t *addr, char *message); -int tt_local_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len); + const uint8_t *addr, const char *message, bool roaming); int tt_local_seq_print_text(struct seq_file *seq, void *offset); -void tt_local_free(struct bat_priv *bat_priv); -int tt_global_init(struct bat_priv *bat_priv); -void tt_global_add_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, - unsigned char *tt_buff, int tt_buff_len); +void tt_global_add_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, int tt_buff_len); +int tt_global_add(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + uint8_t ttvn, bool roaming); int tt_global_seq_print_text(struct seq_file *seq, void *offset); void tt_global_del_orig(struct bat_priv *bat_priv, - struct orig_node *orig_node, char *message); -void tt_global_free(struct bat_priv *bat_priv); -struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr); + struct orig_node *orig_node, const char *message); +void tt_global_del(struct bat_priv *bat_priv, + struct orig_node *orig_node, const unsigned char *addr, + const char *message, bool roaming); +struct orig_node *transtable_search(struct bat_priv *bat_priv, + const uint8_t *addr); +void tt_save_orig_buffer(struct bat_priv *bat_priv, struct orig_node *orig_node, + const unsigned char *tt_buff, uint8_t tt_num_changes); +uint16_t tt_local_crc(struct bat_priv *bat_priv); +uint16_t tt_global_crc(struct bat_priv *bat_priv, struct orig_node *orig_node); +void tt_free(struct bat_priv *bat_priv); +int send_tt_request(struct bat_priv *bat_priv, + struct orig_node *dst_orig_node, uint8_t hvn, + uint16_t tt_crc, bool full_table); +bool send_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_request); +void tt_update_changes(struct bat_priv *bat_priv, struct orig_node *orig_node, + uint16_t tt_num_changes, uint8_t ttvn, + struct tt_change *tt_change); +bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); +void handle_tt_response(struct bat_priv *bat_priv, + struct tt_query_packet *tt_response); +void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, + struct orig_node *orig_node); +void tt_commit_changes(struct bat_priv *bat_priv); #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index fab70e8b16ee..25bd1db35370 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -75,8 +75,18 @@ struct orig_node { unsigned long batman_seqno_reset; uint8_t gw_flags; uint8_t flags; + atomic_t last_ttvn; /* last seen translation table version number */ + uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; + spinlock_t tt_buff_lock; /* protects tt_buff */ + atomic_t tt_size; + /* The tt_poss_change flag is used to detect an ongoing roaming phase. + * If true, then I sent a Roaming_adv to this orig_node and I have to + * inspect every packet directed to it to check whether it is still + * the true destination or not. This flag will be reset to false as + * soon as I receive a new TTVN from this orig_node */ + bool tt_poss_change; uint32_t last_real_seqno; uint8_t last_ttl; unsigned long bcast_bits[NUM_WORDS]; @@ -94,6 +104,7 @@ struct orig_node { spinlock_t ogm_cnt_lock; /* bcast_seqno_lock protects bcast_bits, last_bcast_seqno */ spinlock_t bcast_seqno_lock; + spinlock_t tt_list_lock; /* protects tt_list */ atomic_t bond_candidates; struct list_head bond_list; }; @@ -145,6 +156,15 @@ struct bat_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; + atomic_t ttvn; /* tranlation table version number */ + atomic_t tt_ogm_append_cnt; + atomic_t tt_local_changes; /* changes registered in a OGM interval */ + /* The tt_poss_change flag is used to detect an ongoing roaming phase. + * If true, then I received a Roaming_adv and I have to inspect every + * packet directed to me to check whether I am still the true + * destination or not. This flag will be reset to false as soon as I + * increase my TTVN */ + bool tt_poss_change; char num_ifaces; struct debug_log *debug_log; struct kobject *mesh_obj; @@ -153,26 +173,35 @@ struct bat_priv { struct hlist_head forw_bcast_list; struct hlist_head gw_list; struct hlist_head softif_neigh_vids; + struct list_head tt_changes_list; /* tracks changes in a OGM int */ struct list_head vis_send_list; struct hashtable_t *orig_hash; struct hashtable_t *tt_local_hash; struct hashtable_t *tt_global_hash; + struct list_head tt_req_list; /* list of pending tt_requests */ + struct list_head tt_roam_list; struct hashtable_t *vis_hash; spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ - spinlock_t tt_lhash_lock; /* protects tt_local_hash */ - spinlock_t tt_ghash_lock; /* protects tt_global_hash */ + spinlock_t tt_changes_list_lock; /* protects tt_changes */ + spinlock_t tt_req_list_lock; /* protects tt_req_list */ + spinlock_t tt_roam_list_lock; /* protects tt_roam_list */ spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ spinlock_t vis_hash_lock; /* protects vis_hash */ spinlock_t vis_list_lock; /* protects vis_info::recv_list */ spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ spinlock_t softif_neigh_vid_lock; /* protects soft-interface vid list */ - int16_t num_local_tt; - atomic_t tt_local_changed; + atomic_t num_local_tt; + /* Checksum of the local table, recomputed before sending a new OGM */ + atomic_t tt_crc; + unsigned char *tt_buff; + int16_t tt_buff_len; + spinlock_t tt_buff_lock; /* protects tt_buff */ struct delayed_work tt_work; struct delayed_work orig_work; struct delayed_work vis_work; struct gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t gw_reselect; struct hard_iface __rcu *primary_if; /* rcu protected pointer */ struct vis_info *my_vis_info; }; @@ -195,14 +224,39 @@ struct socket_packet { struct tt_local_entry { uint8_t addr[ETH_ALEN]; unsigned long last_seen; - char never_purge; + uint16_t flags; + atomic_t refcount; + struct rcu_head rcu; struct hlist_node hash_entry; }; struct tt_global_entry { uint8_t addr[ETH_ALEN]; struct orig_node *orig_node; - struct hlist_node hash_entry; + uint8_t ttvn; + uint16_t flags; /* only TT_GLOBAL_ROAM is used */ + unsigned long roam_at; /* time at which TT_GLOBAL_ROAM was set */ + atomic_t refcount; + struct rcu_head rcu; + struct hlist_node hash_entry; /* entry in the global table */ +}; + +struct tt_change_node { + struct list_head list; + struct tt_change change; +}; + +struct tt_req_node { + uint8_t addr[ETH_ALEN]; + unsigned long issued_at; + struct list_head list; +}; + +struct tt_roam_node { + uint8_t addr[ETH_ALEN]; + atomic_t counter; + unsigned long first_time; + struct list_head list; }; /** @@ -246,10 +300,10 @@ struct frag_packet_list_entry { }; struct vis_info { - unsigned long first_seen; - struct list_head recv_list; - /* list of server-neighbors we received a vis-packet - * from. we should not reply to them. */ + unsigned long first_seen; + /* list of server-neighbors we received a vis-packet + * from. we should not reply to them. */ + struct list_head recv_list; struct list_head send_list; struct kref refcount; struct hlist_node hash_entry; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 19c3daf34ac6..32b125fb3d3b 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, (struct unicast_frag_packet *)skb->data; struct sk_buff *tmp_skb; struct unicast_packet *unicast_packet; - int hdr_len = sizeof(struct unicast_packet); - int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + int hdr_len = sizeof(*unicast_packet); + int uni_diff = sizeof(*up) - hdr_len; /* set skb to the first part and tmp_skb to the second part */ if (up->flags & UNI_FRAG_HEAD) { @@ -53,7 +53,7 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0) goto err; - skb_pull(tmp_skb, sizeof(struct unicast_frag_packet)); + skb_pull(tmp_skb, sizeof(*up)); if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0) goto err; @@ -99,8 +99,7 @@ static int frag_create_buffer(struct list_head *head) struct frag_packet_list_entry *tfp; for (i = 0; i < FRAG_BUFFER_SIZE; i++) { - tfp = kmalloc(sizeof(struct frag_packet_list_entry), - GFP_ATOMIC); + tfp = kmalloc(sizeof(*tfp), GFP_ATOMIC); if (!tfp) { frag_list_free(head); return -ENOMEM; @@ -115,7 +114,7 @@ static int frag_create_buffer(struct list_head *head) } static struct frag_packet_list_entry *frag_search_packet(struct list_head *head, - struct unicast_frag_packet *up) + const struct unicast_frag_packet *up) { struct frag_packet_list_entry *tfp; struct unicast_frag_packet *tmp_up = NULL; @@ -218,14 +217,14 @@ out: } int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct hard_iface *hard_iface, uint8_t dstaddr[]) + struct hard_iface *hard_iface, const uint8_t dstaddr[]) { struct unicast_packet tmp_uc, *unicast_packet; struct hard_iface *primary_if; struct sk_buff *frag_skb; struct unicast_frag_packet *frag1, *frag2; - int uc_hdr_len = sizeof(struct unicast_packet); - int ucf_hdr_len = sizeof(struct unicast_frag_packet); + int uc_hdr_len = sizeof(*unicast_packet); + int ucf_hdr_len = sizeof(*frag1); int data_len = skb->len - uc_hdr_len; int large_tail = 0, ret = NET_RX_DROP; uint16_t seqno; @@ -250,14 +249,14 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag1 = (struct unicast_frag_packet *)skb->data; frag2 = (struct unicast_frag_packet *)frag_skb->data; - memcpy(frag1, &tmp_uc, sizeof(struct unicast_packet)); + memcpy(frag1, &tmp_uc, sizeof(tmp_uc)); frag1->ttl--; frag1->version = COMPAT_VERSION; frag1->packet_type = BAT_UNICAST_FRAG; memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN); - memcpy(frag2, frag1, sizeof(struct unicast_frag_packet)); + memcpy(frag2, frag1, sizeof(*frag2)); if (data_len & 1) large_tail = UNI_FRAG_LARGETAIL; @@ -295,7 +294,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) { - orig_node = (struct orig_node *)gw_get_selected_orig(bat_priv); + orig_node = gw_get_selected_orig(bat_priv); if (orig_node) goto find_router; } @@ -314,10 +313,7 @@ find_router: if (!neigh_node) goto out; - if (neigh_node->if_incoming->if_status != IF_ACTIVE) - goto out; - - if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) + if (my_skb_head_push(skb, sizeof(*unicast_packet)) < 0) goto out; unicast_packet = (struct unicast_packet *)skb->data; @@ -329,9 +325,12 @@ find_router: unicast_packet->ttl = TTL; /* copy the destination for faster routing */ memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + /* set the destination tt version number */ + unicast_packet->ttvn = + (uint8_t)atomic_read(&orig_node->last_ttvn); if (atomic_read(&bat_priv->fragmentation) && - data_len + sizeof(struct unicast_packet) > + data_len + sizeof(*unicast_packet) > neigh_node->if_incoming->net_dev->mtu) { /* send frag skb decreases ttl */ unicast_packet->ttl++; diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 16ad7a9242b5..62f54b954625 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -32,11 +32,11 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, void frag_list_free(struct list_head *head); int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct hard_iface *hard_iface, uint8_t dstaddr[]); + struct hard_iface *hard_iface, const uint8_t dstaddr[]); -static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) +static inline int frag_can_reassemble(const struct sk_buff *skb, int mtu) { - struct unicast_frag_packet *unicast_packet; + const struct unicast_frag_packet *unicast_packet; int uneven_correction = 0; unsigned int merged_size; @@ -49,7 +49,7 @@ static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) uneven_correction = -1; } - merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2; + merged_size = (skb->len - sizeof(*unicast_packet)) * 2; merged_size += sizeof(struct unicast_packet) + uneven_correction; return merged_size <= mtu; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c39f20cc1ba6..8a1b98589d76 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -30,22 +30,6 @@ #define MAX_VIS_PACKET_SIZE 1000 -/* Returns the smallest signed integer in two's complement with the sizeof x */ -#define smallest_signed_int(x) (1u << (7u + 8u * (sizeof(x) - 1u))) - -/* Checks if a sequence number x is a predecessor/successor of y. - * they handle overflows/underflows and can correctly check for a - * predecessor/successor unless the variable sequence number has grown by - * more then 2**(bitwidth(x)-1)-1. - * This means that for a uint8_t with the maximum value 255, it would think: - * - when adding nothing - it is neither a predecessor nor a successor - * - before adding more than 127 to the starting value - it is a predecessor, - * - when adding 128 - it is neither a predecessor nor a successor, - * - after adding more than 127 to the starting value - it is a successor */ -#define seq_before(x, y) ({typeof(x) _dummy = (x - y); \ - _dummy > smallest_signed_int(_dummy); }) -#define seq_after(x, y) seq_before(y, x) - static void start_vis_timer(struct bat_priv *bat_priv); /* free the info */ @@ -68,10 +52,10 @@ static void free_info(struct kref *ref) } /* Compare two vis packets, used by the hashing algorithm */ -static int vis_info_cmp(struct hlist_node *node, void *data2) +static int vis_info_cmp(const struct hlist_node *node, const void *data2) { - struct vis_info *d1, *d2; - struct vis_packet *p1, *p2; + const struct vis_info *d1, *d2; + const struct vis_packet *p1, *p2; d1 = container_of(node, struct vis_info, hash_entry); d2 = data2; @@ -82,11 +66,11 @@ static int vis_info_cmp(struct hlist_node *node, void *data2) /* hash function to choose an entry in a hash table of given size */ /* hash algorithm from http://en.wikipedia.org/wiki/Hash_table */ -static int vis_info_choose(void *data, int size) +static int vis_info_choose(const void *data, int size) { - struct vis_info *vis_info = data; - struct vis_packet *packet; - unsigned char *key; + const struct vis_info *vis_info = data; + const struct vis_packet *packet; + const unsigned char *key; uint32_t hash = 0; size_t i; @@ -106,7 +90,7 @@ static int vis_info_choose(void *data, int size) } static struct vis_info *vis_hash_find(struct bat_priv *bat_priv, - void *data) + const void *data) { struct hashtable_t *hash = bat_priv->vis_hash; struct hlist_head *head; @@ -143,7 +127,7 @@ static void vis_data_insert_interface(const uint8_t *interface, struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { - if (compare_eth(entry->addr, (void *)interface)) + if (compare_eth(entry->addr, interface)) return; } @@ -156,7 +140,8 @@ static void vis_data_insert_interface(const uint8_t *interface, hlist_add_head(&entry->list, if_list); } -static ssize_t vis_data_read_prim_sec(char *buff, struct hlist_head *if_list) +static ssize_t vis_data_read_prim_sec(char *buff, + const struct hlist_head *if_list) { struct if_list_entry *entry; struct hlist_node *pos; @@ -189,8 +174,9 @@ static size_t vis_data_count_prim_sec(struct hlist_head *if_list) } /* read an entry */ -static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, - uint8_t *src, bool primary) +static ssize_t vis_data_read_entry(char *buff, + const struct vis_info_entry *entry, + const uint8_t *src, bool primary) { /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ if (primary && entry->quality == 0) @@ -239,7 +225,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) - ((char *)packet + sizeof(struct vis_packet)); + ((char *)packet + sizeof(*packet)); for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) @@ -287,7 +273,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) - ((char *)packet + sizeof(struct vis_packet)); + ((char *)packet + sizeof(*packet)); for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) @@ -361,11 +347,11 @@ static void send_list_del(struct vis_info *info) /* tries to add one entry to the receive list. */ static void recv_list_add(struct bat_priv *bat_priv, - struct list_head *recv_list, char *mac) + struct list_head *recv_list, const char *mac) { struct recvlist_node *entry; - entry = kmalloc(sizeof(struct recvlist_node), GFP_ATOMIC); + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return; @@ -377,9 +363,9 @@ static void recv_list_add(struct bat_priv *bat_priv, /* returns 1 if this mac is in the recv_list */ static int recv_list_is_in(struct bat_priv *bat_priv, - struct list_head *recv_list, char *mac) + const struct list_head *recv_list, const char *mac) { - struct recvlist_node *entry; + const struct recvlist_node *entry; spin_lock_bh(&bat_priv->vis_list_lock); list_for_each_entry(entry, recv_list, list) { @@ -412,11 +398,11 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, return NULL; /* see if the packet is already in vis_hash */ - search_elem.skb_packet = dev_alloc_skb(sizeof(struct vis_packet)); + search_elem.skb_packet = dev_alloc_skb(sizeof(*search_packet)); if (!search_elem.skb_packet) return NULL; search_packet = (struct vis_packet *)skb_put(search_elem.skb_packet, - sizeof(struct vis_packet)); + sizeof(*search_packet)); memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); old_info = vis_hash_find(bat_priv, &search_elem); @@ -442,27 +428,26 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, kref_put(&old_info->refcount, free_info); } - info = kmalloc(sizeof(struct vis_info), GFP_ATOMIC); + info = kmalloc(sizeof(*info), GFP_ATOMIC); if (!info) return NULL; - info->skb_packet = dev_alloc_skb(sizeof(struct vis_packet) + - vis_info_len + sizeof(struct ethhdr)); + info->skb_packet = dev_alloc_skb(sizeof(*packet) + vis_info_len + + sizeof(struct ethhdr)); if (!info->skb_packet) { kfree(info); return NULL; } skb_reserve(info->skb_packet, sizeof(struct ethhdr)); - packet = (struct vis_packet *)skb_put(info->skb_packet, - sizeof(struct vis_packet) + - vis_info_len); + packet = (struct vis_packet *)skb_put(info->skb_packet, sizeof(*packet) + + vis_info_len); kref_init(&info->refcount); INIT_LIST_HEAD(&info->send_list); INIT_LIST_HEAD(&info->recv_list); info->first_seen = jiffies; info->bat_priv = bat_priv; - memcpy(packet, vis_packet, sizeof(struct vis_packet) + vis_info_len); + memcpy(packet, vis_packet, sizeof(*packet) + vis_info_len); /* initialize and add new packet. */ *is_new = 1; @@ -599,9 +584,9 @@ static int find_best_vis_server(struct bat_priv *bat_priv, } /* Return true if the vis packet is full. */ -static bool vis_packet_full(struct vis_info *info) +static bool vis_packet_full(const struct vis_info *info) { - struct vis_packet *packet; + const struct vis_packet *packet; packet = (struct vis_packet *)info->skb_packet->data; if (MAX_VIS_PACKET_SIZE / sizeof(struct vis_info_entry) @@ -619,7 +604,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) struct hlist_head *head; struct orig_node *orig_node; struct neigh_node *router; - struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; + struct vis_info *info = bat_priv->my_vis_info; struct vis_packet *packet = (struct vis_packet *)info->skb_packet->data; struct vis_info_entry *entry; struct tt_local_entry *tt_local_entry; @@ -632,7 +617,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) packet->ttl = TTL; packet->seqno = htonl(ntohl(packet->seqno) + 1); packet->entries = 0; - skb_trim(info->skb_packet, sizeof(struct vis_packet)); + skb_trim(info->skb_packet, sizeof(*packet)); if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { best_tq = find_best_vis_server(bat_priv, info); @@ -680,11 +665,12 @@ next: hash = bat_priv->tt_local_hash; - spin_lock_bh(&bat_priv->tt_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(tt_local_entry, node, head, hash_entry) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tt_local_entry, node, head, + hash_entry) { entry = (struct vis_info_entry *) skb_put(info->skb_packet, sizeof(*entry)); @@ -693,14 +679,12 @@ next: entry->quality = 0; /* 0 means TT */ packet->entries++; - if (vis_packet_full(info)) { - spin_unlock_bh(&bat_priv->tt_lhash_lock); - return 0; - } + if (vis_packet_full(info)) + goto unlock; } + rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->tt_lhash_lock); return 0; unlock: @@ -908,17 +892,15 @@ int vis_init(struct bat_priv *bat_priv) goto err; } - bat_priv->my_vis_info->skb_packet = dev_alloc_skb( - sizeof(struct vis_packet) + - MAX_VIS_PACKET_SIZE + - sizeof(struct ethhdr)); + bat_priv->my_vis_info->skb_packet = dev_alloc_skb(sizeof(*packet) + + MAX_VIS_PACKET_SIZE + + sizeof(struct ethhdr)); if (!bat_priv->my_vis_info->skb_packet) goto free_info; skb_reserve(bat_priv->my_vis_info->skb_packet, sizeof(struct ethhdr)); - packet = (struct vis_packet *)skb_put( - bat_priv->my_vis_info->skb_packet, - sizeof(struct vis_packet)); + packet = (struct vis_packet *)skb_put(bat_priv->my_vis_info->skb_packet, + sizeof(*packet)); /* prefill the vis info */ bat_priv->my_vis_info->first_seen = jiffies - diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 8c100c9dae28..d4f5dff7c955 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -231,6 +231,7 @@ void bnep_net_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &bnep_netdev_ops; dev->watchdog_timeo = HZ * 2; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 908fcd384ab4..ec0bc3f60f2e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1771,7 +1771,7 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count) data += (count - rem); count = rem; - }; + } return rem; } @@ -1806,7 +1806,7 @@ int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) data += (count - rem); count = rem; - }; + } return rem; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f7f8e2cd3f70..3204ba8a701c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -763,7 +763,8 @@ static void l2cap_conn_start(struct l2cap_conn *conn) struct sock *parent = bt_sk(sk)->parent; rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); - parent->sk_data_ready(parent, 0); + if (parent) + parent->sk_data_ready(parent, 0); } else { l2cap_state_change(chan, BT_CONFIG); @@ -2523,8 +2524,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr sk = chan->sk; - if ((bt_sk(sk)->defer_setup && chan->state != BT_CONNECT2) || - (!bt_sk(sk)->defer_setup && chan->state != BT_CONFIG)) { + if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) { struct l2cap_cmd_rej_cid rej; rej.reason = cpu_to_le16(L2CAP_REJ_INVALID_CID); @@ -4150,7 +4150,8 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) struct sock *parent = bt_sk(sk)->parent; res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; - parent->sk_data_ready(parent, 0); + if (parent) + parent->sk_data_ready(parent, 0); } else { l2cap_state_change(chan, BT_CONFIG); res = L2CAP_CR_SUCCESS; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6b2f86378c7..32b8f9f7f79e 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,7 +49,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); rcu_read_lock(); - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + br_flood_deliver(br, skb); + else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { br_flood_deliver(br, skb); goto out; @@ -243,6 +245,7 @@ int br_netpoll_enable(struct net_bridge_port *p) goto out; np->dev = p->dev; + strlcpy(np->dev_name, p->dev->name, IFNAMSIZ); err = __netpoll_setup(np); if (err) { diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e0dfbc151dd7..68def3b7fb49 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -21,7 +21,7 @@ #include <linux/jhash.h> #include <linux/random.h> #include <linux/slab.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/unaligned.h> #include "br_private.h" diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1bacca4cb676..3176e2e13d9b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -388,7 +388,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_ifinfo_notify(RTM_NEWLINK, p); if (changed_addr) - call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); dev_set_mtu(br->dev, br_min_mtu(br)); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f3ac1e858ee1..f06ee39c73fd 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,7 +60,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br = p->br; br_fdb_update(br, p, eth_hdr(skb)->h_source); - if (is_multicast_ether_addr(dest) && + if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) goto drop; @@ -77,7 +77,9 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_multicast_ether_addr(dest)) { + if (is_broadcast_ether_addr(dest)) + skb2 = skb; + else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb); if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { if ((mdst && mdst->mglist) || diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2f14eafdeeab..2d85ca7111d3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1379,8 +1379,11 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) return -EINVAL; - if (iph->protocol != IPPROTO_IGMP) + if (iph->protocol != IPPROTO_IGMP) { + if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; + } len = ntohs(iph->tot_len); if (skb->len < len || len < ip_hdrlen(skb)) @@ -1424,7 +1427,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip4_multicast_add_group(br, port, ih->group); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: @@ -1543,7 +1546,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, goto out; } mld = (struct mld_msg *)skb_transport_header(skb2); - BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca); break; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3fa123185e89..d6ec3720c77e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -104,10 +104,22 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) { } +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + return NULL; +} + static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, + .cow_metrics = fake_cow_metrics, + .neigh_lookup = fake_neigh_lookup, }; /* @@ -337,24 +349,26 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct neighbour *neigh; struct dst_entry *dst; skb->dev = bridge_parent(skb->dev); if (!skb->dev) goto free_skb; dst = skb_dst(skb); - if (dst->hh) { - neigh_hh_bridge(dst->hh, skb); + neigh = dst_get_neighbour(dst); + if (neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; return br_handle_frame_finish(skb); - } else if (dst->neighbour) { + } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and * protocol number. */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; - return dst->neighbour->output(skb); + return neigh->output(neigh, skb); } free_skb: kfree_skb(skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ffb0dc4cc0e8..5b1ed1ba9aa7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -188,6 +188,8 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) p->state = new_state; br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + return 0; } @@ -218,19 +220,24 @@ int __init br_netlink_init(void) if (err < 0) goto err1; - err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo); + err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, + br_dump_ifinfo, NULL); if (err) goto err2; - err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, + br_rtm_setlink, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, br_fdb_add, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, + br_fdb_add, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, br_fdb_delete, NULL); + err = __rtnl_register(PF_BRIDGE, RTM_DELNEIGH, + br_fdb_delete, NULL, NULL); if (err) goto err3; - err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, br_fdb_dump); + err = __rtnl_register(PF_BRIDGE, RTM_GETNEIGH, + NULL, br_fdb_dump, NULL); if (err) goto err3; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 54578f274d85..78cc364997d9 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -124,6 +124,7 @@ struct net_bridge_port bridge_id designated_bridge; u32 path_cost; u32 designated_cost; + unsigned long designated_age; struct timer_list forward_delay_timer; struct timer_list hold_timer; diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index 642ef47a867e..05ed9bc7e426 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -56,7 +56,8 @@ extern void br_become_root_bridge(struct net_bridge *br); extern void br_config_bpdu_generation(struct net_bridge *); extern void br_configuration_update(struct net_bridge *); extern void br_port_state_selection(struct net_bridge *); -extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu); +extern void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu); extern void br_received_tcn_bpdu(struct net_bridge_port *p); extern void br_transmit_config(struct net_bridge_port *p); extern void br_transmit_tcn(struct net_bridge *br); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index bb4383e84de9..ad0a3f7cf6cc 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -109,7 +109,6 @@ static void br_root_selection(struct net_bridge *br) list_for_each_entry(p, &br->port_list, list) { if (br_should_become_root_port(p, root_port)) root_port = p->port_no; - } br->root_port = root_port; @@ -145,7 +144,6 @@ void br_transmit_config(struct net_bridge_port *p) struct br_config_bpdu bpdu; struct net_bridge *br; - if (timer_pending(&p->hold_timer)) { p->config_pending = 1; return; @@ -164,8 +162,7 @@ void br_transmit_config(struct net_bridge_port *p) else { struct net_bridge_port *root = br_get_port(br, br->root_port); - bpdu.message_age = br->max_age - - (root->message_age_timer.expires - jiffies) + bpdu.message_age = (jiffies - root->designated_age) + MESSAGE_AGE_INCR; } bpdu.max_age = br->max_age; @@ -182,20 +179,21 @@ void br_transmit_config(struct net_bridge_port *p) } /* called under bridge lock */ -static inline void br_record_config_information(struct net_bridge_port *p, - const struct br_config_bpdu *bpdu) +static void br_record_config_information(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { p->designated_root = bpdu->root; p->designated_cost = bpdu->root_path_cost; p->designated_bridge = bpdu->bridge_id; p->designated_port = bpdu->port_id; + p->designated_age = jiffies + bpdu->message_age; mod_timer(&p->message_age_timer, jiffies + (p->br->max_age - bpdu->message_age)); } /* called under bridge lock */ -static inline void br_record_config_timeout_values(struct net_bridge *br, +static void br_record_config_timeout_values(struct net_bridge *br, const struct br_config_bpdu *bpdu) { br->max_age = bpdu->max_age; @@ -254,7 +252,8 @@ static void br_designated_port_selection(struct net_bridge *br) } /* called under bridge lock */ -static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +static int br_supersedes_port_info(const struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { int t; @@ -285,7 +284,7 @@ static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_b } /* called under bridge lock */ -static inline void br_topology_change_acknowledged(struct net_bridge *br) +static void br_topology_change_acknowledged(struct net_bridge *br) { br->topology_change_detected = 0; del_timer(&br->tcn_timer); @@ -327,7 +326,7 @@ void br_config_bpdu_generation(struct net_bridge *br) } /* called under bridge lock */ -static inline void br_reply(struct net_bridge_port *p) +static void br_reply(struct net_bridge_port *p) { br_transmit_config(p); } @@ -363,6 +362,8 @@ static void br_make_blocking(struct net_bridge_port *p) p->state = BR_STATE_BLOCKING; br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + del_timer(&p->forward_delay_timer); } } @@ -379,15 +380,14 @@ static void br_make_forwarding(struct net_bridge_port *p) p->state = BR_STATE_FORWARDING; br_topology_change_detection(br); del_timer(&p->forward_delay_timer); - } - else if (br->stp_enabled == BR_KERNEL_STP) + } else if (br->stp_enabled == BR_KERNEL_STP) p->state = BR_STATE_LISTENING; else p->state = BR_STATE_LEARNING; br_multicast_enable_port(p); - br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); if (br->forward_delay != 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); @@ -431,14 +431,15 @@ void br_port_state_selection(struct net_bridge *br) } /* called under bridge lock */ -static inline void br_topology_change_acknowledge(struct net_bridge_port *p) +static void br_topology_change_acknowledge(struct net_bridge_port *p) { p->topology_change_ack = 1; br_transmit_config(p); } /* called under bridge lock */ -void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { struct net_bridge *br; int was_root; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 289646ec9b7b..e16aade51ae0 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -210,10 +210,19 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, bpdu.hello_time = br_get_ticks(buf+28); bpdu.forward_delay = br_get_ticks(buf+30); - br_received_config_bpdu(p, &bpdu); - } + if (bpdu.message_age > bpdu.max_age) { + if (net_ratelimit()) + br_notice(p->br, + "port %u config from %pM" + " (message_age %ul > max_age %ul)\n", + p->port_no, + eth_hdr(skb)->h_source, + bpdu.message_age, bpdu.max_age); + goto out; + } - else if (buf[0] == BPDU_TYPE_TCN) { + br_received_config_bpdu(p, &bpdu); + } else if (buf[0] == BPDU_TYPE_TCN) { br_received_tcn_bpdu(p); } out: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 6f615b8192f4..10eda3cd1d71 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -88,6 +88,7 @@ void br_stp_enable_port(struct net_bridge_port *p) br_init_port(p); br_port_state_selection(p->br); br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); } /* called under bridge lock */ @@ -104,6 +105,8 @@ void br_stp_disable_port(struct net_bridge_port *p) p->topology_change_ack = 0; p->config_pending = 0; + br_ifinfo_notify(RTM_NEWLINK, p); + del_timer(&p->message_age_timer); del_timer(&p->forward_delay_timer); del_timer(&p->hold_timer); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 3e965140051e..58de2a0f9975 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -97,6 +97,7 @@ static void br_forward_delay_timer_expired(unsigned long arg) netif_carrier_on(br->dev); } br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); spin_unlock(&br->lock); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 26377e96fa1c..bf2a333ca7c7 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -216,7 +216,6 @@ unlock: nlmsg_failure: pr_debug("error during NLMSG_PUT. This should " "not happen, please report to author.\n"); - goto unlock; alloc_failure: goto unlock; } diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 682c0fedf360..7c2fa0a08148 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -11,7 +11,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/if_arp.h> #include <linux/net.h> diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 3a66b8c10e09..c23979e79dfa 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -255,7 +255,7 @@ static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, if (cfsrvl_phyid_match(layer, phyid) && layer->ctrlcmd) { - if ((ctrl == _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND || + if ((ctrl == _CAIF_CTRLCMD_PHYIF_DOWN_IND || ctrl == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND) && layer->id != 0) { diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 649ebacaf6bc..865690948bbc 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -7,8 +7,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ -#include <linux/version.h> #include <linux/fs.h> +#include <linux/hardirq.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -139,17 +139,14 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - /* May be called with or without RTNL lock held */ - int islocked = rtnl_is_locked(); - if (!islocked) - rtnl_lock(); + + rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); if (dev->state == CAIF_SHUTDOWN) dev_close(dev->netdev); } - if (!islocked) - rtnl_unlock(); + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); diff --git a/net/can/af_can.c b/net/can/af_can.c index 094fc5332d42..8ce926d3b2cb 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -58,6 +58,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -161,8 +162,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol, * return the error code immediately. Below we will * return -EPROTONOSUPPORT */ - if (err && printk_ratelimit()) - printk(KERN_ERR "can: request_module " + if (err) + printk_ratelimited(KERN_ERR "can: request_module " "(can-proto-%d) failed.\n", protocol); cp = can_get_proto(protocol); diff --git a/net/can/bcm.c b/net/can/bcm.c index 184a6572b67e..d6c8ae5b2e6a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -43,6 +43,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/proc_fs.h> diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index a3a3a31d3c37..41466ccb972a 100644 --- a/net/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -36,16 +36,19 @@ int ceph_flags_to_mode(int flags) if ((flags & O_DIRECTORY) == O_DIRECTORY) return CEPH_FILE_MODE_PIN; #endif - if ((flags & O_APPEND) == O_APPEND) - flags |= O_WRONLY; - if ((flags & O_ACCMODE) == O_RDWR) - mode = CEPH_FILE_MODE_RDWR; - else if ((flags & O_ACCMODE) == O_WRONLY) + switch (flags & O_ACCMODE) { + case O_WRONLY: mode = CEPH_FILE_MODE_WR; - else + break; + case O_RDONLY: mode = CEPH_FILE_MODE_RD; - + break; + case O_RDWR: + case O_ACCMODE: /* this is what the VFS does */ + mode = CEPH_FILE_MODE_RDWR; + break; + } #ifdef O_LAZY if (flags & O_LAZY) mode |= CEPH_FILE_MODE_LAZY; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 5a8009c9e0cd..85f3bc0a7062 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -444,7 +444,7 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void*)data; + p = (void *)data; ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index e15a82ccc05f..c340e2e0765b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) break; default: - sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); + snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)", + (int)ss->ss_family); } return s; @@ -485,13 +486,10 @@ static void prepare_write_message(struct ceph_connection *con) m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); con->out_msg = m; - if (test_bit(LOSSYTX, &con->state)) { - list_del_init(&m->list_head); - } else { - /* put message on sent list */ - ceph_msg_get(m); - list_move_tail(&m->list_head, &con->out_sent); - } + + /* put message on sent list */ + ceph_msg_get(m); + list_move_tail(&m->list_head, &con->out_sent); /* * only assign outgoing seq # if we haven't sent this message @@ -598,7 +596,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) * Connection negotiation. */ -static void prepare_connect_authorizer(struct ceph_connection *con) +static int prepare_connect_authorizer(struct ceph_connection *con) { void *auth_buf; int auth_len = 0; @@ -612,13 +610,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con) con->auth_retry); mutex_lock(&con->mutex); + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) + return -EAGAIN; + con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); con->out_connect.authorizer_len = cpu_to_le32(auth_len); - con->out_kvec[con->out_kvec_left].iov_base = auth_buf; - con->out_kvec[con->out_kvec_left].iov_len = auth_len; - con->out_kvec_left++; - con->out_kvec_bytes += auth_len; + if (auth_len) { + con->out_kvec[con->out_kvec_left].iov_base = auth_buf; + con->out_kvec[con->out_kvec_left].iov_len = auth_len; + con->out_kvec_left++; + con->out_kvec_bytes += auth_len; + } + return 0; } /* @@ -640,9 +645,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr, set_bit(WRITE_PENDING, &con->state); } -static void prepare_write_connect(struct ceph_messenger *msgr, - struct ceph_connection *con, - int after_banner) +static int prepare_write_connect(struct ceph_messenger *msgr, + struct ceph_connection *con, + int after_banner) { unsigned global_seq = get_global_seq(con->msgr, 0); int proto; @@ -683,7 +688,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, con->out_more = 0; set_bit(WRITE_PENDING, &con->state); - prepare_connect_authorizer(con); + return prepare_connect_authorizer(con); } @@ -1065,8 +1070,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) switch (ss->ss_family) { case AF_INET: ((struct sockaddr_in *)ss)->sin_port = htons(p); + break; case AF_INET6: ((struct sockaddr_in6 *)ss)->sin6_port = htons(p); + break; } } @@ -1216,6 +1223,7 @@ static int process_connect(struct ceph_connection *con) u64 sup_feat = con->msgr->supported_features; u64 req_feat = con->msgr->required_features; u64 server_feat = le64_to_cpu(con->in_reply.features); + int ret; dout("process_connect on %p tag %d\n", con, (int)con->in_tag); @@ -1250,7 +1258,9 @@ static int process_connect(struct ceph_connection *con) return -1; } con->auth_retry = 1; - prepare_write_connect(con->msgr, con, 0); + ret = prepare_write_connect(con->msgr, con, 0); + if (ret < 0) + return ret; prepare_read_connect(con); break; @@ -1277,6 +1287,9 @@ static int process_connect(struct ceph_connection *con) if (con->ops->peer_reset) con->ops->peer_reset(con); mutex_lock(&con->mutex); + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) + return -EAGAIN; break; case CEPH_MSGR_TAG_RETRY_SESSION: @@ -1341,7 +1354,9 @@ static int process_connect(struct ceph_connection *con) * to WAIT. This shouldn't happen if we are the * client. */ - pr_err("process_connect peer connecting WAIT\n"); + pr_err("process_connect got WAIT as client\n"); + con->error_msg = "protocol error, got WAIT as client"; + return -1; default: pr_err("connect protocol error, will retry\n"); @@ -1381,6 +1396,7 @@ static void process_ack(struct ceph_connection *con) break; dout("got ack for seq %llu type %d at %p\n", seq, le16_to_cpu(m->hdr.type), m); + m->ack_stamp = jiffies; ceph_msg_remove(m); } prepare_read_tag(con); @@ -1810,6 +1826,17 @@ static int try_read(struct ceph_connection *con) more: dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, con->in_base_pos); + + /* + * process_connect and process_message drop and re-take + * con->mutex. make sure we handle a racing close or reopen. + */ + if (test_bit(CLOSED, &con->state) || + test_bit(OPENING, &con->state)) { + ret = -EAGAIN; + goto out; + } + if (test_bit(CONNECTING, &con->state)) { if (!test_bit(NEGOTIATING, &con->state)) { dout("try_read connecting\n"); @@ -1938,8 +1965,10 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); + int ret; mutex_lock(&con->mutex); +restart: if (test_and_clear_bit(BACKOFF, &con->state)) { dout("con_work %p backing off\n", con); if (queue_delayed_work(ceph_msgr_wq, &con->work, @@ -1969,18 +1998,31 @@ static void con_work(struct work_struct *work) con_close_socket(con); } - if (test_and_clear_bit(SOCK_CLOSED, &con->state) || - try_read(con) < 0 || - try_write(con) < 0) { - mutex_unlock(&con->mutex); - ceph_fault(con); /* error/fault path */ - goto done_unlocked; - } + if (test_and_clear_bit(SOCK_CLOSED, &con->state)) + goto fault; + + ret = try_read(con); + if (ret == -EAGAIN) + goto restart; + if (ret < 0) + goto fault; + + ret = try_write(con); + if (ret == -EAGAIN) + goto restart; + if (ret < 0) + goto fault; done: mutex_unlock(&con->mutex); done_unlocked: con->ops->put(con); + return; + +fault: + mutex_unlock(&con->mutex); + ceph_fault(con); /* error/fault path */ + goto done_unlocked; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 6b5dda1cb5df..ce310eee708d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc, ceph_calc_raw_layout(osdc, layout, vino.snap, off, plen, &bno, req, op); - sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno); + snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); } @@ -477,8 +477,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, calc_layout(osdc, vino, layout, off, plen, req, ops); req->r_file_layout = *layout; /* keep a copy */ - /* in case it differs from natural alignment that calc_layout - filled in for us */ + /* in case it differs from natural (file) alignment that + calc_layout filled in for us */ + req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; ceph_osdc_build_request(req, off, plen, ops, @@ -1084,9 +1085,15 @@ static void handle_timeout(struct work_struct *work) req = list_entry(osdc->req_lru.next, struct ceph_osd_request, r_req_lru_item); + /* hasn't been long enough since we sent it? */ if (time_before(jiffies, req->r_stamp + timeout)) break; + /* hasn't been long enough since it was acked? */ + if (req->r_request->ack_stamp == 0 || + time_before(jiffies, req->r_request->ack_stamp + timeout)) + break; + BUG_ON(req == last_req && req->r_stamp == last_stamp); last_req = req; last_stamp = req->r_stamp; @@ -1144,6 +1151,13 @@ static void handle_osds_timeout(struct work_struct *work) round_jiffies_relative(delay)); } +static void complete_request(struct ceph_osd_request *req) +{ + if (req->r_safe_callback) + req->r_safe_callback(req, NULL); + complete_all(&req->r_safe_completion); /* fsync waiter */ +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1226,11 +1240,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, else complete_all(&req->r_completion); - if (flags & CEPH_OSD_FLAG_ONDISK) { - if (req->r_safe_callback) - req->r_safe_callback(req, msg); - complete_all(&req->r_safe_completion); /* fsync waiter */ - } + if (flags & CEPH_OSD_FLAG_ONDISK) + complete_request(req); done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1421,6 +1432,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) done: downgrade_write(&osdc->map_sem); ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); + + /* + * subscribe to subsequent osdmap updates if full to ensure + * we find out when we are no longer full and stop returning + * ENOSPC. + */ + if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) + ceph_monc_request_next_osdmap(&osdc->client->monc); + send_queued(osdc); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); @@ -1677,8 +1697,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, */ if (req->r_sent == 0) { rc = __map_request(osdc, req); - if (rc < 0) + if (rc < 0) { + if (nofail) { + dout("osdc_start_request failed map, " + " will retry %lld\n", req->r_tid); + rc = 0; + } goto out_unlock; + } if (req->r_osd == NULL) { dout("send_request %p no up osds in pg\n", req); ceph_monc_request_next_osdmap(&osdc->client->monc); @@ -1717,6 +1743,7 @@ int ceph_osdc_wait_request(struct ceph_osd_client *osdc, __cancel_request(req); __unregister_request(osdc, req); mutex_unlock(&osdc->request_mutex); + complete_request(req); dout("wait_request tid %llu canceled/timed out\n", req->r_tid); return rc; } @@ -2007,8 +2034,9 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, int want = calc_pages_for(req->r_page_alignment, data_len); if (unlikely(req->r_num_pages < want)) { - pr_warning("tid %lld reply %d > expected %d pages\n", - tid, want, m->nr_pages); + pr_warning("tid %lld reply has %d bytes %d pages, we" + " had only %d pages ready\n", tid, data_len, + want, req->r_num_pages); *skip = 1; ceph_msg_put(m); m = NULL; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 71603ac3dff5..e97c3588c3ec 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, } map->epoch++; - map->modified = map->modified; + map->modified = modified; if (newcrush) { if (map->crush) crush_destroy(map->crush); @@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, map->osd_addr[osd] = addr; } - /* new_down */ + /* new_state */ ceph_decode_32_safe(p, end, len, bad); while (len--) { u32 osd; + u8 xorstate; ceph_decode_32_safe(p, end, osd, bad); + xorstate = **(u8 **)p; (*p)++; /* clean flag */ - pr_info("osd%d down\n", osd); + if (xorstate == 0) + xorstate = CEPH_OSD_UP; + if (xorstate & CEPH_OSD_UP) + pr_info("osd%d down\n", osd); if (osd < map->max_osd) - map->osd_state[osd] &= ~CEPH_OSD_UP; + map->osd_state[osd] ^= xorstate; } /* new_weight */ diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d13b71..17d67b579beb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static inline void dev_base_seq_inc(struct net *net) +{ + while (++net->dev_base_seq == 0); +} + static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); @@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev) hlist_add_head_rcu(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(net); + return 0; } @@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev) hlist_del_rcu(&dev->name_hlist); hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); + + dev_base_seq_inc(dev_net(dev)); } /* @@ -2096,6 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2157,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2179,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2529,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) goto done; ip = (const struct iphdr *) (skb->data + nhoff); - if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip)) ip_proto = 0; else ip_proto = ip->protocol; @@ -3111,7 +3124,7 @@ static int __netif_receive_skb(struct sk_buff *skb) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->mac_len = skb->network_header - skb->mac_header; + skb_reset_mac_len(skb); pt_prev = NULL; @@ -4484,10 +4497,10 @@ void __dev_set_rx_mode(struct net_device *dev) */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); - dev->uc_promisc = 1; + dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); - dev->uc_promisc = 0; + dev->uc_promisc = false; } if (ops->ndo_set_multicast_list) @@ -5196,7 +5209,7 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } -u32 netdev_fix_features(struct net_device *dev, u32 features) +static u32 netdev_fix_features(struct net_device *dev, u32 features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && @@ -5255,7 +5268,6 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) return features; } -EXPORT_SYMBOL(netdev_fix_features); int __netdev_update_features(struct net_device *dev) { @@ -5475,11 +5487,9 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_NOCACHE_COPY; } - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, - * vlan_dev_init() will do the dev->features check, so these features - * are enabled only if supported by underlying device. + /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. */ - dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA); + dev->vlan_features |= NETIF_F_HIGHDMA; ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); @@ -5864,8 +5874,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_size = GSO_MAX_SIZE; - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); INIT_LIST_HEAD(&dev->link_watch_list); @@ -5929,9 +5937,6 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); - /* Clear ethtool n-tuple list */ - ethtool_ntuple_flush(dev); - list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p); @@ -6175,6 +6180,11 @@ static int dev_cpu_callback(struct notifier_block *nfb, oldsd->output_queue = NULL; oldsd->output_queue_tailp = &oldsd->output_queue; } + /* Append NAPI poll list from offline CPU. */ + if (!list_empty(&oldsd->poll_list)) { + list_splice_init(&oldsd->poll_list, &sd->poll_list); + raise_softirq_irqoff(NET_RX_SOFTIRQ); + } raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -6261,29 +6271,23 @@ err_name: /** * netdev_drivername - network driver for the device * @dev: network device - * @buffer: buffer for resulting name - * @len: size of buffer * * Determine network driver for device. */ -char *netdev_drivername(const struct net_device *dev, char *buffer, int len) +const char *netdev_drivername(const struct net_device *dev) { const struct device_driver *driver; const struct device *parent; - - if (len <= 0 || !buffer) - return buffer; - buffer[0] = 0; + const char *empty = ""; parent = dev->dev.parent; - if (!parent) - return buffer; + return empty; driver = parent->driver; if (driver && driver->name) - strlcpy(buffer, driver->name, len); - return buffer; + return driver->name; + return empty; } static int __netdev_printk(const char *level, const struct net_device *dev, diff --git a/net/core/dst.c b/net/core/dst.c index 9ccca038444f..14b33baf0733 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,8 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->neighbour = NULL; - dst->hh = NULL; + dst->_neighbour = NULL; #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -190,7 +189,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; - dst_entries_add(ops, 1); + if (!(flags & DST_NOCOUNT)) + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -225,25 +225,20 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; struct neighbour *neigh; - struct hh_cache *hh; smp_rmb(); again: - neigh = dst->neighbour; - hh = dst->hh; + neigh = dst->_neighbour; child = dst->child; - dst->hh = NULL; - if (hh) - hh_cache_put(hh); - if (neigh) { - dst->neighbour = NULL; + dst->_neighbour = NULL; neigh_release(neigh); } - dst_entries_add(dst->ops, -1); + if (!(dst->flags & DST_NOCOUNT)) + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); @@ -368,8 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = dst->dev; + if (dst->_neighbour && dst->_neighbour->dev == dev) { + dst->_neighbour->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index fd14116ad7f0..6cdba5fc2bed 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) } EXPORT_SYMBOL(ethtool_op_set_flags); -void ethtool_ntuple_flush(struct net_device *dev) -{ - struct ethtool_rx_ntuple_flow_spec_container *fsc, *f; - - list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) { - list_del(&fsc->list); - kfree(fsc); - } - dev->ethtool_ntuple_list.count = 0; -} -EXPORT_SYMBOL(ethtool_ntuple_flush); - /* Handlers for each ethtool command */ #define ETHTOOL_DEV_FEATURE_WORDS 1 @@ -865,34 +853,6 @@ out: return ret; } -static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, - struct ethtool_rx_ntuple_flow_spec *spec, - struct ethtool_rx_ntuple_flow_spec_container *fsc) -{ - - /* don't add filters forever */ - if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) { - /* free the container */ - kfree(fsc); - return; - } - - /* Copy the whole filter over */ - fsc->fs.flow_type = spec->flow_type; - memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u)); - memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u)); - - fsc->fs.vlan_tag = spec->vlan_tag; - fsc->fs.vlan_tag_mask = spec->vlan_tag_mask; - fsc->fs.data = spec->data; - fsc->fs.data_mask = spec->data_mask; - fsc->fs.action = spec->action; - - /* add to the list */ - list_add_tail_rcu(&fsc->list, &list->list); - list->count++; -} - /* * ethtool does not (or did not) set masks for flow parameters that are * not specified, so if both value and mask are 0 then this must be @@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, { struct ethtool_rx_ntuple cmd; const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; - int ret; if (!ops->set_rx_ntuple) return -EOPNOTSUPP; @@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, rx_ntuple_fix_masks(&cmd.fs); - /* - * Cache filter in dev struct for GET operation only if - * the underlying driver doesn't have its own GET operation, and - * only if the filter was added successfully. First make sure we - * can allocate the filter, then continue if successful. - */ - if (!ops->get_rx_ntuple) { - fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC); - if (!fsc) - return -ENOMEM; - } - - ret = ops->set_rx_ntuple(dev, &cmd); - if (ret) { - kfree(fsc); - return ret; - } - - if (!ops->get_rx_ntuple) - __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc); - - return ret; -} - -static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_rx_ntuple_flow_spec_container *fsc; - u8 *data; - char *p; - int ret, i, num_strings = 0; - - if (!ops->get_sset_count) - return -EOPNOTSUPP; - - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) - return -EFAULT; - - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - - data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); - if (!data) - return -ENOMEM; - - if (ops->get_rx_ntuple) { - /* driver-specific filter grab */ - ret = ops->get_rx_ntuple(dev, gstrings.string_set, data); - goto copy; - } - - /* default ethtool filter grab */ - i = 0; - p = (char *)data; - list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) { - sprintf(p, "Filter %d:\n", i); - p += ETH_GSTRING_LEN; - num_strings++; - - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - sprintf(p, "\tFlow Type: TCP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case UDP_V4_FLOW: - sprintf(p, "\tFlow Type: UDP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case SCTP_V4_FLOW: - sprintf(p, "\tFlow Type: SCTP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - sprintf(p, "\tFlow Type: AH ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case ESP_V4_FLOW: - sprintf(p, "\tFlow Type: ESP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tFlow Type: Raw IP\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tFlow Type: IPv4\n"); - p += ETH_GSTRING_LEN; - num_strings++; - break; - default: - sprintf(p, "\tFlow Type: Unknown\n"); - p += ETH_GSTRING_LEN; - num_strings++; - goto unknown_filter; - } - - /* now the rest of the filters */ - switch (fsc->fs.flow_type) { - case TCP_V4_FLOW: - case UDP_V4_FLOW: - case SCTP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.tcp_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.psrc, - fsc->fs.m_u.tcp_ip4_spec.psrc); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest Port: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.pdst, - fsc->fs.m_u.tcp_ip4_spec.pdst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.tcp_ip4_spec.tos, - fsc->fs.m_u.tcp_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case AH_ESP_V4_FLOW: - case ESP_V4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.ah_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSPI: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.spi, - fsc->fs.m_u.ah_ip4_spec.spi); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.ah_ip4_spec.tos, - fsc->fs.m_u.ah_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IP_USER_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - break; - case IPV4_FLOW: - sprintf(p, "\tSrc IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tSrc IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4src); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP addr: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tDest IP mask: 0x%x\n", - fsc->fs.m_u.usr_ip4_spec.ip4dst); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, - fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tTOS: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.tos, - fsc->fs.m_u.usr_ip4_spec.tos); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tIP Version: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.ip_ver, - fsc->fs.m_u.usr_ip4_spec.ip_ver); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tProtocol: %d, mask: 0x%x\n", - fsc->fs.h_u.usr_ip4_spec.proto, - fsc->fs.m_u.usr_ip4_spec.proto); - p += ETH_GSTRING_LEN; - num_strings++; - break; - } - sprintf(p, "\tVLAN: %d, mask: 0x%x\n", - fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); - p += ETH_GSTRING_LEN; - num_strings++; - sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask); - p += ETH_GSTRING_LEN; - num_strings++; - if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP) - sprintf(p, "\tAction: Drop\n"); - else - sprintf(p, "\tAction: Direct to queue %d\n", - fsc->fs.action); - p += ETH_GSTRING_LEN; - num_strings++; -unknown_filter: - i++; - } -copy: - /* indicate to userspace how many strings we actually have */ - gstrings.len = num_strings; - ret = -EFAULT; - if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) - goto out; - useraddr += sizeof(gstrings); - if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) - goto out; - ret = 0; - -out: - kfree(data); - return ret; + return ops->set_rx_ntuple(dev, &cmd); } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -1227,7 +923,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) regs.len = reglen; regbuf = vzalloc(reglen); - if (!regbuf) + if (reglen && !regbuf) return -ENOMEM; ops->get_regs(dev, ®s, regbuf); @@ -1236,7 +932,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (copy_to_user(useraddr, ®s, sizeof(regs))) goto out; useraddr += offsetof(struct ethtool_regs, data); - if (copy_to_user(useraddr, regbuf, regs.len)) + if (regbuf && copy_to_user(useraddr, regbuf, regs.len)) goto out; ret = 0; @@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXNTUPLE: rc = ethtool_set_rx_ntuple(dev, useraddr); break; - case ETHTOOL_GRXNTUPLE: - rc = ethtool_get_rx_ntuple(dev, useraddr); - break; case ETHTOOL_GSSET_INFO: rc = ethtool_get_sset_info(dev, useraddr); break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 008dc70b064b..e7ab0c0285b5 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = { static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) diff --git a/net/core/flow.c b/net/core/flow.c index 990703b8863b..bf32c33cad3b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -22,7 +22,7 @@ #include <linux/cpumask.h> #include <linux/mutex.h> #include <net/flow.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/security.h> struct flow_cache_entry { diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a7b342131869..357bd4ee4baa 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -126,7 +126,7 @@ static void linkwatch_schedule_work(int urgent) return; /* It's already running which is good enough. */ - if (!cancel_delayed_work(&linkwatch_work)) + if (!__cancel_delayed_work(&linkwatch_work)) return; /* Otherwise we reschedule it again for immediate execution. */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 799f06e03a22..8fab9b0bb203 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -98,7 +98,7 @@ static const struct file_operations neigh_stat_seq_fops; static DEFINE_RWLOCK(neigh_tbl_lock); -static int neigh_blackhole(struct sk_buff *skb) +static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); return -ENETDOWN; @@ -137,7 +137,7 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock_bh(&tbl->lock); nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - for (i = 0; i <= nht->hash_mask; i++) { + for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np; @@ -210,7 +210,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - for (i = 0; i <= nht->hash_mask; i++) { + for (i = 0; i < (1 << nht->hash_shift); i++) { struct neighbour *n; struct neighbour __rcu **np = &nht->hash_buckets[i]; @@ -297,6 +297,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); @@ -312,9 +313,9 @@ out_entries: goto out; } -static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) +static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) { - size_t size = entries * sizeof(struct neighbour *); + size_t size = (1 << shift) * sizeof(struct neighbour *); struct neigh_hash_table *ret; struct neighbour __rcu **buckets; @@ -332,8 +333,9 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) return NULL; } ret->hash_buckets = buckets; - ret->hash_mask = entries - 1; + ret->hash_shift = shift; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); + ret->hash_rnd |= 1; return ret; } @@ -342,7 +344,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table *nht = container_of(head, struct neigh_hash_table, rcu); - size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); + size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) @@ -353,21 +355,20 @@ static void neigh_hash_free_rcu(struct rcu_head *head) } static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, - unsigned long new_entries) + unsigned long new_shift) { unsigned int i, hash; struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(!is_power_of_2(new_entries)); old_nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - new_nht = neigh_hash_alloc(new_entries); + new_nht = neigh_hash_alloc(new_shift); if (!new_nht) return old_nht; - for (i = 0; i <= old_nht->hash_mask; i++) { + for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; for (n = rcu_dereference_protected(old_nht->hash_buckets[i], @@ -377,7 +378,7 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); - hash &= new_nht->hash_mask; + hash >>= (32 - new_nht->hash_shift); next = rcu_dereference_protected(n->next, lockdep_is_held(&tbl->lock)); @@ -406,7 +407,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; @@ -436,7 +437,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; @@ -492,10 +493,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) > (nht->hash_mask + 1)) - nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1); + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); @@ -688,8 +689,6 @@ static void neigh_destroy_rcu(struct rcu_head *head) */ void neigh_destroy(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { @@ -702,16 +701,6 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) printk(KERN_WARNING "Impossible event.\n"); - while ((hh = neigh->hh) != NULL) { - neigh->hh = hh->hh_next; - hh->hh_next = NULL; - - write_seqlock_bh(&hh->hh_lock); - hh->hh_output = neigh_blackhole; - write_sequnlock_bh(&hh->hh_lock); - hh_cache_put(hh); - } - skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); @@ -731,14 +720,9 @@ EXPORT_SYMBOL(neigh_destroy); */ static void neigh_suspect(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); neigh->output = neigh->ops->output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->output; } /* Neighbour state is OK; @@ -748,14 +732,9 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - struct hh_cache *hh; - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); neigh->output = neigh->ops->connected_output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->hh_output; } static void neigh_periodic_work(struct work_struct *work) @@ -784,7 +763,7 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0 ; i <= nht->hash_mask; i++) { + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; while ((n = rcu_dereference_protected(*np, @@ -1015,7 +994,7 @@ out_unlock_bh: } EXPORT_SYMBOL(__neigh_event_send); -static void neigh_update_hhs(const struct neighbour *neigh) +static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) @@ -1025,7 +1004,8 @@ static void neigh_update_hhs(const struct neighbour *neigh) update = neigh->dev->header_ops->cache_update; if (update) { - for (hh = neigh->hh; hh; hh = hh->hh_next) { + hh = &neigh->hh; + if (hh->hh_len) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1173,12 +1153,13 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb_dst(skb) && skb_dst(skb)->neighbour) - n1 = skb_dst(skb)->neighbour; - n1->output(skb); + if (dst && (n2 = dst_get_neighbour(dst)) != NULL) + n1 = n2; + n1->output(n1, skb); write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); @@ -1211,67 +1192,21 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, } EXPORT_SYMBOL(neigh_event_ns); -static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, - __be16 protocol) -{ - struct hh_cache *hh; - - smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ - for (hh = n->hh; hh; hh = hh->hh_next) { - if (hh->hh_type == protocol) { - atomic_inc(&hh->hh_refcnt); - if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) - hh_cache_put(hh); - return true; - } - } - return false; -} - /* called with read_lock_bh(&n->lock); */ -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - __be16 protocol) +static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) { - struct hh_cache *hh; struct net_device *dev = dst->dev; - - if (likely(neigh_hh_lookup(n, dst, protocol))) - return; - - /* slow path */ - hh = kzalloc(sizeof(*hh), GFP_ATOMIC); - if (!hh) - return; - - seqlock_init(&hh->hh_lock); - hh->hh_type = protocol; - atomic_set(&hh->hh_refcnt, 2); - - if (dev->header_ops->cache(n, hh)) { - kfree(hh); - return; - } + __be16 prot = dst->ops->protocol; + struct hh_cache *hh = &n->hh; write_lock_bh(&n->lock); - /* must check if another thread already did the insert */ - if (neigh_hh_lookup(n, dst, protocol)) { - kfree(hh); - goto end; - } - - if (n->nud_state & NUD_CONNECTED) - hh->hh_output = n->ops->hh_output; - else - hh->hh_output = n->ops->output; - - hh->hh_next = n->hh; - smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ - n->hh = hh; + /* Only one thread can come in here and initialize the + * hh_cache entry. + */ + if (!hh->hh_len) + dev->header_ops->cache(n, hh, prot); - if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) - hh_cache_put(hh); -end: write_unlock_bh(&n->lock); } @@ -1280,7 +1215,7 @@ end: * but resolution is not made yet. */ -int neigh_compat_output(struct sk_buff *skb) +int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -1297,13 +1232,12 @@ EXPORT_SYMBOL(neigh_compat_output); /* Slow and careful. */ -int neigh_resolve_output(struct sk_buff *skb) +int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh; int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; __skb_pull(skb, skb_network_offset(skb)); @@ -1313,10 +1247,8 @@ int neigh_resolve_output(struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && - !dst->hh && - !(dst->flags & DST_NOCACHE)) - neigh_hh_init(neigh, dst, dst->ops->protocol); + if (dev->header_ops->cache && !neigh->hh.hh_len) + neigh_hh_init(neigh, dst); do { seq = read_seqbegin(&neigh->ha_lock); @@ -1325,7 +1257,7 @@ int neigh_resolve_output(struct sk_buff *skb) } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) - rc = neigh->ops->queue_xmit(skb); + rc = dev_queue_xmit(skb); else goto out_kfree_skb; } @@ -1333,7 +1265,7 @@ out: return rc; discard: NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? dst->neighbour : NULL); + dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); @@ -1343,13 +1275,11 @@ EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ -int neigh_connected_output(struct sk_buff *skb) +int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { - int err; - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; unsigned int seq; + int err; __skb_pull(skb, skb_network_offset(skb)); @@ -1360,7 +1290,7 @@ int neigh_connected_output(struct sk_buff *skb) } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) - err = neigh->ops->queue_xmit(skb); + err = dev_queue_xmit(skb); else { err = -EINVAL; kfree_skb(skb); @@ -1369,6 +1299,12 @@ int neigh_connected_output(struct sk_buff *skb) } EXPORT_SYMBOL(neigh_connected_output); +int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} +EXPORT_SYMBOL(neigh_direct_output); + static void neigh_proxy_process(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table *)arg; @@ -1540,7 +1476,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1857,7 +1793,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); ndc.ndtc_hash_rnd = nht->hash_rnd; - ndc.ndtc_hash_mask = nht->hash_mask; + ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); rcu_read_unlock_bh(); NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); @@ -2200,7 +2136,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - for (h = 0; h <= nht->hash_mask; h++) { + for (h = 0; h < (1 << nht->hash_shift); h++) { if (h < s_h) continue; if (h > s_h) @@ -2264,7 +2200,7 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void nht = rcu_dereference_bh(tbl->nht); read_lock(&tbl->lock); /* avoid resizes */ - for (chain = 0; chain <= nht->hash_mask; chain++) { + for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; for (n = rcu_dereference_bh(nht->hash_buckets[chain]); @@ -2286,7 +2222,7 @@ void __neigh_for_each_release(struct neigh_table *tbl, nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - for (chain = 0; chain <= nht->hash_mask; chain++) { + for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; struct neighbour __rcu **np; @@ -2323,7 +2259,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) int bucket = state->bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; - for (bucket = 0; bucket <= nht->hash_mask; bucket++) { + for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { n = rcu_dereference_bh(nht->hash_buckets[bucket]); while (n) { @@ -2390,7 +2326,7 @@ next: if (n) break; - if (++state->bucket > nht->hash_mask) + if (++state->bucket >= (1 << nht->hash_shift)) break; n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); @@ -2909,12 +2845,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info); + rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, + NULL); + rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); return 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 11b98bc2aa8f..1683e5db2f27 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -100,7 +100,6 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); @@ -312,7 +311,6 @@ static struct device_attribute net_class_attributes[] = { __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), - __ATTR(features, S_IRUGO, show_features, NULL), __ATTR(type, S_IRUGO, show_type, NULL), __ATTR(link_mode, S_IRUGO, show_link_mode, NULL), __ATTR(address, S_IRUGO, show_address, NULL), @@ -1179,9 +1177,14 @@ static void remove_queue_kobjects(struct net_device *net) #endif } -static const void *net_current_ns(void) +static void *net_grab_current_ns(void) { - return current->nsproxy->net_ns; + struct net *ns = current->nsproxy->net_ns; +#ifdef CONFIG_NET_NS + if (ns) + atomic_inc(&ns->passive); +#endif + return ns; } static const void *net_initial_ns(void) @@ -1196,22 +1199,13 @@ static const void *net_netlink_ns(struct sock *sk) struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, - .current_ns = net_current_ns, + .grab_current_ns = net_grab_current_ns, .netlink_ns = net_netlink_ns, .initial_ns = net_initial_ns, + .drop_ns = net_drop_ns, }; EXPORT_SYMBOL_GPL(net_ns_type_operations); -static void net_kobj_ns_exit(struct net *net) -{ - kobj_ns_exit(KOBJ_NS_TYPE_NET, net); -} - -static struct pernet_operations kobj_net_ops = { - .exit = net_kobj_ns_exit, -}; - - #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) { @@ -1339,6 +1333,5 @@ EXPORT_SYMBOL(netdev_class_remove_file); int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); - register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 7f1bb2aba03b..52380b1d552a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -28,6 +28,8 @@ #include <trace/events/skb.h> #include <trace/events/net.h> #include <trace/events/napi.h> +#include <trace/events/sock.h> +#include <trace/events/udp.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2e2dce6583e1..5bbdbf0d3664 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -8,6 +8,8 @@ #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> +#include <linux/proc_fs.h> +#include <linux/file.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -126,6 +128,8 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); + atomic_set(&net->passive, 1); + net->dev_base_seq = 1; #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -208,6 +212,13 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } +void net_drop_ns(void *p) +{ + struct net *ns = p; + if (ns && atomic_dec_and_test(&ns->passive)) + net_free(ns); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; @@ -228,7 +239,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { - net_free(net); + net_drop_ns(net); return ERR_PTR(rv); } return net; @@ -284,7 +295,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); - net_free(net); + net_drop_ns(net); } } static DECLARE_WORK(net_cleanup_work, cleanup_net); @@ -302,6 +313,26 @@ void __put_net(struct net *net) } EXPORT_SYMBOL_GPL(__put_net); +struct net *get_net_ns_by_fd(int fd) +{ + struct proc_inode *ei; + struct file *file; + struct net *net; + + file = proc_ns_fget(fd); + if (IS_ERR(file)) + return ERR_CAST(file); + + ei = PROC_I(file->f_dentry->d_inode); + if (ei->ns_ops == &netns_operations) + net = get_net(ei->ns); + else + net = ERR_PTR(-EINVAL); + + fput(file); + return net; +} + #else struct net *copy_net_ns(unsigned long flags, struct net *old_net) { @@ -309,6 +340,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return ERR_PTR(-EINVAL); return old_net; } + +struct net *get_net_ns_by_fd(int fd) +{ + return ERR_PTR(-EINVAL); +} #endif struct net *get_net_ns_by_pid(pid_t pid) @@ -561,3 +597,39 @@ void unregister_pernet_device(struct pernet_operations *ops) mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(unregister_pernet_device); + +#ifdef CONFIG_NET_NS +static void *netns_get(struct task_struct *task) +{ + struct net *net = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) + net = get_net(nsproxy->net_ns); + rcu_read_unlock(); + + return net; +} + +static void netns_put(void *ns) +{ + put_net(ns); +} + +static int netns_install(struct nsproxy *nsproxy, void *ns) +{ + put_net(nsproxy->net_ns); + nsproxy->net_ns = get_net(ns); + return 0; +} + +const struct proc_ns_operations netns_operations = { + .name = "net", + .type = CLONE_NEWNET, + .get = netns_get, + .put = netns_put, + .install = netns_install, +}; +#endif diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2d7d6d473781..adf84dd8c7b5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -177,7 +177,7 @@ static void service_arp_queue(struct netpoll_info *npi) } } -void netpoll_poll_dev(struct net_device *dev) +static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; @@ -208,13 +208,6 @@ void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -EXPORT_SYMBOL(netpoll_poll_dev); - -void netpoll_poll(struct netpoll *np) -{ - netpoll_poll_dev(np->dev); -} -EXPORT_SYMBOL(netpoll_poll); static void refill_skbs(void) { @@ -275,7 +268,7 @@ repeat: if (!skb) { if (++count < 10) { - netpoll_poll(np); + netpoll_poll_dev(np->dev); goto repeat; } return NULL; @@ -336,7 +329,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } /* tickle device maybe there is some cleanup */ - netpoll_poll(np); + netpoll_poll_dev(np->dev); udelay(USEC_PER_POLL); } @@ -792,6 +785,13 @@ int netpoll_setup(struct netpoll *np) return -ENODEV; } + if (ndev->master) { + printk(KERN_ERR "%s: %s is a slave device, aborting.\n", + np->name, np->dev_name); + err = -EBUSY; + goto put; + } + if (!netif_running(ndev)) { unsigned long atmost, atleast; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f76079cd750c..e35a6fbb8110 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1070,7 +1070,9 @@ static ssize_t pktgen_if_write(struct file *file, len = num_arg(&user_buffer[i], 10, &value); if (len < 0) return len; - + if ((value > 0) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; i += len; pkt_dev->clone_skb = value; @@ -3555,7 +3557,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->min_pkt_size = ETH_ZLEN; pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->delay = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; @@ -3563,7 +3564,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->udp_src_max = 9; pkt_dev->udp_dst_min = 9; pkt_dev->udp_dst_max = 9; - pkt_dev->vlan_p = 0; pkt_dev->vlan_cfi = 0; pkt_dev->vlan_id = 0xffff; @@ -3575,6 +3575,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) err = pktgen_setup_dev(pkt_dev, ifname); if (err) goto out1; + if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) + pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, &pktgen_if_fops, pkt_dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d56cb9b0b94..99d9e953fe39 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -56,9 +56,11 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; }; static DEFINE_MUTEX(rtnl_mutex); +static u16 min_ifinfo_dump_size; void rtnl_lock(void) { @@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) return tab ? tab[msgindex].dumpit : NULL; } +static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) +{ + struct rtnl_link *tab; + + if (protocol <= RTNL_FAMILY_MAX) + tab = rtnl_msg_handlers[protocol]; + else + tab = NULL; + + if (tab == NULL || tab[msgindex].calcit == NULL) + tab = rtnl_msg_handlers[PF_UNSPEC]; + + return tab ? tab[msgindex].calcit : NULL; +} + /** * __rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message + * @calcit: Function pointer to calc size of dump message * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) * Returns 0 on success or a negative error code. */ int __rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { struct rtnl_link *tab; int msgindex; @@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype, if (dumpit) tab[msgindex].dumpit = dumpit; + if (calcit) + tab[msgindex].calcit = calcit; + return 0; } EXPORT_SYMBOL_GPL(__rtnl_register); @@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register); * of memory implies no sense in continuing. */ void rtnl_register(int protocol, int msgtype, - rtnl_doit_func doit, rtnl_dumpit_func dumpit) + rtnl_doit_func doit, rtnl_dumpit_func dumpit, + rtnl_calcit_func calcit) { - if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0) + if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0) panic("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); @@ -1009,6 +1032,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[1]; rcu_read_lock(); + cb->seq = net->dev_base_seq; + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -1020,6 +1045,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1046,6 +1073,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_NET_NS_FD] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, @@ -1094,6 +1122,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else if (tb[IFLA_NET_NS_FD]) + net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); else net = get_net(src_net); return net; @@ -1224,7 +1254,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int send_addr_notify = 0; int err; - if (tb[IFLA_NET_NS_PID]) { + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) { struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); @@ -1815,6 +1845,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) return err; } +static u16 rtnl_calcit(struct sk_buff *skb) +{ + return min_ifinfo_dump_size; +} + static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; @@ -1844,11 +1879,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; + size_t if_info_size; - skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL); if (skb == NULL) goto errout; + min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ @@ -1899,14 +1937,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; + rtnl_calcit_func calcit; + u16 min_dump_alloc = 0; dumpit = rtnl_get_dumpit(family, type); if (dumpit == NULL) return -EOPNOTSUPP; + calcit = rtnl_get_calcit(family, type); + if (calcit) + min_dump_alloc = calcit(skb); __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); + err = netlink_dump_start(rtnl, skb, nlh, dumpit, + NULL, min_dump_alloc); rtnl_lock(); return err; } @@ -2016,12 +2060,13 @@ void __init rtnetlink_init(void) netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); - rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL); + rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, + rtnl_dump_ifinfo, rtnl_calcit); + rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); + rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46cbd28f40f9..2beda824636e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } + /* + * If skb buf is from userspace, we need to notify the caller + * the lower device DMA has done; + */ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg); + } + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size) if (irqs_disabled()) return false; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) + return false; + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) return false; @@ -596,6 +611,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); +/* skb frags copy userspace buffers to kernel */ +static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +{ + int i; + int num_frags = skb_shinfo(skb)->nr_frags; + struct page *page, *head = NULL; + struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; + + for (i = 0; i < num_frags; i++) { + u8 *vaddr; + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + + page = alloc_page(GFP_ATOMIC); + if (!page) { + while (head) { + struct page *next = (struct page *)head->private; + put_page(head); + head = next; + } + return -ENOMEM; + } + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(page_address(page), + vaddr + f->page_offset, f->size); + kunmap_skb_frag(vaddr); + page->private = (unsigned long)head; + head = page; + } + + /* skb frags release userspace buffers */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + uarg->callback(uarg); + + /* skb frags point to kernel buffers */ + for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { + skb_shinfo(skb)->frags[i - 1].page_offset = 0; + skb_shinfo(skb)->frags[i - 1].page = head; + head = (struct page *)head->private; + } + return 0; +} + + /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -614,6 +674,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + return NULL; + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } + n = skb + 1; if (skb->fclone == SKB_FCLONE_ORIG && n->fclone == SKB_FCLONE_UNAVAILABLE) { @@ -731,6 +797,14 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->nr_frags) { int i; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) { + kfree_skb(n); + n = NULL; + goto out; + } + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; get_page(skb_shinfo(n)->frags[i].page); @@ -788,7 +862,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, fastpath = true; else { int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; } @@ -819,6 +892,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (fastpath) { kfree(skb->head); } else { + /* copy this zero copy skb frags */ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + goto nofrags; + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -853,6 +932,8 @@ adjust_others: atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; +nofrags: + kfree(data); nodata: return -ENOMEM; } @@ -1354,6 +1435,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) } start = end; } + if (!len) return 0; diff --git a/net/core/sock.c b/net/core/sock.c index 84d6de809352..bc745d00ea4d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -128,6 +128,8 @@ #include <linux/filter.h> +#include <trace/events/sock.h> + #ifdef CONFIG_INET #include <net/tcp.h> #endif @@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) { atomic_inc(&sk->sk_drops); + trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; } @@ -1736,6 +1739,8 @@ suppress_allocation: return 1; } + trace_sock_exceed_buf_limit(sk, prot, allocated); + /* Alas. Undo changes. */ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; atomic_long_sub(amt, prot->memory_allocated); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 7e7ca375d431..98a52640e7cd 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -68,6 +68,7 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) break; } } +EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) @@ -121,6 +122,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) return false; } +EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp); void __init skb_timestamping_init(void) { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 3609eacaf4ce..3cb56af4e13c 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1166,64 +1166,6 @@ err: return ret; } -/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not - * be completed the entire msg is aborted and error value is returned. - * No attempt is made to reconcile the case where only part of the - * cmd can be completed. - */ -static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) -{ - const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; - int err = -EOPNOTSUPP; - - if (!ops) - goto err; - - err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, - tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); - if (err) - goto err; - - if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { - struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); - err = ops->ieee_setets(netdev, ets); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { - struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); - err = ops->ieee_setpfc(netdev, pfc); - if (err) - goto err; - } - - if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { - struct nlattr *attr; - int rem; - - nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { - struct dcb_app *app_data; - if (nla_type(attr) != DCB_ATTR_IEEE_APP) - continue; - app_data = nla_data(attr); - if (ops->ieee_setapp) - err = ops->ieee_setapp(netdev, app_data); - else - err = dcb_setapp(netdev, app_data); - if (err) - goto err; - } - } - -err: - dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, - pid, seq, flags); - return err; -} - static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, int app_nested_type, int app_info_type, int app_entry_type) @@ -1279,29 +1221,13 @@ nla_put_failure: } /* Handle IEEE 802.1Qaz GET commands. */ -static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) { - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *ieee, *app; struct dcb_app_type *itr; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - int err; - - if (!ops) - return -EOPNOTSUPP; - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_IEEE_GET; + int dcbx; + int err = -EMSGSIZE; NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); @@ -1338,6 +1264,12 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } } } + + if (netdev->dcbnl_ops->getdcbx) + dcbx = netdev->dcbnl_ops->getdcbx(netdev); + else + dcbx = -EOPNOTSUPP; + spin_unlock(&dcb_lock); nla_nest_end(skb, app); @@ -1366,16 +1298,413 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } nla_nest_end(skb, ieee); - nlmsg_end(skb, nlh); + if (dcbx >= 0) { + err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); + if (err) + goto nla_put_failure; + } + + return 0; - return rtnl_unicast(skb, &init_net, pid); nla_put_failure: - nlmsg_cancel(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return err; } +static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, + int dir) +{ + u8 pgid, up_map, prio, tc_pct; + const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; + int i = dir ? DCB_ATTR_CEE_TX_PG : DCB_ATTR_CEE_RX_PG; + struct nlattr *pg = nla_nest_start(skb, i); + + if (!pg) + goto nla_put_failure; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + struct nlattr *tc_nest = nla_nest_start(skb, i); + + if (!tc_nest) + goto nla_put_failure; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (!dir) + ops->getpgtccfgrx(dev, i - DCB_PG_ATTR_TC_0, + &prio, &pgid, &tc_pct, &up_map); + else + ops->getpgtccfgtx(dev, i - DCB_PG_ATTR_TC_0, + &prio, &pgid, &tc_pct, &up_map); + + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_PGID, pgid); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); + NLA_PUT_U8(skb, DCB_TC_ATTR_PARAM_BW_PCT, tc_pct); + nla_nest_end(skb, tc_nest); + } + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + + if (!dir) + ops->getpgbwgcfgrx(dev, i - DCB_PG_ATTR_BW_ID_0, + &tc_pct); + else + ops->getpgbwgcfgtx(dev, i - DCB_PG_ATTR_BW_ID_0, + &tc_pct); + NLA_PUT_U8(skb, i, tc_pct); + } + nla_nest_end(skb, pg); + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) +{ + struct nlattr *cee, *app; + struct dcb_app_type *itr; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int dcbx, i, err = -EMSGSIZE; + u8 value; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + cee = nla_nest_start(skb, DCB_ATTR_CEE); + if (!cee) + goto nla_put_failure; + + /* local pg */ + if (ops->getpgtccfgtx && ops->getpgbwgcfgtx) { + err = dcbnl_cee_pg_fill(skb, netdev, 1); + if (err) + goto nla_put_failure; + } + + if (ops->getpgtccfgrx && ops->getpgbwgcfgrx) { + err = dcbnl_cee_pg_fill(skb, netdev, 0); + if (err) + goto nla_put_failure; + } + + /* local pfc */ + if (ops->getpfccfg) { + struct nlattr *pfc_nest = nla_nest_start(skb, DCB_ATTR_CEE_PFC); + + if (!pfc_nest) + goto nla_put_failure; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0, &value); + NLA_PUT_U8(skb, i, value); + } + nla_nest_end(skb, pfc_nest); + } + + /* local app */ + spin_lock(&dcb_lock); + app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); + if (!app) + goto dcb_unlock; + + list_for_each_entry(itr, &dcb_app_list, list) { + if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) { + struct nlattr *app_nest = nla_nest_start(skb, + DCB_ATTR_APP); + if (!app_nest) + goto dcb_unlock; + + err = nla_put_u8(skb, DCB_APP_ATTR_IDTYPE, + itr->app.selector); + if (err) + goto dcb_unlock; + + err = nla_put_u16(skb, DCB_APP_ATTR_ID, + itr->app.protocol); + if (err) + goto dcb_unlock; + + err = nla_put_u8(skb, DCB_APP_ATTR_PRIORITY, + itr->app.priority); + if (err) + goto dcb_unlock; + + nla_nest_end(skb, app_nest); + } + } + nla_nest_end(skb, app); + + if (netdev->dcbnl_ops->getdcbx) + dcbx = netdev->dcbnl_ops->getdcbx(netdev); + else + dcbx = -EOPNOTSUPP; + + spin_unlock(&dcb_lock); + + /* features flags */ + if (ops->getfeatcfg) { + struct nlattr *feat = nla_nest_start(skb, DCB_ATTR_CEE_FEAT); + if (!feat) + goto nla_put_failure; + + for (i = DCB_FEATCFG_ATTR_ALL + 1; i <= DCB_FEATCFG_ATTR_MAX; + i++) + if (!ops->getfeatcfg(netdev, i, &value)) + NLA_PUT_U8(skb, i, value); + + nla_nest_end(skb, feat); + } + + /* peer info if available */ + if (ops->cee_peer_getpg) { + struct cee_pg pg; + err = ops->cee_peer_getpg(netdev, &pg); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); + } + + if (ops->cee_peer_getpfc) { + struct cee_pfc pfc; + err = ops->cee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_CEE_PEER_APP_TABLE, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP); + if (err) + goto nla_put_failure; + } + nla_nest_end(skb, cee); + + /* DCBX state */ + if (dcbx >= 0) { + err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); + if (err) + goto nla_put_failure; + } + return 0; + +dcb_unlock: + spin_unlock(&dcb_lock); +nla_put_failure: + return err; +} + +static int dcbnl_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid, int dcbx_ver) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, 0, event, sizeof(*dcb), 0); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + + if (dcbx_ver == DCB_CAP_DCBX_VER_IEEE) + err = dcbnl_ieee_fill(skb, dev); + else + err = dcbnl_cee_fill(skb, dev); + + if (err < 0) { + /* Report error to broadcast listeners */ + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_DCB, err); + } else { + /* End nlmsg and notify broadcast listeners */ + nlmsg_end(skb, nlh); + rtnl_notify(skb, net, 0, RTNLGRP_DCB, NULL, GFP_KERNEL); + } + + return err; +} + +int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid) +{ + return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); +} +EXPORT_SYMBOL(dcbnl_ieee_notify); + +int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, + u32 seq, u32 pid) +{ + return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); +} +EXPORT_SYMBOL(dcbnl_cee_notify); + +/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not + * be completed the entire msg is aborted and error value is returned. + * No attempt is made to reconcile the case where only part of the + * cmd can be completed. + */ +static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return err; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { + struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); + err = ops->ieee_setets(netdev, ets); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { + struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); + err = ops->ieee_setpfc(netdev, pfc); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_setapp) + err = ops->ieee_setapp(netdev, app_data); + else + err = dcb_ieee_setapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, seq, 0); + return err; +} + +static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct net *net = dev_net(netdev); + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_IEEE_GET; + + err = dcbnl_ieee_fill(skb, netdev); + + if (err < 0) { + nlmsg_cancel(skb, nlh); + kfree_skb(skb); + } else { + nlmsg_end(skb, nlh); + err = rtnl_unicast(skb, net, pid); + } + + return err; +} + +static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + return -EOPNOTSUPP; + + if (!tb[DCB_ATTR_IEEE]) + return -EINVAL; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + return err; + + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + if (ops->ieee_delapp) + err = ops->ieee_delapp(netdev, app_data); + else + err = dcb_ieee_delapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_DEL, DCB_ATTR_IEEE, + pid, seq, flags); + dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_DEL, seq, 0); + return err; +} + + /* DCBX configuration */ static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) @@ -1522,10 +1851,10 @@ err: static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) { + struct net *net = dev_net(netdev); struct sk_buff *skb; struct nlmsghdr *nlh; struct dcbmsg *dcb; - struct nlattr *cee; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; int err; @@ -1536,51 +1865,26 @@ static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, if (!skb) return -ENOBUFS; - nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + if (nlh == NULL) { + nlmsg_free(skb); + return -EMSGSIZE; + } dcb = NLMSG_DATA(nlh); dcb->dcb_family = AF_UNSPEC; dcb->cmd = DCB_CMD_CEE_GET; - NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + err = dcbnl_cee_fill(skb, netdev); - cee = nla_nest_start(skb, DCB_ATTR_CEE); - if (!cee) - goto nla_put_failure; - - /* get peer info if available */ - if (ops->cee_peer_getpg) { - struct cee_pg pg; - err = ops->cee_peer_getpg(netdev, &pg); - if (!err) - NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); - } - - if (ops->cee_peer_getpfc) { - struct cee_pfc pfc; - err = ops->cee_peer_getpfc(netdev, &pfc); - if (!err) - NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); - } - - if (ops->peer_getappinfo && ops->peer_getapptable) { - err = dcbnl_build_peer_app(netdev, skb, - DCB_ATTR_CEE_PEER_APP_TABLE, - DCB_ATTR_CEE_PEER_APP_INFO, - DCB_ATTR_CEE_PEER_APP); - if (err) - goto nla_put_failure; + if (err < 0) { + nlmsg_cancel(skb, nlh); + nlmsg_free(skb); + } else { + nlmsg_end(skb, nlh); + err = rtnl_unicast(skb, net, pid); } - - nla_nest_end(skb, cee); - nlmsg_end(skb, nlh); - - return rtnl_unicast(skb, &init_net, pid); -nla_put_failure: - nlmsg_cancel(skb, nlh); -nlmsg_failure: - kfree_skb(skb); - return -1; + return err; } static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) @@ -1690,11 +1994,15 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; case DCB_CMD_IEEE_SET: ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + nlh->nlmsg_flags); goto out; case DCB_CMD_IEEE_GET: ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + nlh->nlmsg_flags); + goto out; + case DCB_CMD_IEEE_DEL: + ret = dcbnl_ieee_del(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); goto out; case DCB_CMD_GDCBX: ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, @@ -1754,12 +2062,13 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) EXPORT_SYMBOL(dcb_getapp); /** - * ixgbe_dcbnl_setapp - add dcb application data to app list + * dcb_setapp - add CEE dcb application data to app list * - * Priority 0 is the default priority this removes applications - * from the app list if the priority is set to zero. + * Priority 0 is an invalid priority in CEE spec. This routine + * removes applications from the app list if the priority is + * set to zero. */ -u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) +int dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; struct dcb_app_type event; @@ -1802,6 +2111,114 @@ out: } EXPORT_SYMBOL(dcb_setapp); +/** + * dcb_ieee_getapp_mask - retrieve the IEEE DCB application priority + * + * Helper routine which on success returns a non-zero 802.1Qaz user + * priority bitmap otherwise returns 0 to indicate the dcb_app was + * not found in APP list. + */ +u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app_type *itr; + u8 prio = 0; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + prio |= 1 << itr->app.priority; + } + } + spin_unlock(&dcb_lock); + + return prio; +} +EXPORT_SYMBOL(dcb_ieee_getapp_mask); + +/** + * dcb_ieee_setapp - add IEEE dcb application data to app list + * + * This adds Application data to the list. Multiple application + * entries may exists for the same selector and protocol as long + * as the priorities are different. + */ +int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) +{ + struct dcb_app_type *itr, *entry; + struct dcb_app_type event; + int err = 0; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, new, sizeof(event.app)); + + spin_lock(&dcb_lock); + /* Search for existing match and abort if found */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == new->selector && + itr->app.protocol == new->protocol && + itr->app.priority == new->priority && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + err = -EEXIST; + goto out; + } + } + + /* App entry does not exist add new entry */ + entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); + if (!entry) { + err = -ENOMEM; + goto out; + } + + memcpy(&entry->app, new, sizeof(*new)); + strncpy(entry->name, dev->name, IFNAMSIZ); + list_add(&entry->list, &dcb_app_list); +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_setapp); + +/** + * dcb_ieee_delapp - delete IEEE dcb application data from list + * + * This removes a matching APP data from the APP list + */ +int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) +{ + struct dcb_app_type *itr; + struct dcb_app_type event; + int err = -ENOENT; + + memcpy(&event.name, dev->name, sizeof(event.name)); + memcpy(&event.app, del, sizeof(event.app)); + + spin_lock(&dcb_lock); + /* Search for existing match and remove it. */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == del->selector && + itr->app.protocol == del->protocol && + itr->app.priority == del->priority && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + list_del(&itr->list); + kfree(itr); + err = 0; + goto out; + } + } + +out: + spin_unlock(&dcb_lock); + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; +} +EXPORT_SYMBOL(dcb_ieee_delapp); + static void dcb_flushapp(void) { struct dcb_app_type *app; @@ -1819,8 +2236,8 @@ static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL); return 0; } diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 36479ca61e03..48b585a5cba7 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -118,7 +118,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - pr_info("CCID: Activated CCID %d (%s)\n", + pr_info("DCCP: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); err = 0; out: @@ -136,7 +136,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops) ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Deactivated CCID %d (%s)\n", + pr_info("DCCP: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); } diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index fadecd20d75b..0462040fc818 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -153,17 +153,93 @@ out: sock_put(sk); } +/* + * Congestion window validation (RFC 2861). + */ +static int ccid2_do_cwv = 1; +module_param(ccid2_do_cwv, bool, 0644); +MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); + +/** + * ccid2_update_used_window - Track how much of cwnd is actually used + * This is done in addition to CWV. The sender needs to have an idea of how many + * packets may be in flight, to set the local Sequence Window value accordingly + * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the + * maximum-used window. We use an EWMA low-pass filter to filter out noise. + */ +static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) +{ + hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; +} + +/* This borrows the code of tcp_cwnd_application_limited() */ +static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + /* don't reduce cwnd below the initial window (IW) */ + u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), + win_used = max(hc->tx_cwnd_used, init_win); + + if (win_used < hc->tx_cwnd) { + hc->tx_ssthresh = max(hc->tx_ssthresh, + (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); + hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; + } + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; +} + +/* This borrows the code of tcp_cwnd_restart() */ +static void ccid2_cwnd_restart(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); + cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; + hc->tx_cwnd = max(cwnd, restart_cwnd); + + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; +} + static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + const u32 now = ccid2_time_stamp; struct ccid2_seq *next; - hc->tx_pipe++; + /* slow-start after idle periods (RFC 2581, RFC 2861) */ + if (ccid2_do_cwv && !hc->tx_pipe && + (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) + ccid2_cwnd_restart(sk, now); + + hc->tx_lsndtime = now; + hc->tx_pipe += 1; + + /* see whether cwnd was fully used (RFC 2861), update expected window */ + if (ccid2_cwnd_network_limited(hc)) { + ccid2_update_used_window(hc, hc->tx_cwnd); + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + } else { + if (hc->tx_pipe > hc->tx_cwnd_used) + hc->tx_cwnd_used = hc->tx_pipe; + + ccid2_update_used_window(hc, hc->tx_cwnd_used); + + if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) + ccid2_cwnd_application_limited(sk, now); + } hc->tx_seqh->ccid2s_seq = dp->dccps_gss; hc->tx_seqh->ccid2s_acked = 0; - hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; + hc->tx_seqh->ccid2s_sent = now; next = hc->tx_seqh->ccid2s_next; /* check if we need to alloc more space */ @@ -583,15 +659,6 @@ done: dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } -/* - * Convert RFC 3390 larger initial window into an equivalent number of packets. - * This is based on the numbers specified in RFC 5681, 3.1. - */ -static inline u32 rfc3390_bytes_to_packets(const u32 smss) -{ - return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); -} - static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); @@ -603,6 +670,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* Use larger initial windows (RFC 4341, section 5). */ hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); + hc->tx_expected_wnd = hc->tx_cwnd; /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); @@ -615,7 +683,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); INIT_LIST_HEAD(&hc->tx_av_chunks); @@ -636,18 +705,14 @@ static void ccid2_hc_tx_exit(struct sock *sk) static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - hc->rx_data++; - if (hc->rx_data >= dp->dccps_r_ack_ratio) { - dccp_send_ack(sk); - hc->rx_data = 0; - } - break; + if (!dccp_data_packet(skb)) + return; + + if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) { + dccp_send_ack(sk); + hc->rx_num_data_pkts = 0; } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index e9985dafc2c7..f585d330e1e5 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -53,6 +53,10 @@ struct ccid2_seq { * @tx_rttvar: moving average/maximum of @mdev_max * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) * @tx_rtt_seq: to decay RTTVAR at most once per flight + * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861 + * @tx_expected_wnd: moving average of @tx_cwnd_used + * @tx_cwnd_stamp: to track idle periods in CWV + * @tx_lsndtime: last time (in jiffies) a data packet was sent * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq * @tx_av_chunks: list of Ack Vectors received on current skb @@ -76,6 +80,12 @@ struct ccid2_hc_tx_sock { u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + /* Congestion Window validation (optional, RFC 2861) */ + u32 tx_cwnd_used, + tx_expected_wnd, + tx_cwnd_stamp, + tx_lsndtime; + u64 tx_rpseq; int tx_rpdupack; u32 tx_last_cong; @@ -88,8 +98,21 @@ static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) return hc->tx_pipe >= hc->tx_cwnd; } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + +/** + * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection + * @rx_num_data_pkts: number of data packets received since last feedback + */ struct ccid2_hc_rx_sock { - int rx_data; + u32 rx_num_data_pkts; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/input.c b/net/dccp/input.c index 4222e7a654b0..51d5fe5fffba 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -619,20 +619,31 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { - if (dccp_check_seqno(sk, skb)) - goto discard; - - /* - * Step 8: Process options and mark acknowledgeable - */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; + /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ + if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) + goto discard; - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if ((dp->dccps_role != DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_RESPONSE) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); + goto discard; } + /* Step 8: Process options */ + if (dccp_parse_options(sk, NULL, skb)) + return 1; + /* * Step 9: Process Reset * If P.type == Reset, @@ -640,31 +651,15 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ + */ if (dh->dccph_type == DCCP_PKT_RESET) { dccp_rcv_reset(sk, skb); return 0; - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_RESPONSE) || - (dp->dccps_role == DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && - dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); - goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSE) { + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ if (dccp_rcv_close(sk, skb)) return 0; goto discard; @@ -679,8 +674,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, __kfree_skb(skb); return 0; - case DCCP_RESPOND: case DCCP_PARTOPEN: + /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ + dccp_handle_ackvec_processing(sk, skb); + dccp_deliver_input_to_ccids(sk, skb); + /* fall through */ + case DCCP_RESPOND: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; diff --git a/net/dccp/output.c b/net/dccp/output.c index fab108e51e5a..dede3edb8849 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -27,11 +27,13 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } -static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ +static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) { skb_set_owner_w(skb, sk); WARN_ON(sk->sk_send_head); sk->sk_send_head = skb; + return skb_clone(sk->sk_send_head, gfp_any()); } /* @@ -552,8 +554,7 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ @@ -678,8 +679,7 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; if (active) { - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, prio)); + skb = dccp_skb_entail(sk, skb); /* * Retransmission timer for active-close: RFC 4340, 8.3 requires * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ @@ -692,6 +692,6 @@ void dccp_send_close(struct sock *sk, const int active) */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } else - dccp_transmit_skb(sk, skb); + } + dccp_transmit_skb(sk, skb); } diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index ea3b6ee21fc9..19acd00a6382 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -291,23 +291,23 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c *buf++ = type; - switch(type) { - case 0: - *buf++ = sdn->sdn_objnum; - break; - case 1: - *buf++ = 0; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 3 + le16_to_cpu(sdn->sdn_objnamel); - break; - case 2: - memset(buf, 0, 5); - buf += 5; - *buf++ = le16_to_cpu(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); - len = 7 + le16_to_cpu(sdn->sdn_objnamel); - break; + switch (type) { + case 0: + *buf++ = sdn->sdn_objnum; + break; + case 1: + *buf++ = 0; + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 3 + le16_to_cpu(sdn->sdn_objnamel); + break; + case 2: + memset(buf, 0, 5); + buf += 5; + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 7 + le16_to_cpu(sdn->sdn_objnamel); + break; } return len; @@ -337,23 +337,23 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, *fmt = *data++; type = *data++; - switch(*fmt) { - case 0: - sdn->sdn_objnum = type; - return 2; - case 1: - namel = 16; - break; - case 2: - len -= 4; - data += 4; - break; - case 4: - len -= 8; - data += 8; - break; - default: - return -1; + switch (*fmt) { + case 0: + sdn->sdn_objnum = type; + return 2; + case 1: + namel = 16; + break; + case 2: + len -= 4; + data += 4; + break; + case 4: + len -= 8; + data += 8; + break; + default: + return -1; } len -= 1; @@ -575,25 +575,26 @@ int dn_destroy_timer(struct sock *sk) scp->persist = dn_nsp_persist(sk); - switch(scp->state) { - case DN_DI: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_di_count) - scp->state = DN_CN; - return 0; + switch (scp->state) { + case DN_DI: + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); + if (scp->nsp_rxtshift >= decnet_di_count) + scp->state = DN_CN; + return 0; - case DN_DR: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); - if (scp->nsp_rxtshift >= decnet_dr_count) - scp->state = DN_DRC; - return 0; + case DN_DR: + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC); + if (scp->nsp_rxtshift >= decnet_dr_count) + scp->state = DN_DRC; + return 0; - case DN_DN: - if (scp->nsp_rxtshift < decnet_dn_count) { - /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC); - return 0; - } + case DN_DN: + if (scp->nsp_rxtshift < decnet_dn_count) { + /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */ + dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, + GFP_ATOMIC); + return 0; + } } scp->persist = (HZ * decnet_time_wait); @@ -623,42 +624,42 @@ static void dn_destroy_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_DN: - dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, - sk->sk_allocation); - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - case DN_CR: - scp->state = DN_DR; - goto disc_reject; - case DN_RUN: - scp->state = DN_DI; - case DN_DI: - case DN_DR: + switch (scp->state) { + case DN_DN: + dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, + sk->sk_allocation); + scp->persist_fxn = dn_destroy_timer; + scp->persist = dn_nsp_persist(sk); + break; + case DN_CR: + scp->state = DN_DR; + goto disc_reject; + case DN_RUN: + scp->state = DN_DI; + case DN_DI: + case DN_DR: disc_reject: - dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); - case DN_NC: - case DN_NR: - case DN_RJ: - case DN_DIC: - case DN_CN: - case DN_DRC: - case DN_CI: - case DN_CD: - scp->persist_fxn = dn_destroy_timer; - scp->persist = dn_nsp_persist(sk); - break; - default: - printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); - case DN_O: - dn_stop_slow_timer(sk); + dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); + case DN_NC: + case DN_NR: + case DN_RJ: + case DN_DIC: + case DN_CN: + case DN_DRC: + case DN_CI: + case DN_CD: + scp->persist_fxn = dn_destroy_timer; + scp->persist = dn_nsp_persist(sk); + break; + default: + printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); + case DN_O: + dn_stop_slow_timer(sk); - dn_unhash_sock_bh(sk); - sock_put(sk); + dn_unhash_sock_bh(sk); + sock_put(sk); - break; + break; } } @@ -683,15 +684,15 @@ static int dn_create(struct net *net, struct socket *sock, int protocol, if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; - switch(sock->type) { - case SOCK_SEQPACKET: - if (protocol != DNPROTO_NSP) - return -EPROTONOSUPPORT; - break; - case SOCK_STREAM: - break; - default: - return -ESOCKTNOSUPPORT; + switch (sock->type) { + case SOCK_SEQPACKET: + if (protocol != DNPROTO_NSP) + return -EPROTONOSUPPORT; + break; + case SOCK_STREAM: + break; + default: + return -ESOCKTNOSUPPORT; } @@ -987,16 +988,16 @@ static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int { struct dn_scp *scp = DN_SK(sk); - switch(scp->state) { - case DN_RUN: - return 0; - case DN_CR: - return dn_confirm_accept(sk, timeo, sk->sk_allocation); - case DN_CI: - case DN_CC: - return dn_wait_run(sk, timeo); - case DN_O: - return __dn_connect(sk, addr, addrlen, timeo, flags); + switch (scp->state) { + case DN_RUN: + return 0; + case DN_CR: + return dn_confirm_accept(sk, timeo, sk->sk_allocation); + case DN_CI: + case DN_CC: + return dn_wait_run(sk, timeo); + case DN_O: + return __dn_connect(sk, addr, addrlen, timeo, flags); } return -EINVAL; @@ -1363,141 +1364,140 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (copy_from_user(&u, optval, optlen)) return -EFAULT; - switch(optname) { - case DSO_CONDATA: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if ((scp->state != DN_O) && (scp->state != DN_CR)) - return -EINVAL; + switch (optname) { + case DSO_CONDATA: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if ((scp->state != DN_O) && (scp->state != DN_CR)) + return -EINVAL; - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; + if (optlen != sizeof(struct optdata_dn)) + return -EINVAL; - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; + if (le16_to_cpu(u.opt.opt_optl) > 16) + return -EINVAL; - memcpy(&scp->conndata_out, &u.opt, optlen); - break; - - case DSO_DISDATA: - if (sock->state != SS_CONNECTED && scp->accept_mode == ACC_IMMED) - return -ENOTCONN; - - if (optlen != sizeof(struct optdata_dn)) - return -EINVAL; + memcpy(&scp->conndata_out, &u.opt, optlen); + break; - if (le16_to_cpu(u.opt.opt_optl) > 16) - return -EINVAL; + case DSO_DISDATA: + if (sock->state != SS_CONNECTED && + scp->accept_mode == ACC_IMMED) + return -ENOTCONN; - memcpy(&scp->discdata_out, &u.opt, optlen); - break; + if (optlen != sizeof(struct optdata_dn)) + return -EINVAL; - case DSO_CONACCESS: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; + if (le16_to_cpu(u.opt.opt_optl) > 16) + return -EINVAL; - if (optlen != sizeof(struct accessdata_dn)) - return -EINVAL; + memcpy(&scp->discdata_out, &u.opt, optlen); + break; - if ((u.acc.acc_accl > DN_MAXACCL) || - (u.acc.acc_passl > DN_MAXACCL) || - (u.acc.acc_userl > DN_MAXACCL)) - return -EINVAL; + case DSO_CONACCESS: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if (scp->state != DN_O) + return -EINVAL; - memcpy(&scp->accessdata, &u.acc, optlen); - break; + if (optlen != sizeof(struct accessdata_dn)) + return -EINVAL; - case DSO_ACCEPTMODE: - if (sock->state == SS_CONNECTED) - return -EISCONN; - if (scp->state != DN_O) - return -EINVAL; + if ((u.acc.acc_accl > DN_MAXACCL) || + (u.acc.acc_passl > DN_MAXACCL) || + (u.acc.acc_userl > DN_MAXACCL)) + return -EINVAL; - if (optlen != sizeof(int)) - return -EINVAL; + memcpy(&scp->accessdata, &u.acc, optlen); + break; - if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) - return -EINVAL; + case DSO_ACCEPTMODE: + if (sock->state == SS_CONNECTED) + return -EISCONN; + if (scp->state != DN_O) + return -EINVAL; - scp->accept_mode = (unsigned char)u.mode; - break; + if (optlen != sizeof(int)) + return -EINVAL; - case DSO_CONACCEPT: + if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER)) + return -EINVAL; - if (scp->state != DN_CR) - return -EINVAL; - timeo = sock_rcvtimeo(sk, 0); - err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); - return err; + scp->accept_mode = (unsigned char)u.mode; + break; - case DSO_CONREJECT: + case DSO_CONACCEPT: + if (scp->state != DN_CR) + return -EINVAL; + timeo = sock_rcvtimeo(sk, 0); + err = dn_confirm_accept(sk, &timeo, sk->sk_allocation); + return err; - if (scp->state != DN_CR) - return -EINVAL; + case DSO_CONREJECT: + if (scp->state != DN_CR) + return -EINVAL; - scp->state = DN_DR; - sk->sk_shutdown = SHUTDOWN_MASK; - dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); - break; + scp->state = DN_DR; + sk->sk_shutdown = SHUTDOWN_MASK; + dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation); + break; - default: + default: #ifdef CONFIG_NETFILTER return nf_setsockopt(sk, PF_DECnet, optname, optval, optlen); #endif - case DSO_LINKINFO: - case DSO_STREAM: - case DSO_SEQPACKET: - return -ENOPROTOOPT; - - case DSO_MAXWINDOW: - if (optlen != sizeof(unsigned long)) - return -EINVAL; - if (u.win > NSP_MAX_WINDOW) - u.win = NSP_MAX_WINDOW; - if (u.win == 0) - return -EINVAL; - scp->max_window = u.win; - if (scp->snd_window > u.win) - scp->snd_window = u.win; - break; + case DSO_LINKINFO: + case DSO_STREAM: + case DSO_SEQPACKET: + return -ENOPROTOOPT; + + case DSO_MAXWINDOW: + if (optlen != sizeof(unsigned long)) + return -EINVAL; + if (u.win > NSP_MAX_WINDOW) + u.win = NSP_MAX_WINDOW; + if (u.win == 0) + return -EINVAL; + scp->max_window = u.win; + if (scp->snd_window > u.win) + scp->snd_window = u.win; + break; - case DSO_NODELAY: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == 2) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 1; - /* if (scp->nonagle == 1) { Push pending frames } */ - break; + case DSO_NODELAY: + if (optlen != sizeof(int)) + return -EINVAL; + if (scp->nonagle == 2) + return -EINVAL; + scp->nonagle = (u.val == 0) ? 0 : 1; + /* if (scp->nonagle == 1) { Push pending frames } */ + break; - case DSO_CORK: - if (optlen != sizeof(int)) - return -EINVAL; - if (scp->nonagle == 1) - return -EINVAL; - scp->nonagle = (u.val == 0) ? 0 : 2; - /* if (scp->nonagle == 0) { Push pending frames } */ - break; + case DSO_CORK: + if (optlen != sizeof(int)) + return -EINVAL; + if (scp->nonagle == 1) + return -EINVAL; + scp->nonagle = (u.val == 0) ? 0 : 2; + /* if (scp->nonagle == 0) { Push pending frames } */ + break; - case DSO_SERVICES: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if ((u.services & ~NSP_FC_MASK) != 0x01) - return -EINVAL; - if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) - return -EINVAL; - scp->services_loc = u.services; - break; + case DSO_SERVICES: + if (optlen != sizeof(unsigned char)) + return -EINVAL; + if ((u.services & ~NSP_FC_MASK) != 0x01) + return -EINVAL; + if ((u.services & NSP_FC_MASK) == NSP_FC_MASK) + return -EINVAL; + scp->services_loc = u.services; + break; - case DSO_INFO: - if (optlen != sizeof(unsigned char)) - return -EINVAL; - if (u.info & 0xfc) - return -EINVAL; - scp->info_loc = u.info; - break; + case DSO_INFO: + if (optlen != sizeof(unsigned char)) + return -EINVAL; + if (u.info & 0xfc) + return -EINVAL; + scp->info_loc = u.info; + break; } return 0; @@ -1527,107 +1527,106 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us if(get_user(r_len , optlen)) return -EFAULT; - switch(optname) { - case DSO_CONDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->conndata_in; - break; - - case DSO_DISDATA: - if (r_len > sizeof(struct optdata_dn)) - r_len = sizeof(struct optdata_dn); - r_data = &scp->discdata_in; - break; + switch (optname) { + case DSO_CONDATA: + if (r_len > sizeof(struct optdata_dn)) + r_len = sizeof(struct optdata_dn); + r_data = &scp->conndata_in; + break; - case DSO_CONACCESS: - if (r_len > sizeof(struct accessdata_dn)) - r_len = sizeof(struct accessdata_dn); - r_data = &scp->accessdata; - break; + case DSO_DISDATA: + if (r_len > sizeof(struct optdata_dn)) + r_len = sizeof(struct optdata_dn); + r_data = &scp->discdata_in; + break; - case DSO_ACCEPTMODE: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->accept_mode; - break; + case DSO_CONACCESS: + if (r_len > sizeof(struct accessdata_dn)) + r_len = sizeof(struct accessdata_dn); + r_data = &scp->accessdata; + break; - case DSO_LINKINFO: - if (r_len > sizeof(struct linkinfo_dn)) - r_len = sizeof(struct linkinfo_dn); + case DSO_ACCEPTMODE: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->accept_mode; + break; - memset(&link, 0, sizeof(link)); + case DSO_LINKINFO: + if (r_len > sizeof(struct linkinfo_dn)) + r_len = sizeof(struct linkinfo_dn); - switch(sock->state) { - case SS_CONNECTING: - link.idn_linkstate = LL_CONNECTING; - break; - case SS_DISCONNECTING: - link.idn_linkstate = LL_DISCONNECTING; - break; - case SS_CONNECTED: - link.idn_linkstate = LL_RUNNING; - break; - default: - link.idn_linkstate = LL_INACTIVE; - } + memset(&link, 0, sizeof(link)); - link.idn_segsize = scp->segsize_rem; - r_data = &link; + switch (sock->state) { + case SS_CONNECTING: + link.idn_linkstate = LL_CONNECTING; + break; + case SS_DISCONNECTING: + link.idn_linkstate = LL_DISCONNECTING; + break; + case SS_CONNECTED: + link.idn_linkstate = LL_RUNNING; break; - default: + link.idn_linkstate = LL_INACTIVE; + } + + link.idn_segsize = scp->segsize_rem; + r_data = &link; + break; + + default: #ifdef CONFIG_NETFILTER - { - int ret, len; + { + int ret, len; - if(get_user(len, optlen)) - return -EFAULT; + if (get_user(len, optlen)) + return -EFAULT; - ret = nf_getsockopt(sk, PF_DECnet, optname, - optval, &len); - if (ret >= 0) - ret = put_user(len, optlen); - return ret; - } + ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); + if (ret >= 0) + ret = put_user(len, optlen); + return ret; + } #endif - case DSO_STREAM: - case DSO_SEQPACKET: - case DSO_CONACCEPT: - case DSO_CONREJECT: - return -ENOPROTOOPT; - - case DSO_MAXWINDOW: - if (r_len > sizeof(unsigned long)) - r_len = sizeof(unsigned long); - r_data = &scp->max_window; - break; + case DSO_STREAM: + case DSO_SEQPACKET: + case DSO_CONACCEPT: + case DSO_CONREJECT: + return -ENOPROTOOPT; + + case DSO_MAXWINDOW: + if (r_len > sizeof(unsigned long)) + r_len = sizeof(unsigned long); + r_data = &scp->max_window; + break; - case DSO_NODELAY: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == 1); - r_data = &val; - break; + case DSO_NODELAY: + if (r_len > sizeof(int)) + r_len = sizeof(int); + val = (scp->nonagle == 1); + r_data = &val; + break; - case DSO_CORK: - if (r_len > sizeof(int)) - r_len = sizeof(int); - val = (scp->nonagle == 2); - r_data = &val; - break; + case DSO_CORK: + if (r_len > sizeof(int)) + r_len = sizeof(int); + val = (scp->nonagle == 2); + r_data = &val; + break; - case DSO_SERVICES: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->services_rem; - break; + case DSO_SERVICES: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->services_rem; + break; - case DSO_INFO: - if (r_len > sizeof(unsigned char)) - r_len = sizeof(unsigned char); - r_data = &scp->info_rem; - break; + case DSO_INFO: + if (r_len > sizeof(unsigned char)) + r_len = sizeof(unsigned char); + r_data = &scp->info_rem; + break; } if (r_data) { @@ -2088,15 +2087,15 @@ static int dn_device_event(struct notifier_block *this, unsigned long event, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - switch(event) { - case NETDEV_UP: - dn_dev_up(dev); - break; - case NETDEV_DOWN: - dn_dev_down(dev); - break; - default: - break; + switch (event) { + case NETDEV_UP: + dn_dev_up(dev); + break; + case NETDEV_DOWN: + dn_dev_down(dev); + break; + default: + break; } return NOTIFY_DONE; @@ -2209,54 +2208,54 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) int i; switch (le16_to_cpu(dn->sdn_objnamel)) { - case 0: - sprintf(buf, "%d", dn->sdn_objnum); - break; - default: - for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { - buf[i] = dn->sdn_objname[i]; - if (IS_NOT_PRINTABLE(buf[i])) - buf[i] = '.'; - } - buf[i] = 0; + case 0: + sprintf(buf, "%d", dn->sdn_objnum); + break; + default: + for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { + buf[i] = dn->sdn_objname[i]; + if (IS_NOT_PRINTABLE(buf[i])) + buf[i] = '.'; + } + buf[i] = 0; } } static char *dn_state2asc(unsigned char state) { - switch(state) { - case DN_O: - return "OPEN"; - case DN_CR: - return " CR"; - case DN_DR: - return " DR"; - case DN_DRC: - return " DRC"; - case DN_CC: - return " CC"; - case DN_CI: - return " CI"; - case DN_NR: - return " NR"; - case DN_NC: - return " NC"; - case DN_CD: - return " CD"; - case DN_RJ: - return " RJ"; - case DN_RUN: - return " RUN"; - case DN_DI: - return " DI"; - case DN_DIC: - return " DIC"; - case DN_DN: - return " DN"; - case DN_CL: - return " CL"; - case DN_CN: - return " CN"; + switch (state) { + case DN_O: + return "OPEN"; + case DN_CR: + return " CR"; + case DN_DR: + return " DR"; + case DN_DRC: + return " DRC"; + case DN_CC: + return " CC"; + case DN_CI: + return " CI"; + case DN_NR: + return " NR"; + case DN_NC: + return " NC"; + case DN_CD: + return " CD"; + case DN_RJ: + return " RJ"; + case DN_RUN: + return " RUN"; + case DN_DI: + return " DI"; + case DN_DIC: + return " DIC"; + case DN_DN: + return " DN"; + case DN_CL: + return " CL"; + case DN_CN: + return " CN"; } return "????"; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index cf26ac74a188..ba4faceec405 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -437,17 +437,17 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) dev_load(&init_net, ifr->ifr_name); - switch(cmd) { - case SIOCGIFADDR: - break; - case SIOCSIFADDR: - if (!capable(CAP_NET_ADMIN)) - return -EACCES; - if (sdn->sdn_family != AF_DECnet) - return -EINVAL; - break; - default: + switch (cmd) { + case SIOCGIFADDR: + break; + case SIOCSIFADDR: + if (!capable(CAP_NET_ADMIN)) + return -EACCES; + if (sdn->sdn_family != AF_DECnet) return -EINVAL; + break; + default: + return -EINVAL; } rtnl_lock(); @@ -470,27 +470,27 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) goto done; } - switch(cmd) { - case SIOCGIFADDR: - *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; - goto rarok; - - case SIOCSIFADDR: - if (!ifa) { - if ((ifa = dn_dev_alloc_ifa()) == NULL) { - ret = -ENOBUFS; - break; - } - memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - } else { - if (ifa->ifa_local == dn_saddr2dn(sdn)) - break; - dn_dev_del_ifa(dn_db, ifap, 0); + switch (cmd) { + case SIOCGIFADDR: + *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local; + goto rarok; + + case SIOCSIFADDR: + if (!ifa) { + if ((ifa = dn_dev_alloc_ifa()) == NULL) { + ret = -ENOBUFS; + break; } + memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + } else { + if (ifa->ifa_local == dn_saddr2dn(sdn)) + break; + dn_dev_del_ifa(dn_db, ifap, 0); + } - ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); + ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn); - ret = dn_dev_set_ifa(dev, ifa); + ret = dn_dev_set_ifa(dev, ifa); } done: rtnl_unlock(); @@ -1313,7 +1313,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; - dev = (struct net_device *)v; + dev = v; if (v == SEQ_START_TOKEN) dev = net_device_entry(&init_net.dev_base_head); @@ -1335,13 +1335,13 @@ static void dn_dev_seq_stop(struct seq_file *seq, void *v) static char *dn_type2asc(char type) { - switch(type) { - case DN_DEV_BCAST: - return "B"; - case DN_DEV_UCAST: - return "U"; - case DN_DEV_MPOINT: - return "M"; + switch (type) { + case DN_DEV_BCAST: + return "B"; + case DN_DEV_UCAST: + return "U"; + case DN_DEV_MPOINT: + return "M"; } return "?"; @@ -1414,9 +1414,9 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); + rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL); + rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); + rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 1c74ed36ce8f..9e885f180b60 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -30,7 +30,7 @@ #include <linux/netdevice.h> #include <linux/timer.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/uaccess.h> #include <net/neighbour.h> #include <net/dst.h> @@ -414,33 +414,34 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn res->fi = fi; - switch(type) { - case RTN_NAT: - DN_FIB_RES_RESET(*res); + switch (type) { + case RTN_NAT: + DN_FIB_RES_RESET(*res); + atomic_inc(&fi->fib_clntref); + return 0; + case RTN_UNICAST: + case RTN_LOCAL: + for_nexthops(fi) { + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (!fld->flowidn_oif || + fld->flowidn_oif == nh->nh_oif) + break; + } + if (nhsel < fi->fib_nhs) { + res->nh_sel = nhsel; atomic_inc(&fi->fib_clntref); return 0; - case RTN_UNICAST: - case RTN_LOCAL: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!fld->flowidn_oif || - fld->flowidn_oif == nh->nh_oif) - break; - } - if (nhsel < fi->fib_nhs) { - res->nh_sel = nhsel; - atomic_inc(&fi->fib_clntref); - return 0; - } - endfor_nexthops(fi); - res->fi = NULL; - return 1; - default: - if (net_ratelimit()) - printk("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", type); - res->fi = NULL; - return -EINVAL; + } + endfor_nexthops(fi); + res->fi = NULL; + return 1; + default: + if (net_ratelimit()) + printk("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n", + type); + res->fi = NULL; + return -EINVAL; } } return err; @@ -647,20 +648,20 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, { struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr; - switch(event) { - case NETDEV_UP: - dn_fib_add_ifaddr(ifa); - dn_fib_sync_up(ifa->ifa_dev->dev); + switch (event) { + case NETDEV_UP: + dn_fib_add_ifaddr(ifa); + dn_fib_sync_up(ifa->ifa_dev->dev); + dn_rt_cache_flush(-1); + break; + case NETDEV_DOWN: + dn_fib_del_ifaddr(ifa); + if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { + dn_fib_disable_addr(ifa->ifa_dev->dev, 1); + } else { dn_rt_cache_flush(-1); - break; - case NETDEV_DOWN: - dn_fib_del_ifaddr(ifa); - if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) { - dn_fib_disable_addr(ifa->ifa_dev->dev, 1); - } else { - dn_rt_cache_flush(-1); - } - break; + } + break; } return NOTIFY_DONE; } @@ -763,8 +764,8 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL); + rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL); + rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 602dade7e9a3..7f0eb087dc11 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -38,7 +38,7 @@ #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <linux/jhash.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <net/net_namespace.h> #include <net/neighbour.h> #include <net/dst.h> @@ -51,9 +51,9 @@ static int dn_neigh_construct(struct neighbour *); static void dn_long_error_report(struct neighbour *, struct sk_buff *); static void dn_short_error_report(struct neighbour *, struct sk_buff *); -static int dn_long_output(struct sk_buff *); -static int dn_short_output(struct sk_buff *); -static int dn_phase3_output(struct sk_buff *); +static int dn_long_output(struct neighbour *, struct sk_buff *); +static int dn_short_output(struct neighbour *, struct sk_buff *); +static int dn_phase3_output(struct neighbour *, struct sk_buff *); /* @@ -64,8 +64,6 @@ static const struct neigh_ops dn_long_ops = { .error_report = dn_long_error_report, .output = dn_long_output, .connected_output = dn_long_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; /* @@ -76,8 +74,6 @@ static const struct neigh_ops dn_short_ops = { .error_report = dn_short_error_report, .output = dn_short_output, .connected_output = dn_short_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; /* @@ -88,8 +84,6 @@ static const struct neigh_ops dn_phase3_ops = { .error_report = dn_short_error_report, /* Can use short version here */ .output = dn_phase3_output, .connected_output = dn_phase3_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit }; static u32 dn_neigh_hash(const void *pkey, @@ -208,14 +202,14 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst->neighbour; + struct neighbour *neigh = dst_get_neighbour(dst); struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; dn_dn2eth(mac_addr, rt->rt_local_src); if (dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, mac_addr, skb->len) >= 0) - return neigh->ops->queue_xmit(skb); + return dev_queue_xmit(skb); if (net_ratelimit()) printk(KERN_DEBUG "dn_neigh_output_packet: oops, can't send packet\n"); @@ -224,10 +218,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) return -EINVAL; } -static int dn_long_output(struct sk_buff *skb) +static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; unsigned char *data; @@ -271,10 +263,8 @@ static int dn_long_output(struct sk_buff *skb) neigh->dev, dn_neigh_output_packet); } -static int dn_short_output(struct sk_buff *skb) +static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -315,10 +305,8 @@ static int dn_short_output(struct sk_buff *skb) * Phase 3 output is the same is short output, execpt that * it clears the area bits before transmission. */ -static int dn_phase3_output(struct sk_buff *skb) +static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; struct dn_short_packet *sp; @@ -404,13 +392,13 @@ int dn_neigh_router_hello(struct sk_buff *skb) dn->flags &= ~DN_NDFLAG_P3; - switch(msg->iinfo & DN_RT_INFO_TYPE) { - case DN_RT_INFO_L1RT: - dn->flags &=~DN_NDFLAG_R2; - dn->flags |= DN_NDFLAG_R1; - break; - case DN_RT_INFO_L2RT: - dn->flags |= DN_NDFLAG_R2; + switch (msg->iinfo & DN_RT_INFO_TYPE) { + case DN_RT_INFO_L1RT: + dn->flags &=~DN_NDFLAG_R2; + dn->flags |= DN_NDFLAG_R1; + break; + case DN_RT_INFO_L2RT: + dn->flags |= DN_NDFLAG_R2; } } diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index b430549e2b91..73fa268fe2e8 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -101,23 +101,27 @@ static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack) unsigned short type = ((ack >> 12) & 0x0003); int wakeup = 0; - switch(type) { - case 0: /* ACK - Data */ - if (dn_after(ack, scp->ackrcv_dat)) { - scp->ackrcv_dat = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->data_xmit_queue, ack); - } - break; - case 1: /* NAK - Data */ - break; - case 2: /* ACK - OtherData */ - if (dn_after(ack, scp->ackrcv_oth)) { - scp->ackrcv_oth = ack & 0x0fff; - wakeup |= dn_nsp_check_xmit_queue(sk, skb, &scp->other_xmit_queue, ack); - } - break; - case 3: /* NAK - OtherData */ - break; + switch (type) { + case 0: /* ACK - Data */ + if (dn_after(ack, scp->ackrcv_dat)) { + scp->ackrcv_dat = ack & 0x0fff; + wakeup |= dn_nsp_check_xmit_queue(sk, skb, + &scp->data_xmit_queue, + ack); + } + break; + case 1: /* NAK - Data */ + break; + case 2: /* ACK - OtherData */ + if (dn_after(ack, scp->ackrcv_oth)) { + scp->ackrcv_oth = ack & 0x0fff; + wakeup |= dn_nsp_check_xmit_queue(sk, skb, + &scp->other_xmit_queue, + ack); + } + break; + case 3: /* NAK - OtherData */ + break; } if (wakeup && !sock_flag(sk, SOCK_DEAD)) @@ -417,19 +421,19 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) scp->addrrem = cb->src_port; sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_CI: - case DN_CD: - scp->state = DN_RJ; - sk->sk_err = ECONNREFUSED; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - scp->state = DN_DN; - break; - case DN_DI: - scp->state = DN_DIC; - break; + switch (scp->state) { + case DN_CI: + case DN_CD: + scp->state = DN_RJ; + sk->sk_err = ECONNREFUSED; + break; + case DN_RUN: + sk->sk_shutdown |= SHUTDOWN_MASK; + scp->state = DN_DN; + break; + case DN_DI: + scp->state = DN_DIC; + break; } if (!sock_flag(sk, SOCK_DEAD)) { @@ -470,23 +474,23 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) sk->sk_state = TCP_CLOSE; - switch(scp->state) { - case DN_CI: - scp->state = DN_NR; - break; - case DN_DR: - if (reason == NSP_REASON_DC) - scp->state = DN_DRC; - if (reason == NSP_REASON_NL) - scp->state = DN_CN; - break; - case DN_DI: - scp->state = DN_DIC; - break; - case DN_RUN: - sk->sk_shutdown |= SHUTDOWN_MASK; - case DN_CC: + switch (scp->state) { + case DN_CI: + scp->state = DN_NR; + break; + case DN_DR: + if (reason == NSP_REASON_DC) + scp->state = DN_DRC; + if (reason == NSP_REASON_NL) scp->state = DN_CN; + break; + case DN_DI: + scp->state = DN_DIC; + break; + case DN_RUN: + sk->sk_shutdown |= SHUTDOWN_MASK; + case DN_CC: + scp->state = DN_CN; } if (!sock_flag(sk, SOCK_DEAD)) { @@ -692,16 +696,16 @@ static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason) goto out; if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) { - switch(cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: /* (Retransmitted) Connect Init */ - dn_nsp_return_disc(skb, NSP_DISCINIT, reason); - ret = NET_RX_SUCCESS; - break; - case 0x20: /* Connect Confirm */ - dn_nsp_return_disc(skb, NSP_DISCCONF, reason); - ret = NET_RX_SUCCESS; - break; + switch (cb->nsp_flags & 0x70) { + case 0x10: + case 0x60: /* (Retransmitted) Connect Init */ + dn_nsp_return_disc(skb, NSP_DISCINIT, reason); + ret = NET_RX_SUCCESS; + break; + case 0x20: /* Connect Confirm */ + dn_nsp_return_disc(skb, NSP_DISCCONF, reason); + ret = NET_RX_SUCCESS; + break; } } @@ -733,17 +737,17 @@ static int dn_nsp_rx_packet(struct sk_buff *skb) * Filter out conninits and useless packet types */ if ((cb->nsp_flags & 0x0c) == 0x08) { - switch(cb->nsp_flags & 0x70) { - case 0x00: /* NOP */ - case 0x70: /* Reserved */ - case 0x50: /* Reserved, Phase II node init */ + switch (cb->nsp_flags & 0x70) { + case 0x00: /* NOP */ + case 0x70: /* Reserved */ + case 0x50: /* Reserved, Phase II node init */ + goto free_out; + case 0x10: + case 0x60: + if (unlikely(cb->rt_flags & DN_RT_F_RTS)) goto free_out; - case 0x10: - case 0x60: - if (unlikely(cb->rt_flags & DN_RT_F_RTS)) - goto free_out; - sk = dn_find_listener(skb, &reason); - goto got_it; + sk = dn_find_listener(skb, &reason); + goto got_it; } } @@ -836,20 +840,20 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Control packet. */ if ((cb->nsp_flags & 0x0c) == 0x08) { - switch(cb->nsp_flags & 0x70) { - case 0x10: - case 0x60: - dn_nsp_conn_init(sk, skb); - break; - case 0x20: - dn_nsp_conn_conf(sk, skb); - break; - case 0x30: - dn_nsp_disc_init(sk, skb); - break; - case 0x40: - dn_nsp_disc_conf(sk, skb); - break; + switch (cb->nsp_flags & 0x70) { + case 0x10: + case 0x60: + dn_nsp_conn_init(sk, skb); + break; + case 0x20: + dn_nsp_conn_conf(sk, skb); + break; + case 0x30: + dn_nsp_disc_init(sk, skb); + break; + case 0x40: + dn_nsp_disc_conf(sk, skb); + break; } } else if (cb->nsp_flags == 0x24) { @@ -890,15 +894,15 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb) if (scp->state != DN_RUN) goto free_out; - switch(cb->nsp_flags) { - case 0x10: /* LS */ - dn_nsp_linkservice(sk, skb); - break; - case 0x30: /* OD */ - dn_nsp_otherdata(sk, skb); - break; - default: - dn_nsp_data(sk, skb); + switch (cb->nsp_flags) { + case 0x10: /* LS */ + dn_nsp_linkservice(sk, skb); + break; + case 0x30: /* OD */ + dn_nsp_otherdata(sk, skb); + break; + default: + dn_nsp_data(sk, skb); } } else { /* Ack, chuck it out here */ diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 74544bc6fdec..43450c100226 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -116,6 +116,7 @@ static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr); static int dn_route_input(struct sk_buff *); static void dn_run_flush(unsigned long dummy); @@ -139,6 +140,7 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, + .neigh_lookup = dn_dst_neigh_lookup, }; static void dn_dst_destroy(struct dst_entry *dst) @@ -241,9 +243,11 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { + struct neighbour *n = dst_get_neighbour(dst); u32 min_mtu = 230; - struct dn_dev *dn = dst->neighbour ? - rcu_dereference_raw(dst->neighbour->dev->dn_ptr) : NULL; + struct dn_dev *dn; + + dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL; if (dn && dn->use_long == 0) min_mtu -= 6; @@ -495,11 +499,11 @@ static int dn_route_rx_packet(struct sk_buff *skb) } if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) { - switch(cb->rt_flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_SHORT: - return dn_return_short(skb); - case DN_RT_PKT_LONG: - return dn_return_long(skb); + switch (cb->rt_flags & DN_RT_PKT_MSK) { + case DN_RT_PKT_SHORT: + return dn_return_short(skb); + case DN_RT_PKT_LONG: + return dn_return_long(skb); } } @@ -652,38 +656,38 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type if (unlikely(skb_linearize(skb))) goto dump_it; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_INIT: - dn_dev_init_pkt(skb); - break; - case DN_RT_PKT_VERI: - dn_dev_veri_pkt(skb); - break; + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_INIT: + dn_dev_init_pkt(skb); + break; + case DN_RT_PKT_VERI: + dn_dev_veri_pkt(skb); + break; } if (dn->parms.state != DN_DEV_S_RU) goto dump_it; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_HELO: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_route_ptp_hello); - - case DN_RT_PKT_L1RT: - case DN_RT_PKT_L2RT: - return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - skb, skb->dev, NULL, - dn_route_discard); - case DN_RT_PKT_ERTH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_neigh_router_hello); - - case DN_RT_PKT_EEDH: - return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, - dn_neigh_endnode_hello); + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_HELO: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_route_ptp_hello); + + case DN_RT_PKT_L1RT: + case DN_RT_PKT_L2RT: + return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, + skb, skb->dev, NULL, + dn_route_discard); + case DN_RT_PKT_ERTH: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_router_hello); + + case DN_RT_PKT_EEDH: + return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, + skb, skb->dev, NULL, + dn_neigh_endnode_hello); } } else { if (dn->parms.state != DN_DEV_S_RU) @@ -691,11 +695,11 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type skb_pull(skb, 1); /* Pull flags */ - switch(flags & DN_RT_PKT_MSK) { - case DN_RT_PKT_LONG: - return dn_route_rx_long(skb); - case DN_RT_PKT_SHORT: - return dn_route_rx_short(skb); + switch (flags & DN_RT_PKT_MSK) { + case DN_RT_PKT_LONG: + return dn_route_rx_long(skb); + case DN_RT_PKT_SHORT: + return dn_route_rx_short(skb); } } @@ -705,6 +709,14 @@ out: return NET_RX_DROP; } +static int dn_to_neigh_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n = dst_get_neighbour(dst); + + return n->output(n, skb); +} + static int dn_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -715,7 +727,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if ((neigh = dst->neighbour) == NULL) + if ((neigh = dst_get_neighbour(dst)) == NULL) goto error; skb->dev = dev; @@ -733,7 +745,7 @@ static int dn_output(struct sk_buff *skb) cb->hops = 0; return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, - neigh->output); + dn_to_neigh_output); error: if (net_ratelimit()) @@ -750,7 +762,6 @@ static int dn_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; - struct neighbour *neigh = dst->neighbour; int header_len; #ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; @@ -783,7 +794,7 @@ static int dn_forward(struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, - neigh->output); + dn_to_neigh_output); drop: kfree_skb(skb); @@ -818,6 +829,11 @@ static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) return dst->dev->mtu; } +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); +} + static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; @@ -833,11 +849,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && rt->dst.neighbour == NULL) { + if (dev != NULL && dst_get_neighbour(&rt->dst) == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - rt->dst.neighbour = n; + dst_set_neighbour(&rt->dst, n); } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1144,7 +1160,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); neigh = NULL; rt->dst.lastuse = jiffies; @@ -1416,23 +1432,23 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - rt->dst.neighbour = neigh; + dst_set_neighbour(&rt->dst, neigh); rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; - switch(res.type) { - case RTN_UNICAST: - rt->dst.input = dn_forward; - break; - case RTN_LOCAL: - rt->dst.output = dn_output; - rt->dst.input = dn_nsp_rx; - rt->dst.dev = in_dev; - flags |= RTCF_LOCAL; - break; - default: - case RTN_UNREACHABLE: - case RTN_BLACKHOLE: - rt->dst.input = dst_discard; + switch (res.type) { + case RTN_UNICAST: + rt->dst.input = dn_forward; + break; + case RTN_LOCAL: + rt->dst.output = dn_output; + rt->dst.input = dn_nsp_rx; + rt->dst.dev = in_dev; + flags |= RTCF_LOCAL; + break; + default: + case RTN_UNREACHABLE: + case RTN_BLACKHOLE: + rt->dst.input = dst_discard; } rt->rt_flags = flags; @@ -1841,10 +1857,11 @@ void __init dn_route_init(void) proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER - rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); + rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, + dn_fib_dump, NULL); #else rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump); + dn_cache_dump, NULL); #endif } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index bd0a52dd1d40..a9a62f225a6b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -25,7 +25,7 @@ #include <linux/netdevice.h> #include <linux/timer.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/uaccess.h> #include <linux/route.h> /* RTF_xxx */ #include <net/neighbour.h> @@ -147,17 +147,18 @@ static void dn_rehash_zone(struct dn_zone *dz) old_divisor = dz->dz_divisor; - switch(old_divisor) { - case 16: - new_divisor = 256; - new_hashmask = 0xFF; - break; - default: - printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor); - case 256: - new_divisor = 1024; - new_hashmask = 0x3FF; - break; + switch (old_divisor) { + case 16: + new_divisor = 256; + new_hashmask = 0xFF; + break; + default: + printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", + old_divisor); + case 256: + new_divisor = 1024; + new_hashmask = 0x3FF; + break; } ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL); diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index 09825711d58a..67f691bd4acf 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -22,7 +22,7 @@ #include <linux/timer.h> #include <linux/spinlock.h> #include <net/sock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <net/flow.h> #include <net/dn.h> diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 64a7f39e069f..69975e0bcdea 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -69,15 +69,15 @@ static void dnrmg_send_peer(struct sk_buff *skb) int group = 0; unsigned char flags = *skb->data; - switch(flags & DN_RT_CNTL_MSK) { - case DN_RT_PKT_L1RT: - group = DNRNG_NLGRP_L1; - break; - case DN_RT_PKT_L2RT: - group = DNRNG_NLGRP_L2; - break; - default: - return; + switch (flags & DN_RT_CNTL_MSK) { + case DN_RT_PKT_L1RT: + group = DNRNG_NLGRP_L1; + break; + case DN_RT_PKT_L2RT: + group = DNRNG_NLGRP_L2; + break; + default: + return; } skb2 = dnrmg_build_message(skb, &status); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 28f8b5e5f73b..02e75d11cfbb 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -68,14 +68,15 @@ static struct ctl_table_header *dn_table_header = NULL; static void strip_it(char *str) { for(;;) { - switch(*str) { - case ' ': - case '\n': - case '\r': - case ':': - *str = 0; - case 0: - return; + switch (*str) { + case ' ': + case '\n': + case '\r': + case ':': + *str = 0; + /* Fallthrough */ + case 0: + return; } str++; } diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index cfa7a5e1c5c9..fa000d26dc60 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) int err = key->type_data.x[0]; seq_puts(m, key->description); - if (err) - seq_printf(m, ": %d", err); - else - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) { + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); + } } /* diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 45f7411e90ba..9bd1061fa4ee 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -118,10 +118,14 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); /* - * Disable cascade port functionality, and set the switch's + * Disable cascade port functionality unless this device + * is used in a cascade configuration, and set the switch's * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); + if (ds->dst->pd->nr_chips > 1) + REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f)); + else + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); /* * Send all frames with destination addresses matching diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index a1d9f3787dd5..1c1f26c5d672 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) fmt + #include <linux/module.h> #include <linux/types.h> @@ -44,7 +46,7 @@ #include <linux/bitops.h> #include <linux/mutex.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/system.h> static const struct proto_ops econet_ops; @@ -63,9 +65,7 @@ static DEFINE_SPINLOCK(aun_queue_lock); static struct socket *udpsock; #define AUN_PORT 0x8000 - -struct aunhdr -{ +struct aunhdr { unsigned char code; /* AUN magic protocol byte */ unsigned char port; unsigned char cb; @@ -82,8 +82,7 @@ static struct timer_list ab_cleanup_timer; #endif /* CONFIG_ECONET_AUNUDP */ /* Per-packet information */ -struct ec_cb -{ +struct ec_cb { struct sockaddr_ec sec; unsigned long cookie; /* Supplied by user. */ #ifdef CONFIG_ECONET_AUNUDP @@ -137,7 +136,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, * but then it will block. */ - skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); /* * An error occurred so return it. Because skb_recv_datagram() @@ -145,7 +144,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, * retries. */ - if(skb==NULL) + if (skb == NULL) goto out; /* @@ -154,10 +153,9 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, */ copied = skb->len; - if (copied > len) - { - copied=len; - msg->msg_flags|=MSG_TRUNC; + if (copied > len) { + copied = len; + msg->msg_flags |= MSG_TRUNC; } /* We can't use skb_copy_datagram here */ @@ -186,7 +184,8 @@ out: * Bind an Econet socket. */ -static int econet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int econet_bind(struct socket *sock, struct sockaddr *uaddr, + int addr_len) { struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr; struct sock *sk; @@ -226,9 +225,8 @@ static void tx_result(struct sock *sk, unsigned long cookie, int result) struct ec_cb *eb; struct sockaddr_ec *sec; - if (skb == NULL) - { - printk(KERN_DEBUG "ec: memory squeeze, transmit result dropped.\n"); + if (skb == NULL) { + pr_debug("econet: memory squeeze, transmit result dropped\n"); return; } @@ -265,7 +263,7 @@ static void ec_tx_done(struct sk_buff *skb, int result) static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sockaddr_ec *saddr=(struct sockaddr_ec *)msg->msg_name; + struct sockaddr_ec *saddr = (struct sockaddr_ec *)msg->msg_name; struct net_device *dev; struct ec_addr addr; int err; @@ -298,14 +296,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_lock(&econet_mutex); - if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { - mutex_unlock(&econet_mutex); - return -EINVAL; - } - addr.station = saddr->addr.station; - addr.net = saddr->addr.net; - port = saddr->port; - cb = saddr->cb; + if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) { + mutex_unlock(&econet_mutex); + return -EINVAL; + } + addr.station = saddr->addr.station; + addr.net = saddr->addr.net; + port = saddr->port; + cb = saddr->cb; /* Look for a device with the right network number. */ dev = net2dev_map[addr.net]; @@ -333,9 +331,9 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); - if (skb==NULL) + if (skb == NULL) goto out_unlock; skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -355,7 +353,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, struct ec_framehdr *fh; /* Poke in our control byte and port number. Hack, hack. */ - fh = (struct ec_framehdr *)(skb->data); + fh = (struct ec_framehdr *)skb->data; fh->cb = cb; fh->port = port; if (sock->type != SOCK_DGRAM) { @@ -365,7 +363,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, } /* Copy the data. Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; @@ -385,9 +383,9 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, mutex_unlock(&econet_mutex); return len; - out_free: +out_free: kfree_skb(skb); - out_unlock: +out_unlock: if (dev) dev_put(dev); #else @@ -458,15 +456,14 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, goto error_free_buf; /* Get a skbuff (no data, just holds our cb information) */ - if ((skb = sock_alloc_send_skb(sk, 0, - msg->msg_flags & MSG_DONTWAIT, - &err)) == NULL) + skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) goto error_free_buf; eb = (struct ec_cb *)&skb->cb; eb->cookie = saddr->cookie; - eb->timeout = (5*HZ); + eb->timeout = 5 * HZ; eb->start = jiffies; ah.handle = aun_seq; eb->seq = (aun_seq++); @@ -480,9 +477,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpmsg.msg_iovlen = 2; udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; - udpmsg.msg_flags=0; + udpmsg.msg_flags = 0; - oldfs = get_fs(); set_fs(KERNEL_DS); /* More privs :-) */ + oldfs = get_fs(); + set_fs(KERNEL_DS); /* More privs :-) */ err = sock_sendmsg(udpsock, &udpmsg, size); set_fs(oldfs); @@ -530,7 +528,7 @@ static int econet_getname(struct socket *sock, struct sockaddr *uaddr, static void econet_destroy_timer(unsigned long data) { - struct sock *sk=(struct sock *)data; + struct sock *sk = (struct sock *)data; if (!sk_has_allocations(sk)) { sk_free(sk); @@ -539,7 +537,7 @@ static void econet_destroy_timer(unsigned long data) sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); - printk(KERN_DEBUG "econet socket destroy delayed\n"); + pr_debug("econet: socket destroy delayed\n"); } /* @@ -651,7 +649,8 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) + dev = dev_get_by_name(&init_net, ifr.ifr_name); + if (dev == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -715,28 +714,26 @@ static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) * Handle generic ioctls */ -static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int econet_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; - switch(cmd) { - case SIOCGSTAMP: - return sock_get_timestamp(sk, argp); + switch (cmd) { + case SIOCGSTAMP: + return sock_get_timestamp(sk, argp); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, argp); + case SIOCGSTAMPNS: + return sock_get_timestampns(sk, argp); - case SIOCSIFADDR: - case SIOCGIFADDR: - return ec_dev_ioctl(sock, cmd, argp); - break; + case SIOCSIFADDR: + case SIOCGIFADDR: + return ec_dev_ioctl(sock, cmd, argp); - default: - return -ENOIOCTLCMD; } - /*NOTREACHED*/ - return 0; + + return -ENOIOCTLCMD; } static const struct net_proto_family econet_family_ops = { @@ -836,7 +833,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb) udpmsg.msg_namelen = sizeof(sin); udpmsg.msg_control = NULL; udpmsg.msg_controllen = 0; - udpmsg.msg_flags=0; + udpmsg.msg_flags = 0; kernel_sendmsg(udpsock, &udpmsg, &iov, 1, sizeof(ah)); } @@ -859,26 +856,25 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len) if (dst) edev = dst->dev->ec_ptr; - if (! edev) + if (!edev) goto bad; - if ((sk = ec_listening_socket(ah->port, stn, edev->net)) == NULL) + sk = ec_listening_socket(ah->port, stn, edev->net); + if (sk == NULL) goto bad; /* Nobody wants it */ newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15, GFP_ATOMIC); - if (newskb == NULL) - { - printk(KERN_DEBUG "AUN: memory squeeze, dropping packet.\n"); + if (newskb == NULL) { + pr_debug("AUN: memory squeeze, dropping packet\n"); /* Send nack and hope sender tries again */ goto bad; } - memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah+1), + memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah + 1), len - sizeof(struct aunhdr)); - if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) - { + if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) { /* Socket is bankrupt. */ kfree_skb(newskb); goto bad; @@ -914,7 +910,7 @@ static void aun_tx_ack(unsigned long seq, int result) goto foundit; } spin_unlock_irqrestore(&aun_queue_lock, flags); - printk(KERN_DEBUG "AUN: unknown sequence %ld\n", seq); + pr_debug("AUN: unknown sequence %ld\n", seq); return; foundit: @@ -939,18 +935,17 @@ static void aun_data_available(struct sock *sk, int slen) while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { - printk(KERN_ERR "AUN: no data available?!"); + pr_err("AUN: no data available?!\n"); return; } - printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err); + pr_debug("AUN: recvfrom() error %d\n", -err); } data = skb_transport_header(skb) + sizeof(struct udphdr); ah = (struct aunhdr *)data; len = skb->len - sizeof(struct udphdr); - switch (ah->code) - { + switch (ah->code) { case 2: aun_incoming(skb, ah, len); break; @@ -961,7 +956,7 @@ static void aun_data_available(struct sock *sk, int slen) aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING); break; default: - printk(KERN_DEBUG "unknown AUN packet (type %d)\n", data[0]); + pr_debug("AUN: unknown packet type: %d\n", data[0]); } skb_free_datagram(sk, skb); @@ -991,7 +986,7 @@ static void ab_cleanup(unsigned long h) } spin_unlock_irqrestore(&aun_queue_lock, flags); - mod_timer(&ab_cleanup_timer, jiffies + (HZ*2)); + mod_timer(&ab_cleanup_timer, jiffies + (HZ * 2)); } static int __init aun_udp_initialise(void) @@ -1001,7 +996,7 @@ static int __init aun_udp_initialise(void) skb_queue_head_init(&aun_queue); setup_timer(&ab_cleanup_timer, ab_cleanup, 0); - ab_cleanup_timer.expires = jiffies + (HZ*2); + ab_cleanup_timer.expires = jiffies + (HZ * 2); add_timer(&ab_cleanup_timer); memset(&sin, 0, sizeof(sin)); @@ -1009,9 +1004,9 @@ static int __init aun_udp_initialise(void) /* We can count ourselves lucky Acorn machines are too dim to speak IPv6. :-) */ - if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock)) < 0) - { - printk("AUN: socket error %d\n", -error); + error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock); + if (error < 0) { + pr_err("AUN: socket error %d\n", -error); return error; } @@ -1020,10 +1015,9 @@ static int __init aun_udp_initialise(void) from interrupts */ error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin, - sizeof(sin)); - if (error < 0) - { - printk("AUN: bind error %d\n", -error); + sizeof(sin)); + if (error < 0) { + pr_err("AUN: bind error %d\n", -error); goto release; } @@ -1044,7 +1038,8 @@ release: * Receive an Econet frame from a device. */ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk = NULL; @@ -1059,13 +1054,14 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet if (!edev) goto drop; - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb == NULL) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct ec_framehdr))) goto drop; - hdr = (struct ec_framehdr *) skb->data; + hdr = (struct ec_framehdr *)skb->data; /* First check for encapsulated IP */ if (hdr->port == EC_PORT_IP) { @@ -1093,8 +1089,8 @@ drop: } static struct packet_type econet_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_ECONET), - .func = econet_rcv, + .type = cpu_to_be16(ETH_P_ECONET), + .func = econet_rcv, }; static void econet_hw_initialise(void) @@ -1104,9 +1100,10 @@ static void econet_hw_initialise(void) #endif -static int econet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int econet_notifier(struct notifier_block *this, unsigned long msg, + void *data) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = data; struct ec_device *edev; if (!net_eq(dev_net(dev), &init_net)) @@ -1116,8 +1113,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ edev = dev->ec_ptr; - if (edev) - { + if (edev) { if (net2dev_map[0] == dev) net2dev_map[0] = NULL; net2dev_map[edev->net] = NULL; @@ -1131,7 +1127,7 @@ static int econet_notifier(struct notifier_block *this, unsigned long msg, void } static struct notifier_block econet_netdev_notifier = { - .notifier_call =econet_notifier, + .notifier_call = econet_notifier, }; static void __exit econet_proto_exit(void) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 44d2b42fda56..27997d35ebd3 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -231,11 +231,11 @@ EXPORT_SYMBOL(eth_header_parse); * eth_header_cache - fill cache entry from neighbour * @neigh: source neighbour * @hh: destination cache entry + * @type: Ethernet type field * Create an Ethernet header template from the neighbour. */ -int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh) +int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { - __be16 type = hh->hh_type; struct ethhdr *eth; const struct net_device *dev = neigh->dev; @@ -340,6 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; + dev->priv_flags = IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 6df6ecf49708..40e606f3788f 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -302,7 +302,7 @@ static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { if (!netif_running(dev)) - return -ENODEV; + goto drop; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); #ifdef DEBUG print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 1a3334c2609a..faecf648123f 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -1,5 +1,5 @@ /* - * ZigBee socket interface + * IEEE 802.15.4 dgram socket interface * * Copyright 2007, 2008 Siemens AG * diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index ed0eab39f531..c64a38d57aa3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -24,6 +24,7 @@ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/if_arp.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/wpan-phy.h> @@ -44,7 +45,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pr_debug("%s\n", __func__); if (!buf) - goto out; + return -EMSGSIZE; hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, IEEE802154_LIST_PHY); @@ -65,6 +66,7 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, pages * sizeof(uint32_t), buf); mutex_unlock(&phy->pib_lock); + kfree(buf); return genlmsg_end(msg, hdr); nla_put_failure: @@ -212,12 +214,37 @@ static int ieee802154_add_iface(struct sk_buff *skb, goto nla_put_failure; } + if (info->attrs[IEEE802154_ATTR_HW_ADDR] && + nla_len(info->attrs[IEEE802154_ATTR_HW_ADDR]) != + IEEE802154_ADDR_LEN) { + rc = -EINVAL; + goto nla_put_failure; + } + dev = phy->add_iface(phy, devname); if (IS_ERR(dev)) { rc = PTR_ERR(dev); goto nla_put_failure; } + if (info->attrs[IEEE802154_ATTR_HW_ADDR]) { + struct sockaddr addr; + + addr.sa_family = ARPHRD_IEEE802154; + nla_memcpy(&addr.sa_data, info->attrs[IEEE802154_ATTR_HW_ADDR], + IEEE802154_ADDR_LEN); + + /* + * strangely enough, some callbacks (inetdev_event) from + * dev_set_mac_address require RTNL_LOCK + */ + rtnl_lock(); + rc = dev_set_mac_address(dev, &addr); + rtnl_unlock(); + if (rc) + goto dev_unregister; + } + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); @@ -227,6 +254,11 @@ static int ieee802154_add_iface(struct sk_buff *skb, return ieee802154_nl_reply(msg, info); +dev_unregister: + rtnl_lock(); /* del_iface must be called with RTNL lock */ + phy->del_iface(phy, dev); + dev_put(dev); + rtnl_unlock(); nla_put_failure: nlmsg_free(msg); out_dev: diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cc1463156cd0..1b745d412cf6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,6 +465,11 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; + if (addr->sin_family != AF_INET) { + err = -EAFNOSUPPORT; + goto out; + } + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too @@ -673,6 +678,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk2); + sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); @@ -1434,11 +1440,11 @@ EXPORT_SYMBOL_GPL(inet_ctl_sock_create); unsigned long snmp_fold_field(void __percpu *mib[], int offt) { unsigned long res = 0; - int i; + int i, j; for_each_possible_cpu(i) { - res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); - res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); + for (j = 0; j < SNMP_ARRAY_SZ; j++) + res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt); } return res; } @@ -1452,28 +1458,19 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset) int cpu; for_each_possible_cpu(cpu) { - void *bhptr, *userptr; + void *bhptr; struct u64_stats_sync *syncp; - u64 v_bh, v_user; + u64 v; unsigned int start; - /* first mib used by softirq context, we must use _bh() accessors */ - bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu); + bhptr = per_cpu_ptr(mib[0], cpu); syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); do { start = u64_stats_fetch_begin_bh(syncp); - v_bh = *(((u64 *) bhptr) + offt); + v = *(((u64 *) bhptr) + offt); } while (u64_stats_fetch_retry_bh(syncp, start)); - /* second mib used in USER context */ - userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu); - syncp = (struct u64_stats_sync *)(userptr + syncp_offset); - do { - start = u64_stats_fetch_begin(syncp); - v_user = *(((u64 *) userptr) + offt); - } while (u64_stats_fetch_retry(syncp, start)); - - res += v_bh + v_user; + res += v; } return res; } @@ -1485,25 +1482,28 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align) BUG_ON(ptr == NULL); ptr[0] = __alloc_percpu(mibsize, align); if (!ptr[0]) - goto err0; + return -ENOMEM; +#if SNMP_ARRAY_SZ == 2 ptr[1] = __alloc_percpu(mibsize, align); - if (!ptr[1]) - goto err1; + if (!ptr[1]) { + free_percpu(ptr[0]); + ptr[0] = NULL; + return -ENOMEM; + } +#endif return 0; -err1: - free_percpu(ptr[0]); - ptr[0] = NULL; -err0: - return -ENOMEM; } EXPORT_SYMBOL_GPL(snmp_mib_init); -void snmp_mib_free(void __percpu *ptr[2]) +void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ]) { + int i; + BUG_ON(ptr == NULL); - free_percpu(ptr[0]); - free_percpu(ptr[1]); - ptr[0] = ptr[1] = NULL; + for (i = 0; i < SNMP_ARRAY_SZ; i++) { + free_percpu(ptr[i]); + ptr[i] = NULL; + } } EXPORT_SYMBOL_GPL(snmp_mib_free); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1b74d3b64371..96a164aa1367 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -97,7 +97,6 @@ #include <linux/init.h> #include <linux/net.h> #include <linux/rcupdate.h> -#include <linux/jhash.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -139,8 +138,6 @@ static const struct neigh_ops arp_generic_ops = { .error_report = arp_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; static const struct neigh_ops arp_hh_ops = { @@ -149,16 +146,12 @@ static const struct neigh_ops arp_hh_ops = { .error_report = arp_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; static const struct neigh_ops arp_direct_ops = { .family = AF_INET, - .output = dev_queue_xmit, - .connected_output = dev_queue_xmit, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, + .output = neigh_direct_output, + .connected_output = neigh_direct_output, }; static const struct neigh_ops arp_broken_ops = { @@ -167,8 +160,6 @@ static const struct neigh_ops arp_broken_ops = { .error_report = arp_error_report, .output = neigh_compat_output, .connected_output = neigh_compat_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; struct neigh_table arp_tbl = { @@ -232,7 +223,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 hash_rnd) { - return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd); + return arp_hashfn(*(u32 *)pkey, dev, hash_rnd); } static int arp_constructor(struct neighbour *neigh) @@ -259,7 +250,7 @@ static int arp_constructor(struct neighbour *neigh) if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &arp_direct_ops; - neigh->output = neigh->ops->queue_xmit; + neigh->output = neigh_direct_output; } else { /* Good devices (checked by reading texts, but only Ethernet is tested) @@ -518,30 +509,6 @@ EXPORT_SYMBOL(arp_find); /* END OF OBSOLETE FUNCTIONS */ -int arp_bind_neighbour(struct dst_entry *dst) -{ - struct net_device *dev = dst->dev; - struct neighbour *n = dst->neighbour; - - if (dev == NULL) - return -EINVAL; - if (n == NULL) { - __be32 nexthop = ((struct rtable *)dst)->rt_gateway; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - nexthop = 0; - n = __neigh_lookup_errno( -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - dev->type == ARPHRD_ATM ? - clip_tbl_hook : -#endif - &arp_tbl, &nexthop, dev); - if (IS_ERR(n)) - return PTR_ERR(n); - dst->neighbour = n; - } - return 0; -} - /* * Check if we can use proxy ARP for this path */ diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2b3c23c287cd..2c2a98e402e7 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -50,7 +50,7 @@ #include <net/tcp.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/bug.h> #include <asm/unaligned.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0d4a184af16f..bc19bd06dd00 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1134,15 +1134,15 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) { - struct in_ifaddr *ifa = in_dev->ifa_list; - - if (!ifa) - return; + struct in_ifaddr *ifa; - arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_local, dev, - ifa->ifa_local, NULL, - dev->dev_addr, NULL); + for (ifa = in_dev->ifa_list; ifa; + ifa = ifa->ifa_next) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, + dev->dev_addr, NULL); + } } /* Called only under RTNL semaphore */ @@ -1833,8 +1833,8 @@ void __init devinet_init(void) rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 22524716fe70..92fc5f69f5da 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1124,9 +1124,9 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); register_pernet_subsys(&fib_net_ops); register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58c25ea5a5c1..de9e2978476f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -110,9 +110,10 @@ struct leaf { struct leaf_info { struct hlist_node hlist; - struct rcu_head rcu; int plen; + u32 mask_plen; /* ntohl(inet_make_mask(plen)) */ struct list_head falh; + struct rcu_head rcu; }; struct tnode { @@ -451,6 +452,7 @@ static struct leaf_info *leaf_info_new(int plen) struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); if (li) { li->plen = plen; + li->mask_plen = ntohl(inet_make_mask(plen)); INIT_LIST_HEAD(&li->falh); } return li; @@ -1359,10 +1361,8 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, hlist_for_each_entry_rcu(li, node, hhead, hlist) { struct fib_alias *fa; - int plen = li->plen; - __be32 mask = inet_make_mask(plen); - if (l->key != (key & ntohl(mask))) + if (l->key != (key & li->mask_plen)) continue; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -1394,7 +1394,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; #endif - res->prefixlen = plen; + res->prefixlen = li->plen; res->nh_sel = nhsel; res->type = fa->fa_type; res->scope = fa->fa_info->fib_scope; @@ -1402,7 +1402,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, res->table = tb; res->fa_head = &li->falh; if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&res->fi->fib_clntref); + atomic_inc(&fi->fib_clntref); return 0; } } diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index c6933f2ea310..dbfc21de3479 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -15,8 +15,8 @@ #include <linux/kmod.h> #include <linux/skbuff.h> #include <linux/in.h> +#include <linux/ip.h> #include <linux/netdevice.h> -#include <linux/version.h> #include <linux/spinlock.h> #include <net/protocol.h> #include <net/gre.h> @@ -97,27 +97,17 @@ drop: static void gre_err(struct sk_buff *skb, u32 info) { const struct gre_protocol *proto; - u8 ver; - - if (!pskb_may_pull(skb, 12)) - goto drop; + const struct iphdr *iph = (const struct iphdr *)skb->data; + u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; - ver = skb->data[1]&0x7f; if (ver >= GREPROTO_MAX) - goto drop; + return; rcu_read_lock(); proto = rcu_dereference(gre_proto[ver]); - if (!proto || !proto->err_handler) - goto drop_unlock; - proto->err_handler(skb, info); - rcu_read_unlock(); - return; - -drop_unlock: + if (proto && proto->err_handler) + proto->err_handler(skb, info); rcu_read_unlock(); -drop: - kfree_skb(skb); } static const struct net_protocol net_gre_protocol = { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5395e45dcce6..23ef31baa1af 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -380,6 +380,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct icmp_bxm *param) { struct rtable *rt, *rt2; + struct flowi4 fl4_dec; int err; memset(fl4, 0, sizeof(*fl4)); @@ -408,19 +409,19 @@ static struct rtable *icmp_route_lookup(struct net *net, } else return rt; - err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(fl4), AF_INET); + err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; - if (inet_addr_type(net, fl4->saddr) == RTN_LOCAL) { - rt2 = __ip_route_output_key(net, fl4); + if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) { + rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) err = PTR_ERR(rt2); } else { struct flowi4 fl4_2 = {}; unsigned long orefdst; - fl4_2.daddr = fl4->saddr; + fl4_2.daddr = fl4_dec.saddr; rt2 = ip_route_output_key(net, &fl4_2); if (IS_ERR(rt2)) { err = PTR_ERR(rt2); @@ -428,7 +429,7 @@ static struct rtable *icmp_route_lookup(struct net *net, } /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ - err = ip_route_input(skb_in, fl4->daddr, fl4->saddr, + err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, RT_TOS(tos), rt2->dst.dev); dst_release(&rt2->dst); @@ -440,10 +441,11 @@ static struct rtable *icmp_route_lookup(struct net *net, goto relookup_failed; rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, - flowi4_to_flowi(fl4), NULL, + flowi4_to_flowi(&fl4_dec), NULL, XFRM_LOOKUP_ICMP); if (!IS_ERR(rt2)) { dst_release(&rt->dst); + memcpy(fl4, &fl4_dec, sizeof(*fl4)); rt = rt2; } else if (PTR_ERR(rt2) == -EPERM) { if (rt) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61fac4cabc78..c14d88ad348d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * This struct holds the first and last local port number. */ struct local_ports sysctl_local_ports __read_mostly = { - .lock = SEQLOCK_UNLOCKED, + .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), .range = { 32768, 61000 }, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 6ffe94ca5bc9..389a2e6a17fd 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -437,7 +437,7 @@ static int valid_cc(const void *bc, int len, int cc) return 0; if (cc == len) return 1; - if (op->yes < 4) + if (op->yes < 4 || op->yes & 3) return 0; len -= op->yes; bc += op->yes; @@ -447,11 +447,11 @@ static int valid_cc(const void *bc, int len, int cc) static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { - const unsigned char *bc = bytecode; + const void *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + const struct inet_diag_bc_op *op = bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { @@ -462,22 +462,20 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4) + if (op->no < 4 || op->no > len + 4 || op->no & 3) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len - op->no)) return -EINVAL; break; case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len + 4) - return -EINVAL; break; default: return -EINVAL; } + if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + return -EINVAL; bc += op->yes; len -= op->yes; } @@ -871,7 +869,7 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL); + inet_diag_dump, NULL, 0); } return inet_diag_get_exact(skb, nlh); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 85a0f75dae64..ef7ae6049a51 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -146,8 +146,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) } static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, - struct iphdr *iph, struct tcphdr *tcph, - u16 vlan_tag, struct vlan_group *vgrp) + struct iphdr *iph, struct tcphdr *tcph) { int nr_frags; __be32 *ptr; @@ -173,8 +172,6 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, } lro_desc->mss = tcp_data_len; - lro_desc->vgrp = vgrp; - lro_desc->vlan_tag = vlan_tag; lro_desc->active = 1; lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, @@ -309,29 +306,17 @@ static void lro_flush(struct net_lro_mgr *lro_mgr, skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; - if (lro_desc->vgrp) { - if (lro_mgr->features & LRO_F_NAPI) - vlan_hwaccel_receive_skb(lro_desc->parent, - lro_desc->vgrp, - lro_desc->vlan_tag); - else - vlan_hwaccel_rx(lro_desc->parent, - lro_desc->vgrp, - lro_desc->vlan_tag); - - } else { - if (lro_mgr->features & LRO_F_NAPI) - netif_receive_skb(lro_desc->parent); - else - netif_rx(lro_desc->parent); - } + if (lro_mgr->features & LRO_F_NAPI) + netif_receive_skb(lro_desc->parent); + else + netif_rx(lro_desc->parent); LRO_INC_STATS(lro_mgr, flushed); lro_clear_desc(lro_desc); } static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, - struct vlan_group *vgrp, u16 vlan_tag, void *priv) + void *priv) { struct net_lro_desc *lro_desc; struct iphdr *iph; @@ -360,7 +345,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, goto out; skb->ip_summed = lro_mgr->ip_summed_aggr; - lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); + lro_init_desc(lro_desc, skb, iph, tcph); LRO_INC_STATS(lro_mgr, aggregated); return 0; } @@ -433,8 +418,7 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, struct skb_frag_struct *frags, int len, int true_size, - struct vlan_group *vgrp, - u16 vlan_tag, void *priv, __wsum sum) + void *priv, __wsum sum) { struct net_lro_desc *lro_desc; struct iphdr *iph; @@ -480,7 +464,7 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, tcph = (void *)((u8 *)skb->data + vlan_hdr_len + IP_HDR_LEN(iph)); - lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); + lro_init_desc(lro_desc, skb, iph, tcph); LRO_INC_STATS(lro_mgr, aggregated); return NULL; } @@ -514,7 +498,7 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, void *priv) { - if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { + if (__lro_proc_skb(lro_mgr, skb, priv)) { if (lro_mgr->features & LRO_F_NAPI) netif_receive_skb(skb); else @@ -523,29 +507,13 @@ void lro_receive_skb(struct net_lro_mgr *lro_mgr, } EXPORT_SYMBOL(lro_receive_skb); -void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, - struct sk_buff *skb, - struct vlan_group *vgrp, - u16 vlan_tag, - void *priv) -{ - if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { - if (lro_mgr->features & LRO_F_NAPI) - vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); - else - vlan_hwaccel_rx(skb, vgrp, vlan_tag); - } -} -EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); - void lro_receive_frags(struct net_lro_mgr *lro_mgr, struct skb_frag_struct *frags, int len, int true_size, void *priv, __wsum sum) { struct sk_buff *skb; - skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, - priv, sum); + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum); if (!skb) return; @@ -556,26 +524,6 @@ void lro_receive_frags(struct net_lro_mgr *lro_mgr, } EXPORT_SYMBOL(lro_receive_frags); -void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, - struct skb_frag_struct *frags, - int len, int true_size, - struct vlan_group *vgrp, - u16 vlan_tag, void *priv, __wsum sum) -{ - struct sk_buff *skb; - - skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, - vlan_tag, priv, sum); - if (!skb) - return; - - if (lro_mgr->features & LRO_F_NAPI) - vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); - else - vlan_hwaccel_rx(skb, vgrp, vlan_tag); -} -EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); - void lro_flush_all(struct net_lro_mgr *lro_mgr) { int i; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ce616d92cc54..e38213817d0a 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -54,15 +54,11 @@ * 1. Nodes may appear in the tree only with the pool lock held. * 2. Nodes may disappear from the tree only with the pool lock held * AND reference count being 0. - * 3. Nodes appears and disappears from unused node list only under - * "inet_peer_unused_lock". - * 4. Global variable peer_total is modified under the pool lock. - * 5. struct inet_peer fields modification: + * 3. Global variable peer_total is modified under the pool lock. + * 4. struct inet_peer fields modification: * avl_left, avl_right, avl_parent, avl_height: pool lock - * unused: unused node list lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing - * dtime: unused node list lock * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -104,19 +100,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -int inet_peer_gc_mintime __read_mostly = 10 * HZ; -int inet_peer_gc_maxtime __read_mostly = 120 * HZ; - -static struct { - struct list_head list; - spinlock_t lock; -} unused_peers = { - .list = LIST_HEAD_INIT(unused_peers.list), - .lock = __SPIN_LOCK_UNLOCKED(unused_peers.lock), -}; - -static void peer_check_expire(unsigned long dummy); -static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); /* Called from ip_output.c:ip_init */ @@ -142,21 +125,6 @@ void __init inet_initpeers(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. - */ - peer_periodic_timer.expires = jiffies - + net_random() % inet_peer_gc_maxtime - + inet_peer_gc_maxtime; - add_timer(&peer_periodic_timer); -} - -/* Called with or without local BH being disabled. */ -static void unlink_from_unused(struct inet_peer *p) -{ - spin_lock_bh(&unused_peers.lock); - list_del_init(&p->unused); - spin_unlock_bh(&unused_peers.lock); } static int addr_compare(const struct inetpeer_addr *a, @@ -203,20 +171,6 @@ static int addr_compare(const struct inetpeer_addr *a, u; \ }) -static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) -{ - int cur, old = atomic_read(ptr); - - while (old != u) { - *newv = old + a; - cur = atomic_cmpxchg(ptr, old, *newv); - if (cur == old) - return true; - old = cur; - } - return false; -} - /* * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall @@ -225,8 +179,7 @@ static bool atomic_add_unless_return(atomic_t *ptr, int a, int u, int *newv) * We exit from this function if number of links exceeds PEER_MAXDEPTH */ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base, - int *newrefcnt) + struct inet_peer_base *base) { struct inet_peer *u = rcu_dereference(base->root); int count = 0; @@ -235,11 +188,9 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, int cmp = addr_compare(daddr, &u->daddr); if (cmp == 0) { /* Before taking a reference, check if this entry was - * deleted, unlink_from_pool() sets refcnt=-1 to make - * distinction between an unused entry (refcnt=0) and - * a freed one. + * deleted (refcnt=-1) */ - if (!atomic_add_unless_return(&u->refcnt, 1, -1, newrefcnt)) + if (!atomic_add_unless(&u->refcnt, 1, -1)) u = NULL; return u; } @@ -366,137 +317,99 @@ static void inetpeer_free_rcu(struct rcu_head *head) kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -/* May be called with local BH enabled. */ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) { - int do_free; - - do_free = 0; - - write_seqlock_bh(&base->lock); - /* Check the reference counter. It was artificially incremented by 1 - * in cleanup() function to prevent sudden disappearing. If we can - * atomically (because of lockless readers) take this last reference, - * it's safe to remove the node and free it later. - * We use refcnt=-1 to alert lockless readers this entry is deleted. - */ - if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { - struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. */ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - do_free = 1; + struct inet_peer __rcu ***stackptr, ***delp; + + if (lookup(&p->daddr, stack, base) != p) + BUG(); + delp = stackptr - 1; /* *delp[0] == p */ + if (p->avl_left == peer_avl_empty_rcu) { + *delp[0] = p->avl_right; + --stackptr; + } else { + /* look for a node to insert instead of p */ + struct inet_peer *t; + t = lookup_rightempty(p, base); + BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); + **--stackptr = t->avl_left; + /* t is removed, t->daddr > x->daddr for any + * x in p->avl_left subtree. + * Put t in the old place of p. */ + RCU_INIT_POINTER(*delp[0], t); + t->avl_left = p->avl_left; + t->avl_right = p->avl_right; + t->avl_height = p->avl_height; + BUG_ON(delp[1] != &p->avl_left); + delp[1] = &t->avl_left; /* was &p->avl_left */ } - write_sequnlock_bh(&base->lock); - - if (do_free) - call_rcu(&p->rcu, inetpeer_free_rcu); - else - /* The node is used again. Decrease the reference counter - * back. The loop "cleanup -> unlink_from_unused - * -> unlink_from_pool -> putpeer -> link_to_unused - * -> cleanup (for the same node)" - * doesn't really exist because the entry will have a - * recent deletion time and will not be cleaned again soon. - */ - inet_putpeer(p); + peer_avl_rebalance(stack, stackptr, base); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); } static struct inet_peer_base *family_to_base(int family) { - return (family == AF_INET ? &v4_peers : &v6_peers); -} - -static struct inet_peer_base *peer_to_base(struct inet_peer *p) -{ - return family_to_base(p->daddr.family); + return family == AF_INET ? &v4_peers : &v6_peers; } -/* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl, struct inet_peer __rcu **stack[PEER_MAXDEPTH]) +/* perform garbage collect on all items stacked during a lookup */ +static int inet_peer_gc(struct inet_peer_base *base, + struct inet_peer __rcu **stack[PEER_MAXDEPTH], + struct inet_peer __rcu ***stackptr) { - struct inet_peer *p = NULL; - - /* Remove the first entry from the list of unused nodes. */ - spin_lock_bh(&unused_peers.lock); - if (!list_empty(&unused_peers.list)) { - __u32 delta; - - p = list_first_entry(&unused_peers.list, struct inet_peer, unused); - delta = (__u32)jiffies - p->dtime; + struct inet_peer *p, *gchead = NULL; + __u32 delta, ttl; + int cnt = 0; - if (delta < ttl) { - /* Do not prune fresh entries. */ - spin_unlock_bh(&unused_peers.lock); - return -1; + if (base->total >= inet_peer_threshold) + ttl = 0; /* be aggressive */ + else + ttl = inet_peer_maxttl + - (inet_peer_maxttl - inet_peer_minttl) / HZ * + base->total / inet_peer_threshold * HZ; + stackptr--; /* last stack slot is peer_avl_empty */ + while (stackptr > stack) { + stackptr--; + p = rcu_deref_locked(**stackptr, base); + if (atomic_read(&p->refcnt) == 0) { + smp_rmb(); + delta = (__u32)jiffies - p->dtime; + if (delta >= ttl && + atomic_cmpxchg(&p->refcnt, 0, -1) == 0) { + p->gc_next = gchead; + gchead = p; + } } - - list_del_init(&p->unused); - - /* Grab an extra reference to prevent node disappearing - * before unlink_from_pool() call. */ - atomic_inc(&p->refcnt); } - spin_unlock_bh(&unused_peers.lock); - - if (p == NULL) - /* It means that the total number of USED entries has - * grown over inet_peer_threshold. It shouldn't really - * happen because of entry limits in route cache. */ - return -1; - - unlink_from_pool(p, peer_to_base(p), stack); - return 0; + while ((p = gchead) != NULL) { + gchead = p->gc_next; + cnt++; + unlink_from_pool(p, base, stack); + } + return cnt; } -/* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; unsigned int sequence; - int invalidated, newrefcnt = 0; + int invalidated, gccnt = 0; - /* Look up for the address quickly, lockless. + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base, &newrefcnt); + p = lookup_rcu(daddr, base); invalidated = read_seqretry(&base->lock, sequence); rcu_read_unlock(); - if (p) { -found: /* The existing node has been found. - * Remove the entry from unused list if it was there. - */ - if (newrefcnt == 1) - unlink_from_unused(p); + if (p) return p; - } /* If no writer did a change during our lookup, we can return early. */ if (!create && !invalidated) @@ -506,18 +419,27 @@ found: /* The existing node has been found. * At least, nodes should be hot in our cache. */ write_seqlock_bh(&base->lock); +relookup: p = lookup(daddr, stack, base); if (p != peer_avl_empty) { - newrefcnt = atomic_inc_return(&p->refcnt); + atomic_inc(&p->refcnt); write_sequnlock_bh(&base->lock); - goto found; + return p; + } + if (!gccnt) { + gccnt = inet_peer_gc(base, stack, stackptr); + if (gccnt && create) + goto relookup; } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); + atomic_set(&p->ip_id_count, + (daddr->family == AF_INET) ? + secure_ip_id(daddr->addr.a4) : + secure_ipv6_id(daddr->addr.a6)); p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; @@ -525,7 +447,6 @@ found: /* The existing node has been found. p->pmtu_expires = 0; p->pmtu_orig = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); - INIT_LIST_HEAD(&p->unused); /* Link the node. */ @@ -534,63 +455,15 @@ found: /* The existing node has been found. } write_sequnlock_bh(&base->lock); - if (base->total >= inet_peer_threshold) - /* Remove one less-recently-used entry. */ - cleanup_once(0, stack); - return p; } - -static int compute_total(void) -{ - return v4_peers.total + v6_peers.total; -} EXPORT_SYMBOL_GPL(inet_getpeer); -/* Called with local BH disabled. */ -static void peer_check_expire(unsigned long dummy) -{ - unsigned long now = jiffies; - int ttl, total; - struct inet_peer __rcu **stack[PEER_MAXDEPTH]; - - total = compute_total(); - if (total >= inet_peer_threshold) - ttl = inet_peer_minttl; - else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl, stack)) { - if (jiffies != now) - break; - } - - /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime - * interval depending on the total number of entries (more entries, - * less interval). */ - total = compute_total(); - if (total >= inet_peer_threshold) - peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; - else - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - total / inet_peer_threshold * HZ; - add_timer(&peer_periodic_timer); -} - void inet_putpeer(struct inet_peer *p) { - local_bh_disable(); - - if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { - list_add_tail(&p->unused, &unused_peers.list); - p->dtime = (__u32)jiffies; - spin_unlock(&unused_peers.lock); - } - - local_bh_enable(); + p->dtime = (__u32)jiffies; + smp_mb__before_atomic_dec(); + atomic_dec(&p->refcnt); } EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 0ad6035f6366..0e0ab98abc6f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -261,8 +261,9 @@ static void ip_expire(unsigned long arg) * Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ - if (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL) + if (qp->user == IP_DEFRAG_AF_PACKET || + (qp->user == IP_DEFRAG_CONNTRACK_IN && + skb_rtable(head)->rt_type != RTN_LOCAL)) goto out_rcu_unlock; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8871067560db..d7bb94c48345 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -731,9 +731,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { + struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; - struct neighbour *neigh = skb_dst(skb)->neighbour; if (neigh == NULL) goto tx_error; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c8f48efc5fd3..073a9b01c40c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -165,7 +165,7 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && net_eq(sock_net(sk), dev_net(dev))) { - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) return 1; } @@ -256,7 +256,7 @@ int ip_local_deliver(struct sk_buff *skb) * Reassemble IP fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c3118e1cd3bb..ec93335901dd 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <asm/uaccess.h> +#include <asm/unaligned.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/icmp.h> @@ -350,7 +351,7 @@ int ip_options_compile(struct net *net, goto error; } if (optptr[2] <= optlen) { - __be32 *timeptr = NULL; + unsigned char *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -359,7 +360,7 @@ int ip_options_compile(struct net *net, case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__be32*)&optptr[optptr[2]-1]; + timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -371,7 +372,7 @@ int ip_options_compile(struct net *net, opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,7 +390,7 @@ int ip_options_compile(struct net *net, if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -403,10 +404,10 @@ int ip_options_compile(struct net *net, } if (timeptr) { struct timespec tv; - __be32 midtime; + u32 midtime; getnstimeofday(&tv); - midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); - memcpy(timeptr, &midtime, sizeof(__be32)); + midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; + put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } } else { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 98af3697c718..ccaaa851ab42 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -182,6 +182,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); + struct neighbour *neigh; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -203,10 +204,9 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + neigh = dst_get_neighbour(dst); + if (neigh) + return neigh_output(neigh, skb); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); @@ -489,7 +489,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (first_len - hlen > mtu || ((first_len - hlen) & 7) || - (iph->frag_off & htons(IP_MF|IP_OFFSET)) || + ip_is_fragment(iph) || skb_cloned(skb)) goto slow_path; @@ -734,7 +734,7 @@ static inline int ip_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags) + int transhdrlen, int maxfraglen, unsigned int flags) { struct sk_buff *skb; int err; @@ -767,7 +767,7 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = mtu - fragheaderlen; + skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; __skb_queue_tail(queue, skb); } @@ -799,9 +799,9 @@ static int __ip_append_data(struct sock *sk, int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; - exthdrlen = transhdrlen ? rt->dst.header_len : 0; - length += exthdrlen; - transhdrlen += exthdrlen; + skb = skb_peek_tail(queue); + + exthdrlen = !skb ? rt->dst.header_len : 0; mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -825,15 +825,13 @@ static int __ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(queue); - cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, - mtu, flags); + maxfraglen, flags); if (err) goto error; return 0; @@ -883,17 +881,16 @@ alloc_new_skb: else alloclen = fraglen; + alloclen += exthdrlen; + /* The last fragment gets additional space at tail. * Note, with MSG_MORE we overallocate on fragments, * because we have no idea what fragment will be * the last. */ - if (datalen == length + fraggap) { + if (datalen == length + fraggap) alloclen += rt->dst.trailer_len; - /* make sure mtu is not reached */ - if (datalen > mtu - fragheaderlen - rt->dst.trailer_len) - datalen -= ALIGN(rt->dst.trailer_len, 8); - } + if (transhdrlen) { skb = sock_alloc_send_skb(sk, alloclen + hh_len + 15, @@ -926,11 +923,11 @@ alloc_new_skb: /* * Find where to start putting bytes. */ - data = skb_put(skb, fraglen); + data = skb_put(skb, fraglen + exthdrlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); - data += fragheaderlen; + data += fragheaderlen + exthdrlen; if (fraggap) { skb->csum = skb_copy_and_csum_bits( @@ -1064,7 +1061,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, */ *rtp = NULL; cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ab7e5542c1cf..472a8c4f1dc0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -861,41 +861,44 @@ static void __init ic_do_bootp_ext(u8 *ext) #endif switch (*ext++) { - case 1: /* Subnet mask */ - if (ic_netmask == NONE) - memcpy(&ic_netmask, ext+1, 4); - break; - case 3: /* Default gateway */ - if (ic_gateway == NONE) - memcpy(&ic_gateway, ext+1, 4); - break; - case 6: /* DNS server */ - servers= *ext/4; - if (servers > CONF_NAMESERVERS_MAX) - servers = CONF_NAMESERVERS_MAX; - for (i = 0; i < servers; i++) { - if (ic_nameservers[i] == NONE) - memcpy(&ic_nameservers[i], ext+1+4*i, 4); - } - break; - case 12: /* Host name */ - ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN); - ic_host_name_set = 1; - break; - case 15: /* Domain name (DNS) */ - ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); - break; - case 17: /* Root path */ - if (!root_server_path[0]) - ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path)); - break; - case 26: /* Interface MTU */ - memcpy(&mtu, ext+1, sizeof(mtu)); - ic_dev_mtu = ntohs(mtu); - break; - case 40: /* NIS Domain name (_not_ DNS) */ - ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN); - break; + case 1: /* Subnet mask */ + if (ic_netmask == NONE) + memcpy(&ic_netmask, ext+1, 4); + break; + case 3: /* Default gateway */ + if (ic_gateway == NONE) + memcpy(&ic_gateway, ext+1, 4); + break; + case 6: /* DNS server */ + servers= *ext/4; + if (servers > CONF_NAMESERVERS_MAX) + servers = CONF_NAMESERVERS_MAX; + for (i = 0; i < servers; i++) { + if (ic_nameservers[i] == NONE) + memcpy(&ic_nameservers[i], ext+1+4*i, 4); + } + break; + case 12: /* Host name */ + ic_bootp_string(utsname()->nodename, ext+1, *ext, + __NEW_UTS_LEN); + ic_host_name_set = 1; + break; + case 15: /* Domain name (DNS) */ + ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); + break; + case 17: /* Root path */ + if (!root_server_path[0]) + ic_bootp_string(root_server_path, ext+1, *ext, + sizeof(root_server_path)); + break; + case 26: /* Interface MTU */ + memcpy(&mtu, ext+1, sizeof(mtu)); + ic_dev_mtu = ntohs(mtu); + break; + case 40: /* NIS Domain name (_not_ DNS) */ + ic_bootp_string(utsname()->domainname, ext+1, *ext, + __NEW_UTS_LEN); + break; } } @@ -932,7 +935,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop; /* Fragments are not supported */ - if (h->frag_off & htons(IP_OFFSET | IP_MF)) { + if (ip_is_fragment(h)) { if (net_ratelimit()) printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " "reply.\n"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 30a7763c400e..58e879157976 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1796,7 +1796,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = iph->tos, + .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = rt->rt_oif, .flowi4_iif = rt->rt_iif, .flowi4_mark = rt->rt_mark, @@ -2544,7 +2544,8 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, + NULL, ipmr_rtm_dumproute, NULL); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4614babdc45f..2e97e3ec1eb7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,51 +17,35 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; - unsigned long orefdst; + __be32 saddr = iph->saddr; + __u8 flags = 0; unsigned int hh_len; - unsigned int type; - type = inet_addr_type(net, iph->saddr); - if (skb->sk && inet_sk(skb->sk)->transparent) - type = RTN_LOCAL; - if (addr_type == RTN_UNSPEC) - addr_type = type; + if (!skb->sk && addr_type != RTN_LOCAL) { + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; + } /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ - if (addr_type == RTN_LOCAL) { - fl4.daddr = iph->daddr; - if (type == RTN_LOCAL) - fl4.saddr = iph->saddr; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; - - /* Drop old route. */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl4.daddr = iph->saddr; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return -1; + fl4.daddr = iph->daddr; + fl4.saddr = saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return -1; - orefdst = skb->_skb_refdst; - if (ip_route_input(skb, iph->daddr, iph->saddr, - RT_TOS(iph->tos), rt->dst.dev) != 0) { - dst_release(&rt->dst); - return -1; - } - dst_release(&rt->dst); - refdst_drop(orefdst); - } + /* Drop old route. */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) return -1; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index d2c1311cb28d..5c9b9d963918 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -203,7 +203,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); @@ -402,7 +403,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 764743843503..24e556e83a3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -566,7 +566,7 @@ check_entry(const struct ipt_entry *e, const char *name) const struct xt_entry_target *t; if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, par->match->name); + duprintf("ip check failed %p %s.\n", e, name); return -EINVAL; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d609ac3cb9a4..db8d22db425f 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -307,7 +307,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * error messages (RELATED) and information requests (see below) */ if (ip_hdr(skb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) + ctinfo == IP_CT_RELATED_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -317,19 +317,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { - case IP_CT_NEW: - ct->mark = hash; - break; - case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: - /* FIXME: we don't handle expectations at the - * moment. they can arrive on a different node than - * the master connection (e.g. FTP passive mode) */ - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: - break; - default: - break; + case IP_CT_NEW: + ct->mark = hash; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + /* FIXME: we don't handle expectations at the moment. + * They can arrive on a different node than + * the master connection (e.g. FTP passive mode) */ + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + break; + default: /* Prevent gcc warnings */ + break; } #ifdef DEBUG diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index d2ed9dc74ebc..9931152a78b5 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -60,7 +60,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1ff79e557f96..51f13f8ec724 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -40,7 +40,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _otcph, *tcph; - unsigned int addr_type; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) @@ -55,6 +54,9 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (oth->rst) return; + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) return; @@ -101,19 +103,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); - addr_type = RTN_UNSPEC; - if (hook != NF_INET_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) - addr_type = RTN_LOCAL; - /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, addr_type)) + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index af6e9c778345..2b57e52c746c 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -25,7 +25,8 @@ MODULE_LICENSE("GPL"); static inline bool match_ip(const struct sk_buff *skb, const struct ipt_ecn_info *einfo) { - return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect; + return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^ + !!(einfo->invert & IPT_ECN_OP_MATCH_IP); } static inline bool match_tcp(const struct sk_buff *skb, @@ -76,8 +77,6 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (ip_hdr(skb)->protocol != IPPROTO_TCP) - return false; if (!match_tcp(skb, info, &par->hotdrop)) return false; } @@ -97,7 +96,7 @@ static int ecn_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && - ip->proto != IPPROTO_TCP) { + (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { pr_info("cannot match TCP bits in rule for non-tcp packets\n"); return -EINVAL; } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a03c02af999..de9da21113a1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -101,7 +101,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); @@ -121,7 +121,9 @@ static unsigned int ipv4_confirm(unsigned int hooknum, return ret; } - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7404bde95994..ab5b27a2916f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -160,7 +160,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } /* Small and modified version of icmp_rcv */ diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f3a9b42b16c6..9bb1b8a37a22 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -82,7 +82,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #endif #endif /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); if (nf_ct_ipv4_gather_frags(skb, user)) return NF_STOLEN; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 9c71b2755ce3..3346de5d94d0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -433,7 +433,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, /* Must be RELATED */ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + skb->nfctinfo == IP_CT_RELATED_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 99cfa28b6d38..ebc5f8894f99 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -160,7 +160,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data if (skb->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) + diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 21c30426480b..733c9abc1cbd 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -53,7 +53,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 8812a02078ab..076b7c8c4aa4 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -719,117 +719,115 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, l = 0; switch (type) { - case SNMP_INTEGER: - len = sizeof(long); - if (!asn1_long_decode(ctx, end, &l)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.l[0] = l; - break; - case SNMP_OCTETSTR: - case SNMP_OPAQUE: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.c, p, len); + case SNMP_INTEGER: + len = sizeof(long); + if (!asn1_long_decode(ctx, end, &l)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.l[0] = l; + break; + case SNMP_OCTETSTR: + case SNMP_OPAQUE: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(p); - break; - case SNMP_NULL: - case SNMP_NOSUCHOBJECT: - case SNMP_NOSUCHINSTANCE: - case SNMP_ENDOFMIBVIEW: - len = 0; - *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - if (!asn1_null_decode(ctx, end)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - break; - case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { - kfree(id); - return 0; - } - len *= sizeof(unsigned long); - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(lp); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.ul, lp, len); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.c, p, len); + kfree(p); + break; + case SNMP_NULL: + case SNMP_NOSUCHOBJECT: + case SNMP_NOSUCHINSTANCE: + case SNMP_ENDOFMIBVIEW: + len = 0; + *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + if (!asn1_null_decode(ctx, end)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + break; + case SNMP_OBJECTID: + if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { + kfree(id); + return 0; + } + len *= sizeof(unsigned long); + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(lp); - break; - case SNMP_IPADDR: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - if (len != 4) { - kfree(p); - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.uc, p, len); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.ul, lp, len); + kfree(lp); + break; + case SNMP_IPADDR: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + if (len != 4) { kfree(p); - break; - case SNMP_COUNTER: - case SNMP_GAUGE: - case SNMP_TIMETICKS: - len = sizeof(unsigned long); - if (!asn1_ulong_decode(ctx, end, &ul)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - pr_notice("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.ul[0] = ul; - break; - default: kfree(id); return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + memcpy((*obj)->syntax.uc, p, len); + kfree(p); + break; + case SNMP_COUNTER: + case SNMP_GAUGE: + case SNMP_TIMETICKS: + len = sizeof(unsigned long); + if (!asn1_ulong_decode(ctx, end, &ul)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + if (net_ratelimit()) + pr_notice("OOM in bsalg (%d)\n", __LINE__); + return 0; + } + (*obj)->syntax.ul[0] = ul; + break; + default: + kfree(id); + return 0; } (*obj)->syntax_len = len; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 7317bdf1d457..a6e606e84820 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -88,7 +88,7 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would @@ -116,7 +116,7 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: + case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, hooknum, skb)) @@ -144,7 +144,7 @@ nf_nat_fn(unsigned int hooknum, default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); + ctinfo == IP_CT_ESTABLISHED_REPLY); } return nf_nat_packet(ct, ctinfo, hooknum, skb); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9aaa67165f42..39b403f854c6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -41,7 +41,6 @@ #include <linux/proc_fs.h> #include <net/sock.h> #include <net/ping.h> -#include <net/icmp.h> #include <net/udp.h> #include <net/route.h> #include <net/inet_common.h> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index c9893d43242e..1457acb39cec 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -38,7 +38,7 @@ */ #include <linux/types.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/byteorder.h> #include <asm/current.h> #include <asm/uaccess.h> @@ -825,28 +825,28 @@ static int compat_raw_getsockopt(struct sock *sk, int level, int optname, static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) { switch (cmd) { - case SIOCOUTQ: { - int amount = sk_wmem_alloc_get(sk); + case SIOCOUTQ: { + int amount = sk_wmem_alloc_get(sk); - return put_user(amount, (int __user *)arg); - } - case SIOCINQ: { - struct sk_buff *skb; - int amount = 0; - - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) - amount = skb->len; - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } + return put_user(amount, (int __user *)arg); + } + case SIOCINQ: { + struct sk_buff *skb; + int amount = 0; + + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) + amount = skb->len; + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } - default: + default: #ifdef CONFIG_IP_MROUTE - return ipmr_ioctl(sk, cmd, (void __user *)arg); + return ipmr_ioctl(sk, cmd, (void __user *)arg); #else - return -ENOIOCTLCMD; + return -ENOIOCTLCMD; #endif } } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b0b956508b..1730689f560e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -108,6 +108,7 @@ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif +#include <net/atmclip.h> #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) @@ -184,6 +185,8 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -198,6 +201,7 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, + .neigh_lookup = ipv4_neigh_lookup, }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -411,8 +415,10 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "HHUptod\tSpecDst"); else { struct rtable *r = v; + struct neighbour *n; int len; + n = dst_get_neighbour(&r->dst); seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->dst.dev ? r->dst.dev->name : "*", @@ -425,9 +431,8 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), r->rt_key_tos, - r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, - r->dst.hh ? (r->dst.hh->hh_output == - dev_queue_xmit) : 0, + -1, + (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0, r->rt_spec_dst, &len); seq_printf(seq, "%*s\n", 127 - len, ""); @@ -1006,6 +1011,37 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + struct neigh_table *tbl = &arp_tbl; + static const __be32 inaddr_any = 0; + struct net_device *dev = dst->dev; + const __be32 *pkey = daddr; + struct neighbour *n; + +#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) + if (dev->type == ARPHRD_ATM) + tbl = clip_tbl_hook; +#endif + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + pkey = &inaddr_any; + + n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); + if (n) + return n; + return neigh_create(tbl, pkey, dev); +} + +static int rt_bind_neighbour(struct rtable *rt) +{ + struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + dst_set_neighbour(&rt->dst, n); + + return 0; +} + static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, struct sk_buff *skb, int ifindex) { @@ -1042,7 +1078,7 @@ restart: rt->dst.flags |= DST_NOCACHE; if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = arp_bind_neighbour(&rt->dst); + int err = rt_bind_neighbour(rt); if (err) { if (net_ratelimit()) printk(KERN_WARNING @@ -1138,7 +1174,7 @@ restart: route or unicast forwarding path. */ if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) { - int err = arp_bind_neighbour(&rt->dst); + int err = rt_bind_neighbour(rt); if (err) { spin_unlock_bh(rt_hash_lock_addr(hash)); @@ -1316,6 +1352,23 @@ reject_redirect: ; } +static bool peer_pmtu_expired(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + time_after_eq(jiffies, orig) && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + +static bool peer_pmtu_cleaned(struct inet_peer *peer) +{ + unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); + + return orig && + cmpxchg(&peer->pmtu_expires, orig, 0) == orig; +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1331,14 +1384,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && - rt->peer->pmtu_expires && - time_after_eq(jiffies, rt->peer->pmtu_expires)) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(dst, RTAX_MTU, - rt->peer->pmtu_orig); + } else if (rt->peer && peer_pmtu_expired(rt->peer)) { + dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); } } return ret; @@ -1428,20 +1475,20 @@ static int ip_error(struct sk_buff *skb) int code; switch (rt->dst.error) { - case EINVAL: - default: - goto out; - case EHOSTUNREACH: - code = ICMP_HOST_UNREACH; - break; - case ENETUNREACH: - code = ICMP_NET_UNREACH; - IP_INC_STATS_BH(dev_net(rt->dst.dev), - IPSTATS_MIB_INNOROUTES); - break; - case EACCES: - code = ICMP_PKT_FILTERED; - break; + case EINVAL: + default: + goto out; + case EHOSTUNREACH: + code = ICMP_HOST_UNREACH; + break; + case ENETUNREACH: + code = ICMP_NET_UNREACH; + IP_INC_STATS_BH(dev_net(rt->dst.dev), + IPSTATS_MIB_INNOROUTES); + break; + case EACCES: + code = ICMP_PKT_FILTERED; + break; } if (!rt->peer) @@ -1531,8 +1578,10 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { - unsigned long expires = peer->pmtu_expires; + unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); + if (!expires) + return; if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { @@ -1555,10 +1604,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) rt_bind_peer(rt, rt->rt_dst, 1); peer = rt->peer; if (peer) { + unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; + if (!pmtu_expires || mtu < peer->pmtu_learned) { pmtu_expires = jiffies + ip_rt_mtu_expires; if (!pmtu_expires) @@ -1578,23 +1628,24 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; + struct neighbour *n; dst_confirm(&rt->dst); - neigh_release(rt->dst.neighbour); - rt->dst.neighbour = NULL; + neigh_release(dst_get_neighbour(&rt->dst)); + dst_set_neighbour(&rt->dst, NULL); rt->rt_gateway = peer->redirect_learned.a4; - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); + rt_bind_neighbour(rt); + n = dst_get_neighbour(&rt->dst); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); rt->rt_gateway = orig_gw; return -EAGAIN; } else { rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, - rt->dst.neighbour); + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } return 0; } @@ -1612,13 +1663,14 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) rt_bind_peer(rt, rt->rt_dst, 0); peer = rt->peer; - if (peer && peer->pmtu_expires) + if (peer) { check_peer_pmtu(dst, peer); - if (peer && peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - if (check_peer_redir(dst, peer)) - return NULL; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } } rt->rt_peer_genid = rt_peer_genid(); @@ -1649,14 +1701,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && - rt->peer && - rt->peer->pmtu_expires) { - unsigned long orig = rt->peer->pmtu_expires; - - if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); - } + if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); } static int ip_rt_bug(struct sk_buff *skb) @@ -1694,7 +1740,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = iph->daddr; fl4.saddr = iph->saddr; - fl4.flowi4_tos = iph->tos; + fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -1770,8 +1816,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - if (peer->pmtu_expires) - check_peer_pmtu(&rt->dst, peer); + check_peer_pmtu(&rt->dst, peer); if (peer->redirect_learned.a4 && peer->redirect_learned.a4 != rt->rt_gateway) { rt->rt_gateway = peer->redirect_learned.a4; @@ -1894,9 +1939,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); + return IS_ERR(rth) ? PTR_ERR(rth) : 0; e_nobufs: return -ENOBUFS; @@ -2702,6 +2745,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .cow_metrics = ipv4_rt_blackhole_cow_metrics, + .neigh_lookup = ipv4_neigh_lookup, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) @@ -2775,7 +2819,8 @@ static int rt_fill_info(struct net *net, struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; - long expires; + long expires = 0; + const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2823,15 +2868,16 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; - expires = (rt->peer && rt->peer->pmtu_expires) ? - rt->peer->pmtu_expires - jiffies : 0; - if (rt->peer) { + if (peer) { inet_peer_refcheck(rt->peer); - id = atomic_read(&rt->peer->ip_id_count) & 0xffff; - if (rt->peer->tcp_ts_stamp) { - ts = rt->peer->tcp_ts; - tsage = get_seconds() - rt->peer->tcp_ts_stamp; + id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; } + expires = ACCESS_ONCE(peer->pmtu_expires); + if (expires) + expires -= jiffies; } if (rt_is_input_route(rt)) { @@ -3295,7 +3341,7 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(ip_rt_max_size); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 26461492a847..92bb9434b338 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 57d0752e239a..69fd7201129a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -398,20 +398,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "inet_peer_gc_mintime", - .data = &inet_peer_gc_mintime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "inet_peer_gc_maxtime", - .data = &inet_peer_gc_maxtime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 054a59d21eb0..46febcacb729 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3220,7 +3220,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long nr_pages, limit; + unsigned long limit; int i, max_share, cnt; unsigned long jiffy = jiffies; @@ -3277,13 +3277,7 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - /* Set the pressure threshold to be a fraction of global memory that - * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22ba..ea0d2183df4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; + } else { + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { @@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) tp->reordering = dst_metric(dst, RTAX_REORDERING); } - if (dst_metric(dst, RTAX_RTT) == 0) - goto reset; - - if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) + if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) goto reset; /* Initial rtt is determined from SYN,SYN-ACK. @@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } tcp_set_rto(sk); - if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { reset: - /* Play conservative. If timestamps are not - * supported, TCP will fail to recalculate correct - * rtt, if initial rto is too small. FORGET ALL AND RESET! + if (tp->srtt == 0) { + /* RFC2988bis: We've failed to get a valid RTT sample from + * 3WHS. This is most likely due to retransmission, + * including spurious one. Reset the RTO back to 3secs + * from the more aggressive 1sec to avoid more spurious + * retransmission. */ - if (!tp->rx_opt.saw_tstamp && tp->srtt) { - tp->srtt = 0; - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; - } + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; } - tp->snd_cwnd = tcp_init_cwnd(tp, dst); + /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC2988bis' more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tcp_xmit_retransmit_queue(sk); } -static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) { tcp_rtt_estimator(sk, seq_rtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; } +EXPORT_SYMBOL(tcp_valid_rtt_meas); /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Supersedes RFC1323) @@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - /* tcp_ack considers this ACK as duplicate - * and does not calculate rtt. - * Force it here. - */ - tcp_ack_update_rtt(sk, 0, 0); - if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7d6671e33b8..955b8e65b69e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) break; icsk->icsk_backoff--; - inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << - icsk->icsk_backoff; + inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) : + TCP_TIMEOUT_INIT) << icsk->icsk_backoff; tcp_bound_rto(sk); skb = tcp_write_queue_head(sk); @@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext) || @@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); + newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1589,6 +1594,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1854,7 +1860,7 @@ static int tcp_v4_init_sock(struct sock *sk) * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - tp->snd_cwnd = 2; + tp->snd_cwnd = TCP_INIT_CWND; /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 80b1f80759ab..d2fe4e06b472 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, * algorithms that we must have the following bandaid to talk * efficiently to them. -DaveM */ - newtp->snd_cwnd = 2; + newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; newtp->bytes_acked = 0; @@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); return NULL; } + if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) + tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; + else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + tcp_rsk(req)->snt_synack = 0; /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abca870d8ff6..1b5a19340a95 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -105,6 +105,7 @@ #include <net/route.h> #include <net/checksum.h> #include <net/xfrm.h> +#include <trace/events/udp.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -1249,6 +1250,9 @@ csum_copy_err: if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -1363,6 +1367,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) is_udplite); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); + trace_udp_fail_queue_rcv_skb(rc, sk); return -1; } @@ -2206,16 +2211,10 @@ void __init udp_table_init(struct udp_table *table, const char *name) void __init udp_init(void) { - unsigned long nr_pages, limit; + unsigned long limit; udp_table_init(&udp_table, "UDP"); - /* Set the pressure threshold up by the same strategy of TCP. It is a - * fraction of global memory that is up to 1/2 at 256 MB, decreasing - * toward zero with the amount of memory, with a floor of 128 pages. - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_udp_mem[0] = limit / 4 * 3; sysctl_udp_mem[1] = limit; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 2d51840e53a1..327a617d594c 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -32,7 +32,12 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) dst = skb_dst(skb); mtu = dst_mtu(dst); if (skb->len > mtu) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + if (skb->sk) + ip_local_error(skb->sk, EMSGSIZE, ip_hdr(skb)->daddr, + inet_sk(skb->sk)->inet_dport, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); ret = -EMSGSIZE; } out: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 981e43eaf704..fc5368ad2b0d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,7 +117,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; - if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: case IPPROTO_UDPLITE: diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 498b927f68be..a55500cc0b29 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (rt->rt6i_nexthop == NULL) + if (dst_get_neighbour(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; @@ -1470,6 +1470,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + if (ifp->prefix_len == 127) /* RFC 6164 */ + return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; @@ -1479,6 +1481,8 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + if (ifp->prefix_len == 127) /* RFC 6164 */ + return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; @@ -1559,6 +1563,11 @@ static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) return -1; } +static int addrconf_ifid_gre(u8 *eui, struct net_device *dev) +{ + return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr); +} + static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { switch (dev->type) { @@ -1572,6 +1581,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_infiniband(eui, dev); case ARPHRD_SIT: return addrconf_ifid_sit(eui, dev); + case ARPHRD_IPGRE: + return addrconf_ifid_gre(eui, dev); } return -1; } @@ -2423,6 +2434,29 @@ static void addrconf_sit_config(struct net_device *dev) } #endif +#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +static void addrconf_gre_config(struct net_device *dev) +{ + struct inet6_dev *idev; + struct in6_addr addr; + + pr_info("ipv6: addrconf_gre_config(%s)\n", dev->name); + + ASSERT_RTNL(); + + if ((idev = ipv6_find_idev(dev)) == NULL) { + printk(KERN_DEBUG "init gre: add_dev failed\n"); + return; + } + + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + addrconf_prefix_route(&addr, 64, dev, 0, 0); + + if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) + addrconf_add_linklocal(idev, &addr); +} +#endif + static inline int ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) { @@ -2539,6 +2573,11 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_sit_config(dev); break; #endif +#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) + case ARPHRD_IPGRE: + addrconf_gre_config(dev); + break; +#endif case ARPHRD_TUNNEL6: addrconf_ip6_tnl_config(dev); break; @@ -4692,16 +4731,20 @@ int __init addrconf_init(void) if (err < 0) goto errout_af; - err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); + err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, + NULL); if (err < 0) goto errout; /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL); - __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr); - __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); - __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); + __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, + inet6_dump_ifaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, + inet6_dump_ifmcaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, + inet6_dump_ifacaddr, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index c8993e5a337c..2d8ddba9ee58 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -592,8 +592,11 @@ out: void __init ipv6_addr_label_rtnl_register(void) { - __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL); - __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL); - __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump); + __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, + NULL, NULL); + __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, + NULL, NULL); + __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, + ip6addrlbl_dump, NULL); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b7919f901fbf..3b5669a2582d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -272,6 +272,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) return -EINVAL; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 4076a0b14b20..54a4678955bf 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) { + (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; @@ -1586,7 +1586,8 @@ int __init fib6_init(void) if (ret) goto out_kmem_cache_create; - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib); + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); if (ret) goto out_unregister_subsys; out: diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9d4b165837d6..32e5339db0c8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -100,6 +100,7 @@ static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct neighbour *neigh; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -134,10 +135,9 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); + neigh = dst_get_neighbour(dst); + if (neigh) + return neigh_output(neigh, skb); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -385,6 +385,7 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct neighbour *n; u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) @@ -459,11 +460,10 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb_sec_path(skb)) { + n = dst_get_neighbour(dst); + if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; - struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same @@ -596,6 +596,31 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static atomic_t ipv6_fragmentation_id; + int old, new; + + if (rt) { + struct inet_peer *peer; + + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + peer = rt->rt6i_peer; + if (peer) { + fhdr->identification = htonl(inet_getid(peer, 0)); + return; + } + } + do { + old = atomic_read(&ipv6_fragmentation_id); + new = old + 1; + if (!new) + new = 1; + } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); + fhdr->identification = htonl(new); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -680,7 +705,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh); + ipv6_select_ident(fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -826,7 +851,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh); + ipv6_select_ident(fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -920,8 +945,11 @@ out: static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { - int err; struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + struct neighbour *n; +#endif + int err; if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); @@ -947,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + n = dst_get_neighbour(*dst); + if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; @@ -1072,7 +1101,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags) + int transhdrlen, int mtu,unsigned int flags, + struct rt6_info *rt) { struct sk_buff *skb; @@ -1116,7 +1146,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr); + ipv6_select_ident(&fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); @@ -1282,7 +1312,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, fragheaderlen, - transhdrlen, mtu, flags); + transhdrlen, mtu, flags, rt); if (err) goto error; return 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 36c2842a86b2..0bc98886c383 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -40,7 +40,7 @@ #include <linux/slab.h> #include <asm/uaccess.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <net/icmp.h> #include <net/ip.h> diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 82a809901f8e..705c82886281 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1354,7 +1354,8 @@ int __init ip6_mr_init(void) goto add_proto_fail; } #endif - rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute); + rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, + ip6mr_rtm_dumproute, NULL); return 0; #ifdef CONFIG_IPV6_PIMSM_V2 add_proto_fail: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7596f071d308..9da6e02eaaeb 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -107,8 +107,6 @@ static const struct neigh_ops ndisc_generic_ops = { .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; static const struct neigh_ops ndisc_hh_ops = { @@ -117,17 +115,13 @@ static const struct neigh_ops ndisc_hh_ops = { .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, }; static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, - .output = dev_queue_xmit, - .connected_output = dev_queue_xmit, - .hh_output = dev_queue_xmit, - .queue_xmit = dev_queue_xmit, + .output = neigh_direct_output, + .connected_output = neigh_direct_output, }; struct neigh_table nd_tbl = { @@ -392,7 +386,7 @@ static int ndisc_constructor(struct neighbour *neigh) if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; - neigh->output = neigh->ops->queue_xmit; + neigh->output = neigh_direct_output; } else { if (is_multicast) { neigh->nud_state = NUD_NOARP; @@ -1244,7 +1238,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); if (rt) - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (rt && lifetime == 0) { neigh_clone(neigh); @@ -1265,7 +1259,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = rt->rt6i_nexthop; + neigh = dst_get_neighbour(&rt->dst); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 413ab0754e1f..249394863284 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -204,7 +204,8 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) else pmsg->outdev_name[0] = '\0'; - if (entry->indev && entry->skb->dev) { + if (entry->indev && entry->skb->dev && + entry->skb->mac_header != entry->skb->network_header) { pmsg->hw_type = entry->skb->dev->type; pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); } @@ -403,7 +404,8 @@ ipq_dev_drop(int ifindex) static inline void __ipq_rcv_skb(struct sk_buff *skb) { - int status, type, pid, flags, nlmsglen, skblen; + int status, type, pid, flags; + unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; skblen = skb->len; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c8af58b22562..4111050a9fc5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -160,7 +160,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) + if (!ct || ctinfo == IP_CT_RELATED_REPLY) goto out; help = nfct_help(ct); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1df3c8b6bf47..7c05e7eacbc6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -177,7 +177,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + return NF_ACCEPT; } static int diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc7313b8f7ea..6a79f3081bdb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -959,57 +959,54 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, return -EFAULT; switch (optname) { - case IPV6_CHECKSUM: - if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && - level == IPPROTO_IPV6) { - /* - * RFC3542 tells that IPV6_CHECKSUM socket - * option in the IPPROTO_IPV6 level is not - * allowed on ICMPv6 sockets. - * If you want to set it, use IPPROTO_RAW - * level IPV6_CHECKSUM socket option - * (Linux extension). - */ - return -EINVAL; - } + case IPV6_CHECKSUM: + if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && + level == IPPROTO_IPV6) { + /* + * RFC3542 tells that IPV6_CHECKSUM socket + * option in the IPPROTO_IPV6 level is not + * allowed on ICMPv6 sockets. + * If you want to set it, use IPPROTO_RAW + * level IPV6_CHECKSUM socket option + * (Linux extension). + */ + return -EINVAL; + } - /* You may get strange result with a positive odd offset; - RFC2292bis agrees with me. */ - if (val > 0 && (val&1)) - return -EINVAL; - if (val < 0) { - rp->checksum = 0; - } else { - rp->checksum = 1; - rp->offset = val; - } + /* You may get strange result with a positive odd offset; + RFC2292bis agrees with me. */ + if (val > 0 && (val&1)) + return -EINVAL; + if (val < 0) { + rp->checksum = 0; + } else { + rp->checksum = 1; + rp->offset = val; + } - return 0; - break; + return 0; - default: - return -ENOPROTOOPT; + default: + return -ENOPROTOOPT; } } static int rawv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - switch(level) { - case SOL_RAW: - break; + switch (level) { + case SOL_RAW: + break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_seticmpfilter(sk, level, optname, optval, - optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM) - break; - default: - return ipv6_setsockopt(sk, level, optname, optval, - optlen); + case SOL_ICMPV6: + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_seticmpfilter(sk, level, optname, optval, optlen); + case SOL_IPV6: + if (optname == IPV6_CHECKSUM) + break; + default: + return ipv6_setsockopt(sk, level, optname, optval, optlen); } return do_rawv6_setsockopt(sk, level, optname, optval, optlen); @@ -1075,21 +1072,19 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, static int rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - switch(level) { - case SOL_RAW: - break; + switch (level) { + case SOL_RAW: + break; - case SOL_ICMPV6: - if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) - return -EOPNOTSUPP; - return rawv6_geticmpfilter(sk, level, optname, optval, - optlen); - case SOL_IPV6: - if (optname == IPV6_CHECKSUM) - break; - default: - return ipv6_getsockopt(sk, level, optname, optval, - optlen); + case SOL_ICMPV6: + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return -EOPNOTSUPP; + return rawv6_geticmpfilter(sk, level, optname, optval, optlen); + case SOL_IPV6: + if (optname == IPV6_CHECKSUM) + break; + default: + return ipv6_getsockopt(sk, level, optname, optval, optlen); } return do_rawv6_getsockopt(sk, level, optname, optval, optlen); @@ -1119,31 +1114,29 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) { - switch(cmd) { - case SIOCOUTQ: - { - int amount = sk_wmem_alloc_get(sk); + switch (cmd) { + case SIOCOUTQ: { + int amount = sk_wmem_alloc_get(sk); - return put_user(amount, (int __user *)arg); - } - case SIOCINQ: - { - struct sk_buff *skb; - int amount = 0; - - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) - amount = skb->tail - skb->transport_header; - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } + return put_user(amount, (int __user *)arg); + } + case SIOCINQ: { + struct sk_buff *skb; + int amount = 0; + + spin_lock_bh(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); + if (skb != NULL) + amount = skb->tail - skb->transport_header; + spin_unlock_bh(&sk->sk_receive_queue.lock); + return put_user(amount, (int __user *)arg); + } - default: + default: #ifdef CONFIG_IPV6_MROUTE - return ip6mr_ioctl(sk, cmd, (void __user *)arg); + return ip6mr_ioctl(sk, cmd, (void __user *)arg); #else - return -ENOIOCTLCMD; + return -ENOIOCTLCMD; #endif } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index de2b1decd786..e8987da06667 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,7 +72,8 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); +static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, + const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_default_mtu(const struct dst_entry *dst); @@ -127,6 +128,11 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev); +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -142,6 +148,7 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, + .neigh_lookup = ip6_neigh_lookup, }; static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) @@ -168,6 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .cow_metrics = ip6_rt_blackhole_cow_metrics, + .neigh_lookup = ip6_neigh_lookup, }; static const u32 ip6_template_metrics[RTAX_MAX] = { @@ -228,9 +236,10 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, - struct net_device *dev) + struct net_device *dev, + int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0); + struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); @@ -355,7 +364,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; + struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -403,7 +412,7 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = rt->rt6i_nexthop; + struct neighbour *neigh = dst_get_neighbour(&rt->dst); int m; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) @@ -682,7 +691,8 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info); } -static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr, +static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort, + const struct in6_addr *daddr, const struct in6_addr *saddr) { struct rt6_info *rt; @@ -691,7 +701,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add * Clone the route. */ - rt = ip6_rt_copy(ort); + rt = ip6_rt_copy(ort, daddr); if (rt) { struct neighbour *neigh; @@ -699,12 +709,11 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && - ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) + ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; ipv6_addr_copy(&rt->rt6i_gateway, daddr); } - ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; @@ -744,22 +753,23 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_add dst_free(&rt->dst); return NULL; } - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); } return rt; } -static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr) +static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, + const struct in6_addr *daddr) { - struct rt6_info *rt = ip6_rt_copy(ort); + struct rt6_info *rt = ip6_rt_copy(ort, daddr); + if (rt) { - ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst))); } return rt; } @@ -793,7 +803,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -899,7 +909,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->input = dst_discard; new->output = dst_discard; - dst_copy_metrics(new, &ort->dst); + if (dst_metrics_read_only(&ort->dst)) + new->_metrics = ort->dst._metrics; + else + dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) in6_dev_hold(rt->rt6i_idev); @@ -1042,7 +1055,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1057,19 +1070,12 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, } rt->rt6i_idev = idev; - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); atomic_set(&rt->dst.__refcnt, 1); + ipv6_addr_copy(&rt->rt6i_dst.addr, addr); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); rt->dst.output = ip6_output; -#if 0 /* there's no chance to use these for ndisc */ - rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST - ? DST_HOST - : 0; - ipv6_addr_copy(&rt->rt6i_dst.addr, addr); - rt->rt6i_dst.plen = 128; -#endif - spin_lock_bh(&icmp6_dst_lock); rt->dst.next = icmp6_dst_gc_list; icmp6_dst_gc_list = &rt->dst; @@ -1214,7 +1220,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); if (rt == NULL) { err = -ENOMEM; @@ -1244,7 +1250,7 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1345,12 +1351,12 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_prefsrc.plen = 0; if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { - rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); - if (IS_ERR(rt->rt6i_nexthop)) { - err = PTR_ERR(rt->rt6i_nexthop); - rt->rt6i_nexthop = NULL; + struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); + if (IS_ERR(n)) { + err = PTR_ERR(n); goto out; } + dst_set_neighbour(&rt->dst, n); } rt->rt6i_flags = cfg->fc_flags; @@ -1581,10 +1587,10 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == rt->dst.neighbour) + if (neigh == dst_get_neighbour(&rt->dst)) goto out; - nrt = ip6_rt_copy(rt); + nrt = ip6_rt_copy(rt, dest); if (nrt == NULL) goto out; @@ -1592,12 +1598,11 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); nrt->rt6i_dst.plen = 128; nrt->dst.flags |= DST_HOST; ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); - nrt->rt6i_nexthop = neigh_clone(neigh); + dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); if (ip6_ins_rt(nrt)) goto out; @@ -1677,7 +1682,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -1730,16 +1735,19 @@ void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *sad * Misc support functions */ -static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) +static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, + const struct in6_addr *dest) { struct net *net = dev_net(ort->rt6i_dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev); + ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; + ipv6_addr_copy(&rt->rt6i_dst.addr, dest); + rt->rt6i_dst.plen = ort->rt6i_dst.plen; dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; @@ -1752,7 +1760,6 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; rt->rt6i_metric = 0; - memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif @@ -2013,7 +2020,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev); + net->loopback_dev, 0); struct neighbour *neigh; if (rt == NULL) { @@ -2025,7 +2032,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, in6_dev_hold(idev); - rt->dst.flags = DST_HOST; + rt->dst.flags |= DST_HOST; rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; @@ -2042,7 +2049,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, return ERR_CAST(neigh); } - rt->rt6i_nexthop = neigh; + dst_set_neighbour(&rt->dst, neigh); ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; @@ -2407,8 +2414,8 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); + if (dst_get_neighbour(&rt->dst)) + NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2592,6 +2599,7 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { struct seq_file *m = p_arg; + struct neighbour *n; seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2600,9 +2608,9 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - - if (rt->rt6i_nexthop) { - seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); + n = dst_get_neighbour(&rt->dst); + if (n) { + seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } @@ -2925,9 +2933,9 @@ int __init ip6_route_init(void) goto xfrm6_init; ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) goto fib6_rules_init; ret = register_netdevice_notifier(&ip6_route_dev_notifier); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1cca5761aea9..07bf1085458f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -677,7 +677,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) @@ -702,7 +702,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct neighbour *neigh = NULL; if (skb_dst(skb)) - neigh = skb_dst(skb)->neighbour; + neigh = dst_get_neighbour(skb_dst(skb)); if (neigh == NULL) { if (net_ratelimit()) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8b9644a8b697..89d5bf806222 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fd28711ba5..78aa53492b3e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1341,6 +1341,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) } have_isn: tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->snt_synack = tcp_time_stamp; security_inet_conn_request(sk, skb, req); @@ -1509,6 +1510,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); + newtp->total_retrans = req->retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; @@ -1644,6 +1649,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { + sock_rps_save_rxhash(nsk, skb->rxhash); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 41f8c9c08dba..29213b51c499 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -453,8 +453,11 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -1356,7 +1359,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr); + ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index cc616974a447..c24f25ab67d3 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -369,7 +369,7 @@ static void irda_getvalue_confirm(int result, __u16 obj_id, { struct irda_sock *self; - self = (struct irda_sock *) priv; + self = priv; if (!self) { IRDA_WARNING("%s: lost myself!\n", __func__); return; @@ -418,7 +418,7 @@ static void irda_selective_discovery_indication(discinfo_t *discovery, IRDA_DEBUG(2, "%s()\n", __func__); - self = (struct irda_sock *) priv; + self = priv; if (!self) { IRDA_WARNING("%s: lost myself!\n", __func__); return; diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 3c1754023022..b65d66e0d817 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -382,7 +382,7 @@ static void ircomm_tty_discovery_indication(discinfo_t *discovery, info.daddr = discovery->daddr; info.saddr = discovery->saddr; - self = (struct ircomm_tty_cb *) priv; + self = priv; ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, NULL, &info); } diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index 25cc2e695158..3eca35faf2a8 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -262,7 +262,7 @@ static void irda_task_timer_expired(void *data) IRDA_DEBUG(2, "%s()\n", __func__); - task = (struct irda_task *) data; + task = data; irda_task_kick(task); } diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 36477538cea8..e71e85ba2bf1 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -87,6 +87,8 @@ static inline void iriap_start_watchdog_timer(struct iriap_cb *self, iriap_watchdog_timer_expired); } +static struct lock_class_key irias_objects_key; + /* * Function iriap_init (void) * @@ -114,6 +116,9 @@ int __init iriap_init(void) return -ENOMEM; } + lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key, + "irias_objects"); + /* * Register some default services for IrLMP */ @@ -300,7 +305,7 @@ static void iriap_disconnect_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); - self = (struct iriap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); @@ -754,7 +759,7 @@ static void iriap_connect_confirm(void *instance, void *sap, { struct iriap_cb *self; - self = (struct iriap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); @@ -786,7 +791,7 @@ static void iriap_connect_indication(void *instance, void *sap, IRDA_DEBUG(1, "%s()\n", __func__); - self = (struct iriap_cb *) instance; + self = instance; IRDA_ASSERT(skb != NULL, return;); IRDA_ASSERT(self != NULL, goto out;); @@ -834,7 +839,7 @@ static int iriap_data_indication(void *instance, void *sap, IRDA_DEBUG(3, "%s()\n", __func__); - self = (struct iriap_cb *) instance; + self = instance; IRDA_ASSERT(skb != NULL, return 0;); IRDA_ASSERT(self != NULL, goto out;); diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c index 7ed3af957935..ba1a3fc39b5c 100644 --- a/net/irda/irlan/irlan_client.c +++ b/net/irda/irlan/irlan_client.c @@ -198,7 +198,7 @@ static int irlan_client_ctrl_data_indication(void *instance, void *sap, IRDA_DEBUG(2, "%s()\n", __func__ ); - self = (struct irlan_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); @@ -226,8 +226,8 @@ static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s(), reason=%d\n", __func__ , reason); - self = (struct irlan_cb *) instance; - tsap = (struct tsap_cb *) sap; + self = instance; + tsap = sap; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); @@ -298,7 +298,7 @@ static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, IRDA_DEBUG(4, "%s()\n", __func__ ); - self = (struct irlan_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); @@ -542,7 +542,7 @@ void irlan_client_get_value_confirm(int result, __u16 obj_id, IRDA_ASSERT(priv != NULL, return;); - self = (struct irlan_cb *) priv; + self = priv; IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); /* We probably don't need to make any more queries */ diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 6130f9d9dbe1..779117636270 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -317,8 +317,8 @@ static void irlan_connect_indication(void *instance, void *sap, IRDA_DEBUG(2, "%s()\n", __func__ ); - self = (struct irlan_cb *) instance; - tsap = (struct tsap_cb *) sap; + self = instance; + tsap = sap; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); @@ -361,7 +361,7 @@ static void irlan_connect_confirm(void *instance, void *sap, { struct irlan_cb *self; - self = (struct irlan_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); @@ -406,8 +406,8 @@ static void irlan_disconnect_indication(void *instance, IRDA_DEBUG(0, "%s(), reason=%d\n", __func__ , reason); - self = (struct irlan_cb *) instance; - tsap = (struct tsap_cb *) sap; + self = instance; + tsap = sap; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 8ee1ff6c742f..e8d5f4405d68 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -272,7 +272,7 @@ void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) struct irlan_cb *self; struct net_device *dev; - self = (struct irlan_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index b8af74ab8b68..8b61cf0d8a69 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -73,7 +73,7 @@ static int irlan_provider_data_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s()\n", __func__ ); - self = (struct irlan_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); @@ -131,8 +131,8 @@ static void irlan_provider_connect_indication(void *instance, void *sap, IRDA_DEBUG(0, "%s()\n", __func__ ); - self = (struct irlan_cb *) instance; - tsap = (struct tsap_cb *) sap; + self = instance; + tsap = sap; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); @@ -182,8 +182,8 @@ static void irlan_provider_disconnect_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s(), reason=%d\n", __func__ , reason); - self = (struct irlan_cb *) instance; - tsap = (struct tsap_cb *) sap; + self = instance; + tsap = sap; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index 9715e6e5900b..f06947c4fa82 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -780,7 +780,7 @@ void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name ) /* * Search for entry */ - entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name ); + entry = hashbin_find(hashbin, hashv, name); /* Release lock */ spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); @@ -813,7 +813,7 @@ void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name, * This allow to check if the current item is still in the * hashbin or has been removed. */ - entry = (irda_queue_t* ) hashbin_find( hashbin, hashv, name ); + entry = hashbin_find(hashbin, hashv, name); /* * Trick hashbin_get_next() to return what we want diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 9d9af4606970..285ccd623ae5 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -350,7 +350,7 @@ static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, { struct tsap_cb *self; - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); @@ -879,7 +879,7 @@ static int irttp_udata_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s()\n", __func__); - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return -1;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); @@ -914,7 +914,7 @@ static int irttp_data_indication(void *instance, void *sap, unsigned long flags; int n; - self = (struct tsap_cb *) instance; + self = instance; n = skb->data[0] & 0x7f; /* Extract the credits */ @@ -996,7 +996,7 @@ static void irttp_status_indication(void *instance, IRDA_DEBUG(4, "%s()\n", __func__); - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); @@ -1025,7 +1025,7 @@ static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) { struct tsap_cb *self; - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); @@ -1208,7 +1208,7 @@ static void irttp_connect_confirm(void *instance, void *sap, IRDA_DEBUG(4, "%s()\n", __func__); - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); @@ -1292,13 +1292,13 @@ static void irttp_connect_indication(void *instance, void *sap, __u8 plen; __u8 n; - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); IRDA_ASSERT(skb != NULL, return;); - lsap = (struct lsap_cb *) sap; + lsap = sap; self->max_seg_size = max_seg_size - TTP_HEADER; self->max_header_size = max_header_size+TTP_HEADER; @@ -1602,7 +1602,7 @@ static void irttp_disconnect_indication(void *instance, void *sap, IRDA_DEBUG(4, "%s()\n", __func__); - self = (struct tsap_cb *) instance; + self = instance; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index a15c01524959..075a3808aa40 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -51,10 +51,10 @@ #include <linux/cpu.h> #include <linux/reboot.h> #include <net/iucv/iucv.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/ebcdic.h> #include <asm/io.h> -#include <asm/s390_ext.h> +#include <asm/irq.h> #include <asm/smp.h> /* @@ -1988,12 +1988,13 @@ static int __init iucv_init(void) rc = -EPROTONOSUPPORT; goto out; } + ctl_set_bit(0, 1); rc = iucv_query_maxconn(); if (rc) - goto out; + goto out_ctl; rc = register_external_interrupt(0x4000, iucv_external_interrupt); if (rc) - goto out; + goto out_ctl; iucv_root = root_device_register("iucv"); if (IS_ERR(iucv_root)) { rc = PTR_ERR(iucv_root); @@ -2055,6 +2056,8 @@ out_free: root_device_unregister(iucv_root); out_int: unregister_external_interrupt(0x4000, iucv_external_interrupt); +out_ctl: + ctl_clear_bit(0, 1); out: return rc; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 8f92cf8116ea..1e733e9073d0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -621,7 +621,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct unsigned short family; xfrm_address_t *xaddr; - sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = ext_hdrs[SADB_EXT_SA - 1]; if (sa == NULL) return NULL; @@ -630,7 +630,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct return NULL; /* sadb_address_len should be checked by caller */ - addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; + addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; if (addr == NULL) return NULL; @@ -1039,7 +1039,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, int err; - sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = ext_hdrs[SADB_EXT_SA - 1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) @@ -1078,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); - key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && ((key->sadb_key_bits+7) / 8 == 0 || @@ -1105,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; - lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; + lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; + lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); @@ -1120,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -1134,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } - key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); @@ -2219,7 +2219,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (xp->selector.dport) xp->selector.dport_mask = htons(0xffff); - sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2323,7 +2323,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (sel.dport) sel.dport_mask = htons(0xffff); - sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index ed8a2335442f..ad4ac2601a56 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -55,7 +55,7 @@ #include <net/protocol.h> #include <asm/byteorder.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "l2tp_core.h" diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index b8dbae82fab8..76130134bfa6 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file) */ pd->net = get_net_ns_by_pid(current->pid); if (IS_ERR(pd->net)) { - rc = -PTR_ERR(pd->net); + rc = PTR_ERR(pd->net); goto err_free_pd; } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index a8193f52c13c..d2726a74597d 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -103,7 +103,7 @@ static struct net_device_ops l2tp_eth_netdev_ops = { static void l2tp_eth_dev_setup(struct net_device *dev) { ether_setup(dev); - + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &l2tp_eth_netdev_ops; dev->destructor = free_netdev; } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index b6466e71f5e1..d21e7ebd91ca 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -480,18 +480,16 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (connected) rt = (struct rtable *) __sk_dst_check(sk, 0); + rcu_read_lock(); if (rt == NULL) { - struct ip_options_rcu *inet_opt; + const struct ip_options_rcu *inet_opt; - rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); /* Use correct destination address if we have options. */ if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; - rcu_read_unlock(); - /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. @@ -503,12 +501,20 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sk->sk_bound_dev_if); if (IS_ERR(rt)) goto no_route; - sk_setup_caps(sk, &rt->dst); + if (connected) + sk_setup_caps(sk, &rt->dst); + else + dst_release(&rt->dst); /* safe since we hold rcu_read_lock */ } - skb_dst_set(skb, dst_clone(&rt->dst)); + + /* We dont need to clone dst here, it is guaranteed to not disappear. + * __dev_xmit_skb() might force a refcount if needed. + */ + skb_dst_set_noref(skb, &rt->dst); /* Queue the packet to IP for output */ rc = ip_queue_xmit(skb, &inet->cork.fl); + rcu_read_unlock(); error: /* Update stats */ @@ -525,6 +531,7 @@ out: return rc; no_route: + rcu_read_unlock(); IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); rc = -EHOSTUNREACH; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 39a21d0c61c4..f42cd0915966 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -97,7 +97,7 @@ #include <net/xfrm.h> #include <asm/byteorder.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "l2tp_core.h" diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index d5d8d555c410..956b7e47dc52 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -300,26 +300,26 @@ int lapb_disconnect_request(struct net_device *dev) goto out; switch (lapb->state) { - case LAPB_STATE_0: - rc = LAPB_NOTCONNECTED; - goto out_put; + case LAPB_STATE_0: + rc = LAPB_NOTCONNECTED; + goto out_put; - case LAPB_STATE_1: + case LAPB_STATE_1: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 TX DISC(1)\n", lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S1 TX DISC(1)\n", lapb->dev); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", lapb->dev); #endif - lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND); - lapb->state = LAPB_STATE_0; - lapb_start_t1timer(lapb); - rc = LAPB_NOTCONNECTED; - goto out_put; - - case LAPB_STATE_2: - rc = LAPB_OK; - goto out_put; + lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND); + lapb->state = LAPB_STATE_0; + lapb_start_t1timer(lapb); + rc = LAPB_NOTCONNECTED; + goto out_put; + + case LAPB_STATE_2: + rc = LAPB_OK; + goto out_put; } lapb_clear_queues(lapb); diff --git a/net/lapb/lapb_in.c b/net/lapb/lapb_in.c index 21904a002449..2ec1af5c36cc 100644 --- a/net/lapb/lapb_in.c +++ b/net/lapb/lapb_in.c @@ -44,89 +44,86 @@ static void lapb_state0_machine(struct lapb_cb *lapb, struct sk_buff *skb, struct lapb_frame *frame) { switch (frame->type) { - case LAPB_SABM: + case LAPB_SABM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 RX SABM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 RX SABM(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } else { + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n", lapb->dev); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_3; - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_connect_indication(lapb, LAPB_OK); - } - break; + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_3; + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_connect_indication(lapb, LAPB_OK); + } + break; - case LAPB_SABME: + case LAPB_SABME: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 RX SABME(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 RX SABME(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S0 -> S3\n", lapb->dev); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_3; - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_connect_indication(lapb, LAPB_OK); - } else { -#if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n", - lapb->dev, frame->pf); + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_3; + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_connect_indication(lapb, LAPB_OK); + } else { +#if LAPB_DEBUG > 1 + printk(KERN_DEBUG "lapb: (%p) S0 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } - break; + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } + break; - case LAPB_DISC: + case LAPB_DISC: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S0 RX DISC(%d)\n", - lapb->dev, frame->pf); - printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 RX DISC(%d)\n", + lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S0 TX UA(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - break; + lapb_send_control(lapb, LAPB_UA, frame->pf, LAPB_RESPONSE); + break; - default: - break; + default: + break; } kfree_skb(skb); @@ -140,100 +137,97 @@ static void lapb_state1_machine(struct lapb_cb *lapb, struct sk_buff *skb, struct lapb_frame *frame) { switch (frame->type) { - case LAPB_SABM: + case LAPB_SABM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 RX SABM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 RX SABM(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n", - lapb->dev, frame->pf); -#endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } else { -#if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - } - break; - - case LAPB_SABME: + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 RX SABME(%d)\n", + printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n", lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + } + break; + + case LAPB_SABME: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 RX SABME(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - } else { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 TX UA(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } - break; - - case LAPB_DISC: + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 RX DISC(%d)\n", - lapb->dev, frame->pf); printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n", lapb->dev, frame->pf); #endif lapb_send_control(lapb, LAPB_DM, frame->pf, LAPB_RESPONSE); - break; + } + break; - case LAPB_UA: + case LAPB_DISC: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 RX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 RX DISC(%d)\n", + lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - if (frame->pf) { -#if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S1 -> S3\n", - lapb->dev); -#endif - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_3; - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_connect_confirmation(lapb, LAPB_OK); - } - break; + lapb_send_control(lapb, LAPB_DM, frame->pf, LAPB_RESPONSE); + break; - case LAPB_DM: + case LAPB_UA: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S1 RX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S1 RX UA(%d)\n", + lapb->dev, frame->pf); #endif - if (frame->pf) { + if (frame->pf) { #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", - lapb->dev); -#endif - lapb_clear_queues(lapb); - lapb->state = LAPB_STATE_0; - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb_disconnect_indication(lapb, LAPB_REFUSED); - } - break; + printk(KERN_DEBUG "lapb: (%p) S1 -> S3\n", lapb->dev); +#endif + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_3; + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_connect_confirmation(lapb, LAPB_OK); + } + break; + + case LAPB_DM: +#if LAPB_DEBUG > 1 + printk(KERN_DEBUG "lapb: (%p) S1 RX DM(%d)\n", + lapb->dev, frame->pf); +#endif + if (frame->pf) { +#if LAPB_DEBUG > 0 + printk(KERN_DEBUG "lapb: (%p) S1 -> S0\n", lapb->dev); +#endif + lapb_clear_queues(lapb); + lapb->state = LAPB_STATE_0; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb_disconnect_indication(lapb, LAPB_REFUSED); + } + break; } kfree_skb(skb); @@ -247,78 +241,73 @@ static void lapb_state2_machine(struct lapb_cb *lapb, struct sk_buff *skb, struct lapb_frame *frame) { switch (frame->type) { - case LAPB_SABM: - case LAPB_SABME: + case LAPB_SABM: + case LAPB_SABME: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S2 RX {SABM,SABME}(%d)\n", - lapb->dev, frame->pf); - printk(KERN_DEBUG "lapb: (%p) S2 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX {SABM,SABME}(%d)\n", + lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - break; + lapb_send_control(lapb, LAPB_DM, frame->pf, LAPB_RESPONSE); + break; - case LAPB_DISC: + case LAPB_DISC: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S2 RX DISC(%d)\n", - lapb->dev, frame->pf); - printk(KERN_DEBUG "lapb: (%p) S2 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX DISC(%d)\n", + lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 TX UA(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - break; + lapb_send_control(lapb, LAPB_UA, frame->pf, LAPB_RESPONSE); + break; - case LAPB_UA: + case LAPB_UA: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S2 RX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX UA(%d)\n", + lapb->dev, frame->pf); #endif - if (frame->pf) { + if (frame->pf) { #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n", lapb->dev); #endif - lapb->state = LAPB_STATE_0; - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb_disconnect_confirmation(lapb, LAPB_OK); - } - break; + lapb->state = LAPB_STATE_0; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb_disconnect_confirmation(lapb, LAPB_OK); + } + break; - case LAPB_DM: + case LAPB_DM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n", + lapb->dev, frame->pf); #endif - if (frame->pf) { + if (frame->pf) { #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n", - lapb->dev); -#endif - lapb->state = LAPB_STATE_0; - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb_disconnect_confirmation(lapb, - LAPB_NOTCONNECTED); - } - break; + printk(KERN_DEBUG "lapb: (%p) S2 -> S0\n", lapb->dev); +#endif + lapb->state = LAPB_STATE_0; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb_disconnect_confirmation(lapb, LAPB_NOTCONNECTED); + } + break; - case LAPB_I: - case LAPB_REJ: - case LAPB_RNR: - case LAPB_RR: + case LAPB_I: + case LAPB_REJ: + case LAPB_RNR: + case LAPB_RR: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S2 RX {I,REJ,RNR,RR}" - "(%d)\n", lapb->dev, frame->pf); - printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX {I,REJ,RNR,RR}(%d)\n", + lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S2 RX DM(%d)\n", + lapb->dev, frame->pf); #endif - if (frame->pf) - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - break; + if (frame->pf) + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + break; } kfree_skb(skb); @@ -336,277 +325,267 @@ static void lapb_state3_machine(struct lapb_cb *lapb, struct sk_buff *skb, LAPB_SMODULUS; switch (frame->type) { - case LAPB_SABM: + case LAPB_SABM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX SABM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 RX SABM(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } else { + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_requeue_frames(lapb); - } - break; + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_requeue_frames(lapb); + } + break; - case LAPB_SABME: + case LAPB_SABME: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX SABME(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 RX SABME(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 TX UA(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_requeue_frames(lapb); - } else { + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_requeue_frames(lapb); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } - break; + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } + break; - case LAPB_DISC: + case LAPB_DISC: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX DISC(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 RX DISC(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n", lapb->dev); #endif - lapb_clear_queues(lapb); - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_0; - lapb_disconnect_indication(lapb, LAPB_OK); - break; + lapb_clear_queues(lapb); + lapb_send_control(lapb, LAPB_UA, frame->pf, LAPB_RESPONSE); + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_0; + lapb_disconnect_indication(lapb, LAPB_OK); + break; - case LAPB_DM: + case LAPB_DM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 RX DM(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S0\n", lapb->dev); #endif - lapb_clear_queues(lapb); - lapb->state = LAPB_STATE_0; - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb_disconnect_indication(lapb, LAPB_NOTCONNECTED); - break; + lapb_clear_queues(lapb); + lapb->state = LAPB_STATE_0; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb_disconnect_indication(lapb, LAPB_NOTCONNECTED); + break; - case LAPB_RNR: + case LAPB_RNR: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX RNR(%d) R%d\n", - lapb->dev, frame->pf, frame->nr); + printk(KERN_DEBUG "lapb: (%p) S3 RX RNR(%d) R%d\n", + lapb->dev, frame->pf, frame->nr); #endif - lapb->condition |= LAPB_PEER_RX_BUSY_CONDITION; - lapb_check_need_response(lapb, frame->cr, frame->pf); - if (lapb_validate_nr(lapb, frame->nr)) { - lapb_check_iframes_acked(lapb, frame->nr); - } else { - lapb->frmr_data = *frame; - lapb->frmr_type = LAPB_FRMR_Z; - lapb_transmit_frmr(lapb); + lapb->condition |= LAPB_PEER_RX_BUSY_CONDITION; + lapb_check_need_response(lapb, frame->cr, frame->pf); + if (lapb_validate_nr(lapb, frame->nr)) { + lapb_check_iframes_acked(lapb, frame->nr); + } else { + lapb->frmr_data = *frame; + lapb->frmr_type = LAPB_FRMR_Z; + lapb_transmit_frmr(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); #endif - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_4; - lapb->n2count = 0; - } - break; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_4; + lapb->n2count = 0; + } + break; - case LAPB_RR: + case LAPB_RR: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX RR(%d) R%d\n", - lapb->dev, frame->pf, frame->nr); + printk(KERN_DEBUG "lapb: (%p) S3 RX RR(%d) R%d\n", + lapb->dev, frame->pf, frame->nr); #endif - lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION; - lapb_check_need_response(lapb, frame->cr, frame->pf); - if (lapb_validate_nr(lapb, frame->nr)) { - lapb_check_iframes_acked(lapb, frame->nr); - } else { - lapb->frmr_data = *frame; - lapb->frmr_type = LAPB_FRMR_Z; - lapb_transmit_frmr(lapb); + lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION; + lapb_check_need_response(lapb, frame->cr, frame->pf); + if (lapb_validate_nr(lapb, frame->nr)) { + lapb_check_iframes_acked(lapb, frame->nr); + } else { + lapb->frmr_data = *frame; + lapb->frmr_type = LAPB_FRMR_Z; + lapb_transmit_frmr(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); #endif - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_4; - lapb->n2count = 0; - } - break; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_4; + lapb->n2count = 0; + } + break; - case LAPB_REJ: + case LAPB_REJ: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX REJ(%d) R%d\n", - lapb->dev, frame->pf, frame->nr); + printk(KERN_DEBUG "lapb: (%p) S3 RX REJ(%d) R%d\n", + lapb->dev, frame->pf, frame->nr); #endif - lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION; - lapb_check_need_response(lapb, frame->cr, frame->pf); - if (lapb_validate_nr(lapb, frame->nr)) { - lapb_frames_acked(lapb, frame->nr); - lapb_stop_t1timer(lapb); - lapb->n2count = 0; - lapb_requeue_frames(lapb); - } else { - lapb->frmr_data = *frame; - lapb->frmr_type = LAPB_FRMR_Z; - lapb_transmit_frmr(lapb); + lapb->condition &= ~LAPB_PEER_RX_BUSY_CONDITION; + lapb_check_need_response(lapb, frame->cr, frame->pf); + if (lapb_validate_nr(lapb, frame->nr)) { + lapb_frames_acked(lapb, frame->nr); + lapb_stop_t1timer(lapb); + lapb->n2count = 0; + lapb_requeue_frames(lapb); + } else { + lapb->frmr_data = *frame; + lapb->frmr_type = LAPB_FRMR_Z; + lapb_transmit_frmr(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); #endif - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_4; - lapb->n2count = 0; - } - break; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_4; + lapb->n2count = 0; + } + break; - case LAPB_I: + case LAPB_I: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX I(%d) S%d R%d\n", - lapb->dev, frame->pf, frame->ns, frame->nr); + printk(KERN_DEBUG "lapb: (%p) S3 RX I(%d) S%d R%d\n", + lapb->dev, frame->pf, frame->ns, frame->nr); #endif - if (!lapb_validate_nr(lapb, frame->nr)) { - lapb->frmr_data = *frame; - lapb->frmr_type = LAPB_FRMR_Z; - lapb_transmit_frmr(lapb); + if (!lapb_validate_nr(lapb, frame->nr)) { + lapb->frmr_data = *frame; + lapb->frmr_type = LAPB_FRMR_Z; + lapb_transmit_frmr(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); #endif - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_4; - lapb->n2count = 0; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_4; + lapb->n2count = 0; + break; + } + if (lapb->condition & LAPB_PEER_RX_BUSY_CONDITION) + lapb_frames_acked(lapb, frame->nr); + else + lapb_check_iframes_acked(lapb, frame->nr); + + if (frame->ns == lapb->vr) { + int cn; + cn = lapb_data_indication(lapb, skb); + queued = 1; + /* + * If upper layer has dropped the frame, we + * basically ignore any further protocol + * processing. This will cause the peer + * to re-transmit the frame later like + * a frame lost on the wire. + */ + if (cn == NET_RX_DROP) { + printk(KERN_DEBUG "LAPB: rx congestion\n"); break; } - if (lapb->condition & LAPB_PEER_RX_BUSY_CONDITION) - lapb_frames_acked(lapb, frame->nr); - else - lapb_check_iframes_acked(lapb, frame->nr); - - if (frame->ns == lapb->vr) { - int cn; - cn = lapb_data_indication(lapb, skb); - queued = 1; - /* - * If upper layer has dropped the frame, we - * basically ignore any further protocol - * processing. This will cause the peer - * to re-transmit the frame later like - * a frame lost on the wire. - */ - if (cn == NET_RX_DROP) { - printk(KERN_DEBUG - "LAPB: rx congestion\n"); - break; + lapb->vr = (lapb->vr + 1) % modulus; + lapb->condition &= ~LAPB_REJECT_CONDITION; + if (frame->pf) + lapb_enquiry_response(lapb); + else { + if (!(lapb->condition & + LAPB_ACK_PENDING_CONDITION)) { + lapb->condition |= LAPB_ACK_PENDING_CONDITION; + lapb_start_t2timer(lapb); } - lapb->vr = (lapb->vr + 1) % modulus; - lapb->condition &= ~LAPB_REJECT_CONDITION; + } + } else { + if (lapb->condition & LAPB_REJECT_CONDITION) { if (frame->pf) lapb_enquiry_response(lapb); - else { - if (!(lapb->condition & - LAPB_ACK_PENDING_CONDITION)) { - lapb->condition |= LAPB_ACK_PENDING_CONDITION; - lapb_start_t2timer(lapb); - } - } } else { - if (lapb->condition & LAPB_REJECT_CONDITION) { - if (frame->pf) - lapb_enquiry_response(lapb); - } else { -#if LAPB_DEBUG > 1 - printk(KERN_DEBUG - "lapb: (%p) S3 TX REJ(%d) R%d\n", - lapb->dev, frame->pf, lapb->vr); -#endif - lapb->condition |= LAPB_REJECT_CONDITION; - lapb_send_control(lapb, LAPB_REJ, - frame->pf, - LAPB_RESPONSE); - lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; - } +#if LAPB_DEBUG > 1 + printk(KERN_DEBUG + "lapb: (%p) S3 TX REJ(%d) R%d\n", + lapb->dev, frame->pf, lapb->vr); +#endif + lapb->condition |= LAPB_REJECT_CONDITION; + lapb_send_control(lapb, LAPB_REJ, frame->pf, + LAPB_RESPONSE); + lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; } - break; + } + break; - case LAPB_FRMR: + case LAPB_FRMR: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX FRMR(%d) %02X " - "%02X %02X %02X %02X\n", lapb->dev, frame->pf, - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4]); + printk(KERN_DEBUG "lapb: (%p) S3 RX FRMR(%d) %02X " + "%02X %02X %02X %02X\n", lapb->dev, frame->pf, + skb->data[0], skb->data[1], skb->data[2], + skb->data[3], skb->data[4]); #endif - lapb_establish_data_link(lapb); + lapb_establish_data_link(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S1\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S1\n", lapb->dev); #endif - lapb_requeue_frames(lapb); - lapb->state = LAPB_STATE_1; - break; + lapb_requeue_frames(lapb); + lapb->state = LAPB_STATE_1; + break; - case LAPB_ILLEGAL: + case LAPB_ILLEGAL: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S3 RX ILLEGAL(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S3 RX ILLEGAL(%d)\n", + lapb->dev, frame->pf); #endif - lapb->frmr_data = *frame; - lapb->frmr_type = LAPB_FRMR_W; - lapb_transmit_frmr(lapb); + lapb->frmr_data = *frame; + lapb->frmr_type = LAPB_FRMR_W; + lapb_transmit_frmr(lapb); #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S3 -> S4\n", lapb->dev); #endif - lapb_start_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_4; - lapb->n2count = 0; - break; + lapb_start_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_4; + lapb->n2count = 0; + break; } if (!queued) @@ -621,75 +600,73 @@ static void lapb_state4_machine(struct lapb_cb *lapb, struct sk_buff *skb, struct lapb_frame *frame) { switch (frame->type) { - case LAPB_SABM: + case LAPB_SABM: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 RX SABM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S4 RX SABM(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } else { + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } else { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n", lapb->dev); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_3; - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_connect_indication(lapb, LAPB_OK); - } - break; + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_3; + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_connect_indication(lapb, LAPB_OK); + } + break; - case LAPB_SABME: + case LAPB_SABME: #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 RX SABME(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S4 RX SABME(%d)\n", + lapb->dev, frame->pf); #endif - if (lapb->mode & LAPB_EXTENDED) { + if (lapb->mode & LAPB_EXTENDED) { #if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n", - lapb->dev, frame->pf); + printk(KERN_DEBUG "lapb: (%p) S4 TX UA(%d)\n", + lapb->dev, frame->pf); #endif #if LAPB_DEBUG > 0 - printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n", - lapb->dev); + printk(KERN_DEBUG "lapb: (%p) S4 -> S3\n", lapb->dev); #endif - lapb_send_control(lapb, LAPB_UA, frame->pf, - LAPB_RESPONSE); - lapb_stop_t1timer(lapb); - lapb_stop_t2timer(lapb); - lapb->state = LAPB_STATE_3; - lapb->condition = 0x00; - lapb->n2count = 0; - lapb->vs = 0; - lapb->vr = 0; - lapb->va = 0; - lapb_connect_indication(lapb, LAPB_OK); - } else { -#if LAPB_DEBUG > 1 - printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n", - lapb->dev, frame->pf); + lapb_send_control(lapb, LAPB_UA, frame->pf, + LAPB_RESPONSE); + lapb_stop_t1timer(lapb); + lapb_stop_t2timer(lapb); + lapb->state = LAPB_STATE_3; + lapb->condition = 0x00; + lapb->n2count = 0; + lapb->vs = 0; + lapb->vr = 0; + lapb->va = 0; + lapb_connect_indication(lapb, LAPB_OK); + } else { +#if LAPB_DEBUG > 1 + printk(KERN_DEBUG "lapb: (%p) S4 TX DM(%d)\n", + lapb->dev, frame->pf); #endif - lapb_send_control(lapb, LAPB_DM, frame->pf, - LAPB_RESPONSE); - } - break; + lapb_send_control(lapb, LAPB_DM, frame->pf, + LAPB_RESPONSE); + } + break; } kfree_skb(skb); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cd5fb40d3fd4..556e7e6ddf0a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -698,6 +698,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; dev->destructor = free_netdev; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 2b18053070c1..3460108810d5 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -57,29 +57,29 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define PREQ_IE_TTL(x) (*(x + 2)) #define PREQ_IE_PREQ_ID(x) u32_field_get(x, 3, 0) #define PREQ_IE_ORIG_ADDR(x) (x + 7) -#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0); -#define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)); -#define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)); +#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0) +#define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)) +#define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)) #define PREQ_IE_TARGET_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) #define PREQ_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) -#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)); +#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)) #define PREP_IE_FLAGS(x) PREQ_IE_FLAGS(x) #define PREP_IE_HOPCOUNT(x) PREQ_IE_HOPCOUNT(x) #define PREP_IE_TTL(x) PREQ_IE_TTL(x) #define PREP_IE_ORIG_ADDR(x) (x + 3) -#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0); -#define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)); -#define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)); +#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0) +#define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)) +#define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)) #define PREP_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) -#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)); +#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)) #define PERR_IE_TTL(x) (*(x)) #define PERR_IE_TARGET_FLAGS(x) (*(x + 2)) #define PERR_IE_TARGET_ADDR(x) (x + 3) -#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0); -#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0); +#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0) +#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0) #define MSEC_TO_TU(x) (x*1000/1024) #define SN_GT(x, y) ((long) (y) - (long) (x) < 0) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index b83870bf60fa..3db78b696c5c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -97,7 +97,6 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); while (sta) { @@ -105,7 +104,6 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } @@ -123,7 +121,6 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); while (sta) { @@ -132,7 +129,6 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) break; sta = rcu_dereference_check(sta->hnext, - rcu_read_lock_held() || lockdep_is_held(&local->sta_lock) || lockdep_is_held(&local->sta_mtx)); } diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 2c5b348eb3a8..ba36c283d837 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -109,6 +109,16 @@ config IP_SET_HASH_NETPORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETIFACE + tristate "hash:net,iface set support" + depends on IP_SET + help + This option adds the hash:net,iface set type support, by which + one can store IPv4/IPv6 network address/prefix and + interface name pairs as elements in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_LIST_SET tristate "list:set set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 5adbdab67bd2..6e965ecd5444 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o +obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index ba2d16607f48..e3e73997c3be 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -54,7 +54,7 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) } static int -bitmap_ip_test(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_ip *map = set->data; u16 id = *(u16 *)value; @@ -63,7 +63,7 @@ bitmap_ip_test(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ip_add(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ip *map = set->data; u16 id = *(u16 *)value; @@ -75,7 +75,7 @@ bitmap_ip_add(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ip_del(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ip *map = set->data; u16 id = *(u16 *)value; @@ -131,7 +131,7 @@ nla_put_failure: /* Timeout variant */ static int -bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_ip *map = set->data; const unsigned long *members = map->members; @@ -141,13 +141,13 @@ bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ip *map = set->data; unsigned long *members = map->members; u16 id = *(u16 *)value; - if (ip_set_timeout_test(members[id])) + if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; members[id] = ip_set_timeout_set(timeout); @@ -156,7 +156,7 @@ bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout) +bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ip *map = set->data; unsigned long *members = map->members; @@ -219,24 +219,25 @@ nla_put_failure: static int bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; u32 ip; - ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); + ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (ip < map->first_ip || ip > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; ip = ip_to_id(map, ip); - return adtfn(set, &ip, map->timeout); + return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags); } static int bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -266,7 +267,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST) { id = ip_to_id(map, ip); - return adtfn(set, &id, timeout); + return adtfn(set, &id, timeout, flags); } if (tb[IPSET_ATTR_IP_TO]) { @@ -283,8 +284,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(ip, ip_to, cidr); } else ip_to = ip; @@ -293,7 +293,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], for (; !before(ip_to, ip); ip += map->hosts) { id = ip_to_id(map, ip); - ret = adtfn(set, &id, timeout); + ret = adtfn(set, &id, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -478,7 +478,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (cidr >= 32) return -IPSET_ERR_INVALID_CIDR; - last_ip = first_ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(first_ip, last_ip, cidr); } else return -IPSET_ERR_PROTOCOL; @@ -551,7 +551,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = AF_INET, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index a274300b6a56..56096f544978 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -99,7 +99,7 @@ bitmap_ipmac_exist(const struct ipmac_telem *elem) /* Base variant */ static int -bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; @@ -117,7 +117,7 @@ bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; @@ -146,7 +146,7 @@ bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; @@ -212,7 +212,7 @@ nla_put_failure: /* Timeout variant */ static int -bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; @@ -231,15 +231,16 @@ bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); + bool flag_exist = flags & IPSET_FLAG_EXIST; switch (elem->match) { case MAC_UNSET: - if (!data->ether) + if (!(data->ether || flag_exist)) /* Already added without ethernet address */ return -IPSET_ERR_EXIST; /* Fill the MAC address and activate the timer */ @@ -251,7 +252,7 @@ bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout) elem->timeout = ip_set_timeout_set(timeout); break; case MAC_FILLED: - if (!bitmap_expired(map, data->id)) + if (!(bitmap_expired(map, data->id) || flag_exist)) return -IPSET_ERR_EXIST; /* Fall through */ case MAC_EMPTY: @@ -273,7 +274,7 @@ bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout) +bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_ipmac *map = set->data; const struct ipmac *data = value; @@ -337,17 +338,18 @@ nla_put_failure: static int bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct ipmac data; /* MAC can be src only */ - if (!(flags & IPSET_DIM_TWO_SRC)) + if (!(opt->flags & IPSET_DIM_TWO_SRC)) return 0; - data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC)); + data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); if (data.id < map->first_ip || data.id > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -359,12 +361,12 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, data.id -= map->first_ip; data.ether = eth_hdr(skb)->h_source; - return adtfn(set, &data, map->timeout); + return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags); } static int bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -399,7 +401,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], data.id -= map->first_ip; - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -577,7 +579,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], if (cidr >= 32) return -IPSET_ERR_INVALID_CIDR; - last_ip = first_ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(first_ip, last_ip, cidr); } else return -IPSET_ERR_PROTOCOL; @@ -622,7 +624,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = AF_INET, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -632,7 +635,8 @@ static struct ip_set_type bitmap_ipmac_type = { }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, - [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, + .len = ETH_ALEN }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, }, diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 6b38eb8f6ed8..29ba93bb94be 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -40,7 +40,7 @@ struct bitmap_port { /* Base variant */ static int -bitmap_port_test(struct ip_set *set, void *value, u32 timeout) +bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_port *map = set->data; u16 id = *(u16 *)value; @@ -49,7 +49,7 @@ bitmap_port_test(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_port_add(struct ip_set *set, void *value, u32 timeout) +bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_port *map = set->data; u16 id = *(u16 *)value; @@ -61,7 +61,7 @@ bitmap_port_add(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_port_del(struct ip_set *set, void *value, u32 timeout) +bitmap_port_del(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_port *map = set->data; u16 id = *(u16 *)value; @@ -119,7 +119,7 @@ nla_put_failure: /* Timeout variant */ static int -bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout) +bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) { const struct bitmap_port *map = set->data; const unsigned long *members = map->members; @@ -129,13 +129,13 @@ bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout) +bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_port *map = set->data; unsigned long *members = map->members; u16 id = *(u16 *)value; - if (ip_set_timeout_test(members[id])) + if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; members[id] = ip_set_timeout_set(timeout); @@ -144,7 +144,7 @@ bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout) } static int -bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout) +bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) { struct bitmap_port *map = set->data; unsigned long *members = map->members; @@ -208,14 +208,16 @@ nla_put_failure: static int bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; __be16 __port; u16 port = 0; - if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port)) + if (!ip_set_get_ip_port(skb, opt->family, + opt->flags & IPSET_DIM_ONE_SRC, &__port)) return -EINVAL; port = ntohs(__port); @@ -225,12 +227,12 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, port -= map->first_port; - return adtfn(set, &port, map->timeout); + return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags); } static int bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -259,7 +261,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST) { id = port - map->first_port; - return adtfn(set, &id, timeout); + return adtfn(set, &id, timeout, flags); } if (tb[IPSET_ATTR_PORT_TO]) { @@ -277,7 +279,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], for (; port <= port_to; port++) { id = port - map->first_port; - ret = adtfn(set, &id, timeout); + ret = adtfn(set, &id, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -482,7 +484,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8041befc6555..d7e86ef9d23a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,10 +17,10 @@ #include <linux/spinlock.h> #include <linux/netlink.h> #include <linux/rculist.h> -#include <linux/version.h> #include <net/netlink.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/ipset/ip_set.h> @@ -70,7 +70,8 @@ find_set_type(const char *name, u8 family, u8 revision) list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && (type->family == family || type->family == AF_UNSPEC) && - type->revision == revision) + revision >= type->revision_min && + revision <= type->revision_max) return type; return NULL; } @@ -135,10 +136,10 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) if (STREQ(type->name, name) && (type->family == family || type->family == AF_UNSPEC)) { found = true; - if (type->revision < *min) - *min = type->revision; - if (type->revision > *max) - *max = type->revision; + if (type->revision_min < *min) + *min = type->revision_min; + if (type->revision_max > *max) + *max = type->revision_max; } rcu_read_unlock(); if (found) @@ -159,25 +160,27 @@ ip_set_type_register(struct ip_set_type *type) int ret = 0; if (type->protocol != IPSET_PROTOCOL) { - pr_warning("ip_set type %s, family %s, revision %u uses " + pr_warning("ip_set type %s, family %s, revision %u:%u uses " "wrong protocol version %u (want %u)\n", type->name, family_name(type->family), - type->revision, type->protocol, IPSET_PROTOCOL); + type->revision_min, type->revision_max, + type->protocol, IPSET_PROTOCOL); return -EINVAL; } ip_set_type_lock(); - if (find_set_type(type->name, type->family, type->revision)) { + if (find_set_type(type->name, type->family, type->revision_min)) { /* Duplicate! */ - pr_warning("ip_set type %s, family %s, revision %u " + pr_warning("ip_set type %s, family %s with revision min %u " "already registered!\n", type->name, - family_name(type->family), type->revision); + family_name(type->family), type->revision_min); ret = -EINVAL; goto unlock; } list_add_rcu(&type->list, &ip_set_type_list); - pr_debug("type %s, family %s, revision %u registered.\n", - type->name, family_name(type->family), type->revision); + pr_debug("type %s, family %s, revision %u:%u registered.\n", + type->name, family_name(type->family), + type->revision_min, type->revision_max); unlock: ip_set_type_unlock(); return ret; @@ -189,15 +192,15 @@ void ip_set_type_unregister(struct ip_set_type *type) { ip_set_type_lock(); - if (!find_set_type(type->name, type->family, type->revision)) { - pr_warning("ip_set type %s, family %s, revision %u " + if (!find_set_type(type->name, type->family, type->revision_min)) { + pr_warning("ip_set type %s, family %s with revision min %u " "not registered\n", type->name, - family_name(type->family), type->revision); + family_name(type->family), type->revision_min); goto unlock; } list_del_rcu(&type->list); - pr_debug("type %s, family %s, revision %u unregistered.\n", - type->name, family_name(type->family), type->revision); + pr_debug("type %s, family %s with revision min %u unregistered.\n", + type->name, family_name(type->family), type->revision_min); unlock: ip_set_type_unlock(); @@ -325,7 +328,8 @@ __ip_set_put(ip_set_id_t index) int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, - u8 family, u8 dim, u8 flags) + const struct xt_action_param *par, + const struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_list[index]; int ret = 0; @@ -333,19 +337,19 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); - if (dim < set->type->dimension || - !(family == set->family || set->family == AF_UNSPEC)) + if (opt->dim < set->type->dimension || + !(opt->family == set->family || set->family == AF_UNSPEC)) return 0; read_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags); + ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); read_unlock_bh(&set->lock); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be competed, ADD is triggered\n"); write_lock_bh(&set->lock); - set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + set->variant->kadt(set, skb, par, IPSET_ADD, opt); write_unlock_bh(&set->lock); ret = 1; } @@ -357,7 +361,8 @@ EXPORT_SYMBOL_GPL(ip_set_test); int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, - u8 family, u8 dim, u8 flags) + const struct xt_action_param *par, + const struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_list[index]; int ret; @@ -365,12 +370,12 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); - if (dim < set->type->dimension || - !(family == set->family || set->family == AF_UNSPEC)) + if (opt->dim < set->type->dimension || + !(opt->family == set->family || set->family == AF_UNSPEC)) return 0; write_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); write_unlock_bh(&set->lock); return ret; @@ -379,7 +384,8 @@ EXPORT_SYMBOL_GPL(ip_set_add); int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, - u8 family, u8 dim, u8 flags) + const struct xt_action_param *par, + const struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_list[index]; int ret = 0; @@ -387,12 +393,12 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, BUG_ON(set == NULL); pr_debug("set %s, index %u\n", set->name, index); - if (dim < set->type->dimension || - !(family == set->family || set->family == AF_UNSPEC)) + if (opt->dim < set->type->dimension || + !(opt->family == set->family || set->family == AF_UNSPEC)) return 0; write_lock_bh(&set->lock); - ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags); + ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); write_unlock_bh(&set->lock); return ret; @@ -656,6 +662,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, rwlock_init(&set->lock); strlcpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; + set->revision = revision; /* * Next, check that we know the type, and take @@ -675,8 +682,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy)) { - ret = -IPSET_ERR_PROTOCOL; - goto put_out; + ret = -IPSET_ERR_PROTOCOL; + goto put_out; } ret = set->type->create(set, tb, flags); @@ -696,7 +703,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, (flags & IPSET_FLAG_EXIST) && STREQ(set->type->name, clash->type->name) && set->type->family == clash->type->family && - set->type->revision == clash->type->revision && + set->type->revision_min == clash->type->revision_min && + set->type->revision_max == clash->type->revision_max && set->variant->same_set(set, clash)) ret = 0; goto cleanup; @@ -767,7 +775,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < ip_set_max; i++) { if (ip_set_list[i] != NULL && ip_set_list[i]->ref) { - ret = IPSET_ERR_BUSY; + ret = -IPSET_ERR_BUSY; goto out; } } @@ -939,10 +947,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, /* List/save set data */ -#define DUMP_INIT 0L -#define DUMP_ALL 1L -#define DUMP_ONE 2L -#define DUMP_LAST 3L +#define DUMP_INIT 0 +#define DUMP_ALL 1 +#define DUMP_ONE 2 +#define DUMP_LAST 3 + +#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF) +#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16) static int ip_set_dump_done(struct netlink_callback *cb) @@ -973,6 +984,7 @@ dump_init(struct netlink_callback *cb) int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *attr = (void *)nlh + min_len; + u32 dump_type; ip_set_id_t index; /* Second pass, so parser can't fail */ @@ -984,17 +996,22 @@ dump_init(struct netlink_callback *cb) * [..]: type specific */ - if (!cda[IPSET_ATTR_SETNAME]) { - cb->args[0] = DUMP_ALL; - return 0; - } + if (cda[IPSET_ATTR_SETNAME]) { + index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -ENOENT; - index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); - if (index == IPSET_INVALID_ID) - return -ENOENT; + dump_type = DUMP_ONE; + cb->args[1] = index; + } else + dump_type = DUMP_ALL; + + if (cda[IPSET_ATTR_FLAGS]) { + u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); + dump_type |= (f << 16); + } + cb->args[0] = dump_type; - cb->args[0] = DUMP_ONE; - cb->args[1] = index; return 0; } @@ -1005,9 +1022,10 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + u32 dump_type, dump_flags; int ret = 0; - if (cb->args[0] == DUMP_INIT) { + if (!cb->args[0]) { ret = dump_init(cb); if (ret < 0) { nlh = nlmsg_hdr(cb->skb); @@ -1022,14 +1040,17 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (cb->args[1] >= ip_set_max) goto out; - max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + dump_type = DUMP_TYPE(cb->args[0]); + dump_flags = DUMP_FLAGS(cb->args[0]); + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; dump_last: - pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); + pr_debug("args[0]: %u %u args[1]: %ld\n", + dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; set = ip_set_list[index]; if (set == NULL) { - if (cb->args[0] == DUMP_ONE) { + if (dump_type == DUMP_ONE) { ret = -ENOENT; goto out; } @@ -1038,8 +1059,8 @@ dump_last: /* When dumping all sets, we must dump "sorted" * so that lists (unions of sets) are dumped last. */ - if (cb->args[0] != DUMP_ONE && - ((cb->args[0] == DUMP_ALL) == + if (dump_type != DUMP_ONE && + ((dump_type == DUMP_ALL) == !!(set->type->features & IPSET_DUMP_LAST))) continue; pr_debug("List set: %s\n", set->name); @@ -1057,6 +1078,8 @@ dump_last: } NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); + if (dump_flags & IPSET_FLAG_LIST_SETNAME) + goto next_set; switch (cb->args[2]) { case 0: /* Core header data */ @@ -1065,28 +1088,27 @@ dump_last: NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, set->family); NLA_PUT_U8(skb, IPSET_ATTR_REVISION, - set->type->revision); + set->revision); ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; + if (dump_flags & IPSET_FLAG_LIST_HEADER) + goto next_set; /* Fall through and add elements */ default: read_lock_bh(&set->lock); ret = set->variant->list(set, skb, cb); read_unlock_bh(&set->lock); - if (!cb->args[2]) { + if (!cb->args[2]) /* Set is done, proceed with next one */ - if (cb->args[0] == DUMP_ONE) - cb->args[1] = IPSET_INVALID_ID; - else - cb->args[1]++; - } + goto next_set; goto release_refcount; } } /* If we dump all sets, continue with dumping last ones */ - if (cb->args[0] == DUMP_ALL) { - cb->args[0] = DUMP_LAST; + if (dump_type == DUMP_ALL) { + dump_type = DUMP_LAST; + cb->args[0] = dump_type | (dump_flags << 16); cb->args[1] = 0; goto dump_last; } @@ -1094,6 +1116,11 @@ dump_last: nla_put_failure: ret = -EFAULT; +next_set: + if (dump_type == DUMP_ONE) + cb->args[1] = IPSET_INVALID_ID; + else + cb->args[1]++; release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { @@ -1120,7 +1147,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, return netlink_dump_start(ctnl, skb, nlh, ip_set_dump_start, - ip_set_dump_done); + ip_set_dump_done, 0); } /* Add, del and test */ @@ -1139,17 +1166,18 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 flags, bool use_lineno) { - int ret, retried = 0; + int ret; u32 lineno = 0; - bool eexist = flags & IPSET_FLAG_EXIST; + bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { write_lock_bh(&set->lock); - ret = set->variant->uadt(set, tb, adt, &lineno, flags); + ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); write_unlock_bh(&set->lock); + retried = true; } while (ret == -EAGAIN && set->variant->resize && - (ret = set->variant->resize(set, retried++)) == 0); + (ret = set->variant->resize(set, retried)) == 0); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; @@ -1322,7 +1350,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; read_lock_bh(&set->lock); - ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0); + ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); read_unlock_bh(&set->lock); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) @@ -1365,7 +1393,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); - NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->revision); nlmsg_end(skb2, nlh2); ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 43bcce200129..f2d576e6b769 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -53,7 +53,8 @@ struct hash_ip4_telem { static inline bool hash_ip4_data_equal(const struct hash_ip4_elem *ip1, - const struct hash_ip4_elem *ip2) + const struct hash_ip4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip; } @@ -108,25 +109,32 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d) +{ + h->next.ip = ntohl(d->ip); +} + static int hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; __be32 ip; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); ip &= ip_set_netmask(h->netmask); if (ip == 0) return -EINVAL; - return adtfn(set, &ip, h->timeout); + return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); } static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -157,7 +165,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], nip = htonl(ip); if (nip == 0) return -IPSET_ERR_HASH_ELEM; - return adtfn(set, &nip, timeout); + return adtfn(set, &nip, timeout, flags); } if (tb[IPSET_ATTR_IP_TO]) { @@ -171,18 +179,19 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(ip, ip_to, cidr); } else ip_to = ip; hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + if (retried) + ip = h->next.ip; for (; !before(ip_to, ip); ip += hosts) { nip = htonl(ip); if (nip == 0) return -IPSET_ERR_HASH_ELEM; - ret = adtfn(set, &nip, timeout); + ret = adtfn(set, &nip, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -217,7 +226,8 @@ struct hash_ip6_telem { static inline bool hash_ip6_data_equal(const struct hash_ip6_elem *ip1, - const struct hash_ip6_elem *ip2) + const struct hash_ip6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; } @@ -281,20 +291,26 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d) +{ +} + static int hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; union nf_inet_addr ip; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6); ip6_netmask(&ip, h->netmask); if (ipv6_addr_any(&ip.in6)) return -EINVAL; - return adtfn(set, &ip, h->timeout); + return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); } static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { @@ -305,7 +321,7 @@ static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { static int hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -336,7 +352,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ret = adtfn(set, &ip, timeout); + ret = adtfn(set, &ip, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -428,7 +444,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 14281b6b8074..6ee10f5d59bd 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -60,7 +60,8 @@ struct hash_ipport4_telem { static inline bool hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, - const struct hash_ipport4_elem *ip2) + const struct hash_ipport4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip && ip1->port == ip2->port && @@ -124,31 +125,40 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipport4_data_next(struct ip_set_hash *h, + const struct hash_ipport4_elem *d) +{ + h->next.ip = ntohl(d->ip); + h->next.port = ntohs(d->port); +} + static int hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem data = { }; - u32 ip, ip_to, p, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -192,7 +202,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -208,8 +218,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(ip, ip_to, cidr); } else ip_to = ip; @@ -220,17 +229,21 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - for (; !before(ip_to, ip); ip++) - for (p = port; p <= port_to; p++) { + if (retried) + ip = h->next.ip; + for (; !before(ip_to, ip); ip++) { + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; else ret = 0; } + } return ret; } @@ -264,7 +277,8 @@ struct hash_ipport6_telem { static inline bool hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, - const struct hash_ipport6_elem *ip2) + const struct hash_ipport6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && ip1->port == ip2->port && @@ -328,26 +342,34 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipport6_data_next(struct ip_set_hash *h, + const struct hash_ipport6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem data = { }; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -396,7 +418,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -405,9 +427,11 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -491,7 +515,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + .revision_max = 1, /* SCTP and UDPLITE support added */ .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 401c8a2531db..fb90e344e907 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -62,7 +62,8 @@ struct hash_ipportip4_telem { static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, - const struct hash_ipportip4_elem *ip2) + const struct hash_ipportip4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip && ip1->ip2 == ip2->ip2 && @@ -127,32 +128,41 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipportip4_data_next(struct ip_set_hash *h, + const struct hash_ipportip4_elem *d) +{ + h->next.ip = ntohl(d->ip); + h->next.port = ntohs(d->port); +} + static int hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); - ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem data = { }; - u32 ip, ip_to, p, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -200,7 +210,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -216,8 +226,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); + ip_set_mask_from_to(ip, ip_to, cidr); } else ip_to = ip; @@ -228,17 +237,21 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - for (; !before(ip_to, ip); ip++) - for (p = port; p <= port_to; p++) { + if (retried) + ip = h->next.ip; + for (; !before(ip_to, ip); ip++) { + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { data.ip = htonl(ip); data.port = htons(p); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; else ret = 0; } + } return ret; } @@ -274,7 +287,8 @@ struct hash_ipportip6_telem { static inline bool hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, - const struct hash_ipportip6_elem *ip2) + const struct hash_ipportip6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && @@ -341,27 +355,35 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipportip6_data_next(struct ip_set_hash *h, + const struct hash_ipportip6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem data = { }; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -414,7 +436,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -423,9 +445,11 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -509,7 +533,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + .revision_max = 1, /* SCTP and UDPLITE support added */ .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 4743e5402522..deb3e3dfa5fc 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -62,7 +62,8 @@ struct hash_ipportnet4_telem { static inline bool hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, - const struct hash_ipportnet4_elem *ip2) + const struct hash_ipportnet4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip && ip1->ip2 == ip2->ip2 && @@ -140,39 +141,51 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipportnet4_data_next(struct ip_set_hash *h, + const struct hash_ipportnet4_elem *d) +{ + h->next.ip = ntohl(d->ip); + h->next.port = ntohs(d->port); + h->next.ip2 = ntohl(d->ip2); +} + static int hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet4_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); - ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); data.ip2 &= ip_set_netmask(data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; - u32 ip, ip_to, p, port, port_to; + u32 ip, ip_to, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to, ip2_last, ip2; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -186,21 +199,19 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from); if (ret) return ret; - if (tb[IPSET_ATTR_CIDR2]) + if (tb[IPSET_ATTR_CIDR2]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); - - if (!data.cidr) - return -IPSET_ERR_INVALID_CIDR; - - data.ip2 &= ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_PORT]) data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); @@ -225,14 +236,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; if (adt == IPSET_TEST || - !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || - tb[IPSET_ATTR_PORT_TO])) { - ret = adtfn(set, &data, timeout); + !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports || + tb[IPSET_ATTR_IP2_TO])) { + data.ip = htonl(ip); + data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - ip = ntohl(data.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) @@ -244,29 +257,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (cidr > 32) return -IPSET_ERR_INVALID_CIDR; - ip &= ip_set_hostmask(cidr); - ip_to = ip | ~ip_set_hostmask(cidr); - } else - ip_to = ip; + ip_set_mask_from_to(ip, ip_to, cidr); + } port_to = port = ntohs(data.port); - if (with_ports && tb[IPSET_ATTR_PORT_TO]) { + if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip2_from, ip2_to, data.cidr); + } - for (; !before(ip_to, ip); ip++) - for (p = port; p <= port_to; p++) { - data.ip = htonl(ip); + if (retried) + ip = h->next.ip; + for (; !before(ip_to, ip); ip++) { + data.ip = htonl(ip); + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { data.port = htons(p); - ret = adtfn(set, &data, timeout); - - if (ret && !ip_set_eexist(ret, flags)) - return ret; - else - ret = 0; + ip2 = retried && ip == h->next.ip && p == h->next.port + ? h->next.ip2 : ip2_from; + while (!after(ip2, ip2_to)) { + data.ip2 = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &data.cidr); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } } + } return ret; } @@ -302,7 +336,8 @@ struct hash_ipportnet6_telem { static inline bool hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, - const struct hash_ipportnet6_elem *ip2) + const struct hash_ipportnet6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && @@ -388,34 +423,43 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_ipportnet6_data_next(struct ip_set_hash *h, + const struct hash_ipportnet6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_ipportnet6_elem data = - { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_ipportnet6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); ip6_netmask(&data.ip2, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -432,6 +476,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -476,7 +522,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -485,9 +531,11 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -574,7 +622,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + /* 1 SCTP and UDPLITE support added */ + .revision_max = 2, /* Range as input support for IPv4 added */ .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -587,6 +637,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c4db202b7da4..60d016541c58 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -58,7 +58,8 @@ struct hash_net4_telem { static inline bool hash_net4_data_equal(const struct hash_net4_elem *ip1, - const struct hash_net4_elem *ip2) + const struct hash_net4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr; } @@ -125,33 +126,44 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_net4_data_next(struct ip_set_hash *h, + const struct hash_net4_elem *d) +{ + h->next.ip = ntohl(d->ip); +} + static int hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net4_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); data.ip &= ip_set_netmask(data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem data = { .cidr = HOST_MASK }; u32 timeout = h->timeout; + u32 ip = 0, ip_to, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -161,17 +173,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - - if (!data.cidr) - return -IPSET_ERR_INVALID_CIDR; - - data.ip &= ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -179,9 +189,35 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ret = adtfn(set, &data, timeout); + if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } - return ip_set_eexist(ret, flags) ? 0 : ret; + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + ret = adtfn(set, &data, timeout, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip = last + 1; + } + return ret; } static bool @@ -214,7 +250,8 @@ struct hash_net6_telem { static inline bool hash_net6_data_equal(const struct hash_net6_elem *ip1, - const struct hash_net6_elem *ip2) + const struct hash_net6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && ip1->cidr == ip2->cidr; @@ -290,28 +327,37 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_net6_data_next(struct ip_set_hash *h, + const struct hash_net6_elem *d) +{ +} + static int hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - struct hash_net6_elem data = { .cidr = h->nets[0].cidr || HOST_MASK }; + struct hash_net6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); ip6_netmask(&data.ip, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -322,6 +368,8 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -344,7 +392,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -425,7 +473,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 1, /* Range as input support for IPv4 added */ .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -436,6 +485,7 @@ static struct ip_set_type hash_net_type __read_mostly = { }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, }, diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c new file mode 100644 index 000000000000..e13095deb50d --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -0,0 +1,786 @@ +/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net,iface type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <linux/rbtree.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_timeout.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); +MODULE_DESCRIPTION("hash:net,iface type of IP sets"); +MODULE_ALIAS("ip_set_hash:net,iface"); + +/* Interface name rbtree */ + +struct iface_node { + struct rb_node node; + char iface[IFNAMSIZ]; +}; + +#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface) + +static inline long +ifname_compare(const char *_a, const char *_b) +{ + const long *a = (const long *)_a; + const long *b = (const long *)_b; + + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + if (a[0] != b[0]) + return a[0] - b[0]; + if (IFNAMSIZ > sizeof(long)) { + if (a[1] != b[1]) + return a[1] - b[1]; + } + if (IFNAMSIZ > 2 * sizeof(long)) { + if (a[2] != b[2]) + return a[2] - b[2]; + } + if (IFNAMSIZ > 3 * sizeof(long)) { + if (a[3] != b[3]) + return a[3] - b[3]; + } + return 0; +} + +static void +rbtree_destroy(struct rb_root *root) +{ + struct rb_node *p, *n = root->rb_node; + struct iface_node *node; + + /* Non-recursive destroy, like in ext3 */ + while (n) { + if (n->rb_left) { + n = n->rb_left; + continue; + } + if (n->rb_right) { + n = n->rb_right; + continue; + } + p = rb_parent(n); + node = rb_entry(n, struct iface_node, node); + if (!p) + *root = RB_ROOT; + else if (p->rb_left == n) + p->rb_left = NULL; + else if (p->rb_right == n) + p->rb_right = NULL; + + kfree(node); + n = p; + } +} + +static int +iface_test(struct rb_root *root, const char **iface) +{ + struct rb_node *n = root->rb_node; + + while (n) { + const char *d = iface_data(n); + long res = ifname_compare(*iface, d); + + if (res < 0) + n = n->rb_left; + else if (res > 0) + n = n->rb_right; + else { + *iface = d; + return 1; + } + } + return 0; +} + +static int +iface_add(struct rb_root *root, const char **iface) +{ + struct rb_node **n = &(root->rb_node), *p = NULL; + struct iface_node *d; + + while (*n) { + char *ifname = iface_data(*n); + long res = ifname_compare(*iface, ifname); + + p = *n; + if (res < 0) + n = &((*n)->rb_left); + else if (res > 0) + n = &((*n)->rb_right); + else { + *iface = ifname; + return 0; + } + } + + d = kzalloc(sizeof(*d), GFP_ATOMIC); + if (!d) + return -ENOMEM; + strcpy(d->iface, *iface); + + rb_link_node(&d->node, p, n); + rb_insert_color(&d->node, root); + + *iface = d->iface; + return 0; +} + +/* Type specific function prefix */ +#define TYPE hash_netiface + +static bool +hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_netiface4_same_set hash_netiface_same_set +#define hash_netiface6_same_set hash_netiface_same_set + +#define STREQ(a, b) (strcmp(a, b) == 0) + +/* The type variant functions: IPv4 */ + +struct hash_netiface4_elem_hashed { + __be32 ip; + u8 physdev; + u8 cidr; + u16 padding; +}; + +#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) + +/* Member elements without timeout */ +struct hash_netiface4_elem { + __be32 ip; + u8 physdev; + u8 cidr; + u16 padding; + const char *iface; +}; + +/* Member elements with timeout support */ +struct hash_netiface4_telem { + __be32 ip; + u8 physdev; + u8 cidr; + u16 padding; + const char *iface; + unsigned long timeout; +}; + +static inline bool +hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, + const struct hash_netiface4_elem *ip2, + u32 *multi) +{ + return ip1->ip == ip2->ip && + ip1->cidr == ip2->cidr && + (++*multi) && + ip1->physdev == ip2->physdev && + ip1->iface == ip2->iface; +} + +static inline bool +hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_netiface4_data_copy(struct hash_netiface4_elem *dst, + const struct hash_netiface4_elem *src) { + dst->ip = src->ip; + dst->cidr = src->cidr; + dst->physdev = src->physdev; + dst->iface = src->iface; +} + +static inline void +hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) +{ + elem->cidr = 0; +} + +static bool +hash_netiface4_data_list(struct sk_buff *skb, + const struct hash_netiface4_elem *data) +{ + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netiface4_data_tlist(struct sk_buff *skb, + const struct hash_netiface4_elem *data) +{ + const struct hash_netiface4_telem *tdata = + (const struct hash_netiface4_telem *)data; + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETS +#define IP_SET_HASH_WITH_RBTREE +#define IP_SET_HASH_WITH_MULTI + +#define PF 4 +#define HOST_MASK 32 +#include <linux/netfilter/ipset/ip_set_ahash.h> + +static inline void +hash_netiface4_data_next(struct ip_set_hash *h, + const struct hash_netiface4_elem *d) +{ + h->next.ip = ntohl(d->ip); +} + +static int +hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + int ret; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + +#define IFACE(dir) (par->dir ? par->dir->name : NULL) +#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) +#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) + + if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { +#ifdef CONFIG_BRIDGE_NETFILTER + const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (!nf_bridge) + return -EINVAL; + data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + data.physdev = 1; +#else + data.iface = NULL; +#endif + } else + data.iface = SRCDIR ? IFACE(in) : IFACE(out); + + if (!data.iface) + return -EINVAL; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +} + +static int +hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem data = { .cidr = HOST_MASK }; + u32 ip = 0, ip_to, last; + u32 timeout = h->timeout; + char iface[IFNAMSIZ] = {}; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_IFACE] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); + data.iface = iface; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) + data.physdev = 1; + } + + if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip, ip_to, data.cidr); + } + + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip = last + 1; + } + return ret; +} + +static bool +hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_netiface6_elem_hashed { + union nf_inet_addr ip; + u8 physdev; + u8 cidr; + u16 padding; +}; + +#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed) + +struct hash_netiface6_elem { + union nf_inet_addr ip; + u8 physdev; + u8 cidr; + u16 padding; + const char *iface; +}; + +struct hash_netiface6_telem { + union nf_inet_addr ip; + u8 physdev; + u8 cidr; + u16 padding; + const char *iface; + unsigned long timeout; +}; + +static inline bool +hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, + const struct hash_netiface6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->cidr == ip2->cidr && + (++*multi) && + ip1->physdev == ip2->physdev && + ip1->iface == ip2->iface; +} + +static inline bool +hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_netiface6_data_copy(struct hash_netiface6_elem *dst, + const struct hash_netiface6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) +{ +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_netiface6_data_list(struct sk_buff *skb, + const struct hash_netiface6_elem *data) +{ + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netiface6_data_tlist(struct sk_buff *skb, + const struct hash_netiface6_elem *data) +{ + const struct hash_netiface6_telem *e = + (const struct hash_netiface6_telem *)data; + u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_STRING(skb, IPSET_ATTR_IFACE, data->iface); + if (flags) + NLA_PUT_NET32(skb, IPSET_ATTR_CADT_FLAGS, flags); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include <linux/netfilter/ipset/ip_set_ahash.h> + +static inline void +hash_netiface6_data_next(struct ip_set_hash *h, + const struct hash_netiface6_elem *d) +{ +} + +static int +hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + int ret; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { +#ifdef CONFIG_BRIDGE_NETFILTER + const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (!nf_bridge) + return -EINVAL; + data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + data.physdev = 1; +#else + data.iface = NULL; +#endif + } else + data.iface = SRCDIR ? IFACE(in) : IFACE(out); + + if (!data.iface) + return -EINVAL; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +} + +static int +hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface6_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + char iface[IFNAMSIZ] = {}; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !tb[IPSET_ATTR_IFACE] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); + data.iface = iface; + ret = iface_test(&h->rbtree, &data.iface); + if (adt == IPSET_ADD) { + if (!ret) { + ret = iface_add(&h->rbtree, &data.iface); + if (ret) + return ret; + } + } else if (!ret) + return ret; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) + data.physdev = 1; + } + + ret = adtfn(set, &data, timeout, flags); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + h->ahash_max = AHASH_MAX_SIZE; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + h->rbtree = RB_ROOT; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_netiface4_tvariant : &hash_netiface6_tvariant; + + if (set->family == AF_INET) + hash_netiface4_gc_init(set); + else + hash_netiface6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_netiface4_variant : &hash_netiface6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_netiface_type __read_mostly = { + .name = "hash:net,iface", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision_min = 0, + .create = hash_netiface_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IFACE] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netiface_init(void) +{ + return ip_set_type_register(&hash_netiface_type); +} + +static void __exit +hash_netiface_fini(void) +{ + ip_set_type_unregister(&hash_netiface_type); +} + +module_init(hash_netiface_init); +module_exit(hash_netiface_fini); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index d2a40362dd3a..8f9de7207ec9 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -59,7 +59,8 @@ struct hash_netport4_telem { static inline bool hash_netport4_data_equal(const struct hash_netport4_elem *ip1, - const struct hash_netport4_elem *ip2) + const struct hash_netport4_elem *ip2, + u32 *multi) { return ip1->ip == ip2->ip && ip1->port == ip2->port && @@ -137,38 +138,48 @@ nla_put_failure: #define HOST_MASK 32 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_netport4_data_next(struct ip_set_hash *h, + const struct hash_netport4_elem *d) +{ + h->next.ip = ntohl(d->ip); + h->next.port = ntohs(d->port); +} + static int hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); data.ip &= ip_set_netmask(data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem data = { .cidr = HOST_MASK }; - u32 port, port_to; + u32 port, port_to, p = 0, ip = 0, ip_to, last; u32 timeout = h->timeout; bool with_ports = false; int ret; @@ -182,15 +193,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; - if (tb[IPSET_ATTR_CIDR]) + if (tb[IPSET_ATTR_CIDR]) { data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); - if (!data.cidr) - return -IPSET_ERR_INVALID_CIDR; - data.ip &= ip_set_netmask(data.cidr); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + } if (tb[IPSET_ATTR_PORT]) data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); @@ -215,24 +226,47 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { + data.ip = htonl(ip & ip_set_hostmask(data.cidr)); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } - port = ntohs(data.port); - port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); - if (port > port_to) - swap(port, port_to); - - for (; port <= port_to; port++) { - data.port = htons(port); - ret = adtfn(set, &data, timeout); - - if (ret && !ip_set_eexist(ret, flags)) + port = port_to = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port_to < port) + swap(port, port_to); + } + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) return ret; - else - ret = 0; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } else { + ip_set_mask_from_to(ip, ip_to, data.cidr); + } + + if (retried) + ip = h->next.ip; + while (!after(ip, ip_to)) { + data.ip = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); + p = retried && ip == h->next.ip ? h->next.port : port; + for (; p <= port_to; p++) { + data.port = htons(p); + ret = adtfn(set, &data, timeout, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + ip = last + 1; } return ret; } @@ -267,7 +301,8 @@ struct hash_netport6_telem { static inline bool hash_netport6_data_equal(const struct hash_netport6_elem *ip1, - const struct hash_netport6_elem *ip2) + const struct hash_netport6_elem *ip2, + u32 *multi) { return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && ip1->port == ip2->port && @@ -350,33 +385,42 @@ nla_put_failure: #define HOST_MASK 128 #include <linux/netfilter/ipset/ip_set_ahash.h> +static inline void +hash_netport6_data_next(struct ip_set_hash *h, + const struct hash_netport6_elem *d) +{ + h->next.port = ntohs(d->port); +} + static int hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem data = { - .cidr = h->nets[0].cidr || HOST_MASK }; + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; if (data.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) data.cidr = HOST_MASK; - if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &data.port, &data.proto)) return -EINVAL; - ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); ip6_netmask(&data.ip, data.cidr); - return adtfn(set, &data, h->timeout); + return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); } static int hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct ip_set_hash *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -391,6 +435,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); @@ -429,7 +475,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -438,9 +484,11 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (port > port_to) swap(port, port_to); + if (retried) + port = h->next.port; for (; port <= port_to; port++) { data.port = htons(port); - ret = adtfn(set, &data, timeout); + ret = adtfn(set, &data, timeout, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; @@ -526,7 +574,9 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = AF_UNSPEC, - .revision = 1, + .revision_min = 0, + /* 1 SCTP and UDPLITE support added */ + .revision_max = 2, /* Range as input support for IPv4 added */ .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -538,6 +588,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e9159e99fc4b..4d10819d462e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -72,7 +72,8 @@ list_set_expired(const struct list_set *map, u32 id) static int list_set_kadt(struct ip_set *set, const struct sk_buff *skb, - enum ipset_adt adt, u8 pf, u8 dim, u8 flags) + const struct xt_action_param *par, + enum ipset_adt adt, const struct ip_set_adt_opt *opt) { struct list_set *map = set->data; struct set_elem *elem; @@ -87,17 +88,17 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, continue; switch (adt) { case IPSET_TEST: - ret = ip_set_test(elem->id, skb, pf, dim, flags); + ret = ip_set_test(elem->id, skb, par, opt); if (ret > 0) return ret; break; case IPSET_ADD: - ret = ip_set_add(elem->id, skb, pf, dim, flags); + ret = ip_set_add(elem->id, skb, par, opt); if (ret == 0) return ret; break; case IPSET_DEL: - ret = ip_set_del(elem->id, skb, pf, dim, flags); + ret = ip_set_del(elem->id, skb, par, opt); if (ret == 0) return ret; break; @@ -109,15 +110,28 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, } static bool -next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +id_eq(const struct list_set *map, u32 i, ip_set_id_t id) { const struct set_elem *elem; - if (i + 1 < map->size) { - elem = list_set_elem(map, i + 1); + if (i < map->size) { + elem = list_set_elem(map, i); + return elem->id == id; + } + + return 0; +} + +static bool +id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id) +{ + const struct set_elem *elem; + + if (i < map->size) { + elem = list_set_elem(map, i); return !!(elem->id == id && !(with_timeout(map->timeout) && - list_set_expired(map, i + 1))); + list_set_expired(map, i))); } return 0; @@ -190,12 +204,26 @@ list_set_del(struct list_set *map, u32 i) return 0; } +static void +cleanup_entries(struct list_set *map) +{ + struct set_telem *e; + u32 i; + + for (i = 0; i < map->size; i++) { + e = list_set_telem(map, i); + if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) + list_set_del(map, i); + } +} + static int list_set_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct list_set *map = set->data; bool with_timeout = with_timeout(map->timeout); + bool flag_exist = flags & IPSET_FLAG_EXIST; int before = 0; u32 timeout = map->timeout; ip_set_id_t id, refid = IPSET_INVALID_ID; @@ -248,6 +276,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } + if (with_timeout && adt != IPSET_TEST) + cleanup_entries(map); switch (adt) { case IPSET_TEST: @@ -259,22 +289,37 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], else if (with_timeout && list_set_expired(map, i)) continue; else if (before > 0 && elem->id == id) - ret = next_id_eq(map, i, refid); + ret = id_eq_timeout(map, i + 1, refid); else if (before < 0 && elem->id == refid) - ret = next_id_eq(map, i, id); + ret = id_eq_timeout(map, i + 1, id); else if (before == 0 && elem->id == id) ret = 1; } break; case IPSET_ADD: - for (i = 0; i < map->size && !ret; i++) { + for (i = 0; i < map->size; i++) { elem = list_set_elem(map, i); - if (elem->id == id && - !(with_timeout && list_set_expired(map, i))) + if (elem->id != id) + continue; + if (!(with_timeout && flag_exist)) { ret = -IPSET_ERR_EXIST; + goto finish; + } else { + struct set_telem *e = list_set_telem(map, i); + + if ((before > 1 && + !id_eq(map, i + 1, refid)) || + (before < 0 && + (i == 0 || !id_eq(map, i - 1, refid)))) { + ret = -IPSET_ERR_EXIST; + goto finish; + } + e->timeout = ip_set_timeout_set(timeout); + ip_set_put_byindex(id); + ret = 0; + goto finish; + } } - if (ret == -IPSET_ERR_EXIST) - break; ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { elem = list_set_elem(map, i); @@ -283,9 +328,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], : list_set_add(map, i, id, timeout); else if (elem->id != refid) continue; - else if (with_timeout && list_set_expired(map, i)) - ret = -IPSET_ERR_REF_EXIST; - else if (before) + else if (before > 0) ret = list_set_add(map, i, id, timeout); else if (i + 1 < map->size) ret = list_set_add(map, i + 1, id, timeout); @@ -299,16 +342,12 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; break; - } else if (with_timeout && list_set_expired(map, i)) - continue; - else if (elem->id == id && - (before == 0 || - (before > 0 && - next_id_eq(map, i, refid)))) + } else if (elem->id == id && + (before == 0 || + (before > 0 && id_eq(map, i + 1, refid)))) ret = list_set_del(map, i); - else if (before < 0 && - elem->id == refid && - next_id_eq(map, i, id)) + else if (elem->id == refid && + before < 0 && id_eq(map, i + 1, id)) ret = list_set_del(map, i + 1); } break; @@ -454,15 +493,9 @@ list_set_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct list_set *map = set->data; - struct set_telem *e; - u32 i; write_lock_bh(&set->lock); - for (i = 0; i < map->size; i++) { - e = list_set_telem(map, i); - if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) - list_set_del(map, i); - } + cleanup_entries(map); write_unlock_bh(&set->lock); map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; @@ -543,7 +576,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = IPSET_DIM_ONE, .family = AF_UNSPEC, - .revision = 0, + .revision_min = 0, + .revision_max = 0, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 23f8c8162214..bd13d66220f1 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -148,7 +148,7 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ +#define E(a, b, c, d) \ {.ip6 = { (__force __be32) a, (__force __be32) b, \ (__force __be32) c, (__force __be32) d, \ } } @@ -289,3 +289,24 @@ const union nf_inet_addr ip_set_hostmask_map[] = { E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); + +/* Find the largest network which matches the range from left, in host order. */ +u32 +ip_set_range_to_cidr(u32 from, u32 to, u8 *cidr) +{ + u32 last; + u8 i; + + for (i = 1; i < 32; i++) { + if ((from & ip_set_hostmask(i)) != from) + continue; + last = from | ~ip_set_hostmask(i); + if (!after(last, to)) { + *cidr = i; + return last; + } + } + *cidr = 32; + return from; +} +EXPORT_SYMBOL_GPL(ip_set_range_to_cidr); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 059af3120be7..fe6cb4304d72 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __net_init __ip_vs_app_init(struct net *net) +int __net_init ip_vs_app_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,17 +585,7 @@ int __net_init __ip_vs_app_init(struct net *net) return 0; } -void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit ip_vs_app_net_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } - -int __init ip_vs_app_init(void) -{ - return 0; -} - - -void ip_vs_app_cleanup(void) -{ -} diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index bf28ac2fc99b..12571fb2881c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -776,8 +776,16 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); - if (cp->flags & IP_VS_CONN_F_NFCT) + if (cp->flags & IP_VS_CONN_F_NFCT) { ip_vs_conn_drop_conntrack(cp); + /* Do not access conntracks during subsys cleanup + * because nf_conntrack_find_get can not be used after + * conntrack cleanup for the net. + */ + smp_rmb(); + if (ipvs->enable) + ip_vs_conn_drop_conntrack(cp); + } ip_vs_pe_put(cp->pe); kfree(cp->pe_data); @@ -1247,7 +1255,7 @@ flush_again: /* * per netns init and exit */ -int __net_init __ip_vs_conn_init(struct net *net) +int __net_init ip_vs_conn_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1258,7 +1266,7 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit ip_vs_conn_net_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bfa808f4da13..4f77bb16d22a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -852,7 +852,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, *related = 1; /* reassemble IP fragments */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1156,8 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } else #endif - if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && - !pp->dont_defrag)) { + if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; @@ -1310,7 +1309,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) *related = 1; /* reassemble IP fragments */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1384,7 +1383,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); - out: +out: __ip_vs_conn_put(cp); return verdict; @@ -1772,7 +1771,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1782,7 +1781,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1790,7 +1789,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1798,7 +1797,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1824,7 +1823,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 99, + .priority = NF_IP6_PRI_NAT_SRC - 2, }, /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1834,7 +1833,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, - .priority = 101, + .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { @@ -1842,7 +1841,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET, .hooknum = NF_INET_LOCAL_OUT, - .priority = -99, + .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { @@ -1850,7 +1849,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, - .priority = -98, + .priority = NF_IP6_PRI_NAT_DST + 2, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1891,22 +1890,22 @@ static int __net_init __ip_vs_init(struct net *net) atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - if (__ip_vs_estimator_init(net) < 0) + if (ip_vs_estimator_net_init(net) < 0) goto estimator_fail; - if (__ip_vs_control_init(net) < 0) + if (ip_vs_control_net_init(net) < 0) goto control_fail; - if (__ip_vs_protocol_init(net) < 0) + if (ip_vs_protocol_net_init(net) < 0) goto protocol_fail; - if (__ip_vs_app_init(net) < 0) + if (ip_vs_app_net_init(net) < 0) goto app_fail; - if (__ip_vs_conn_init(net) < 0) + if (ip_vs_conn_net_init(net) < 0) goto conn_fail; - if (__ip_vs_sync_init(net) < 0) + if (ip_vs_sync_net_init(net) < 0) goto sync_fail; printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", @@ -1917,27 +1916,27 @@ static int __net_init __ip_vs_init(struct net *net) */ sync_fail: - __ip_vs_conn_cleanup(net); + ip_vs_conn_net_cleanup(net); conn_fail: - __ip_vs_app_cleanup(net); + ip_vs_app_net_cleanup(net); app_fail: - __ip_vs_protocol_cleanup(net); + ip_vs_protocol_net_cleanup(net); protocol_fail: - __ip_vs_control_cleanup(net); + ip_vs_control_net_cleanup(net); control_fail: - __ip_vs_estimator_cleanup(net); + ip_vs_estimator_net_cleanup(net); estimator_fail: return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { - __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ - __ip_vs_conn_cleanup(net); - __ip_vs_app_cleanup(net); - __ip_vs_protocol_cleanup(net); - __ip_vs_control_cleanup(net); - __ip_vs_estimator_cleanup(net); + ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */ + ip_vs_conn_net_cleanup(net); + ip_vs_app_net_cleanup(net); + ip_vs_protocol_net_cleanup(net); + ip_vs_control_net_cleanup(net); + ip_vs_estimator_net_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); } @@ -1945,7 +1944,8 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) { EnterFunction(2); net_ipvs(net)->enable = 0; /* Disable packet reception */ - __ip_vs_sync_cleanup(net); + smp_wmb(); + ip_vs_sync_net_cleanup(net); LeaveFunction(2); } @@ -1967,36 +1967,23 @@ static int __init ip_vs_init(void) { int ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); - goto cleanup_estimator; + goto exit; } ip_vs_protocol_init(); - ret = ip_vs_app_init(); - if (ret < 0) { - pr_err("can't setup application helper.\n"); - goto cleanup_protocol; - } - ret = ip_vs_conn_init(); if (ret < 0) { pr_err("can't setup connection table.\n"); - goto cleanup_app; - } - - ret = ip_vs_sync_init(); - if (ret < 0) { - pr_err("can't setup sync data.\n"); - goto cleanup_conn; + goto cleanup_protocol; } ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ if (ret < 0) - goto cleanup_sync; + goto cleanup_conn; ret = register_pernet_device(&ipvs_core_dev_ops); if (ret < 0) @@ -2016,17 +2003,12 @@ cleanup_dev: unregister_pernet_device(&ipvs_core_dev_ops); cleanup_sub: unregister_pernet_subsys(&ipvs_core_ops); -cleanup_sync: - ip_vs_sync_cleanup(); - cleanup_conn: +cleanup_conn: ip_vs_conn_cleanup(); - cleanup_app: - ip_vs_app_cleanup(); - cleanup_protocol: +cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - cleanup_estimator: - ip_vs_estimator_cleanup(); +exit: return ret; } @@ -2035,12 +2017,9 @@ static void __exit ip_vs_cleanup(void) nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); unregister_pernet_device(&ipvs_core_dev_ops); unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ - ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); - ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - ip_vs_estimator_cleanup(); pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 699c79a55657..be43fd805bd0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1334,9 +1334,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) ip_vs_bind_pe(svc, pe); } - out_unlock: +out_unlock: write_unlock_bh(&__ip_vs_svc_lock); - out: +out: ip_vs_scheduler_put(old_sched); ip_vs_pe_put(old_pe); return ret; @@ -1483,7 +1483,7 @@ static int ip_vs_flush(struct net *net) * Delete service by {netns} in the service table. * Called by __ip_vs_cleanup() */ -void __ip_vs_service_cleanup(struct net *net) +void ip_vs_service_net_cleanup(struct net *net) { EnterFunction(2); /* Check for "full" addressed entries */ @@ -1662,7 +1662,7 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without - * align with netns init in __ip_vs_control_init() + * align with netns init in ip_vs_control_net_init() */ static struct ctl_table vs_vars[] = { @@ -2469,7 +2469,7 @@ __ip_vs_get_service_entries(struct net *net, count++; } } - out: +out: return ret; } @@ -2707,7 +2707,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; } - out: +out: mutex_unlock(&__ip_vs_mutex); return ret; } @@ -3595,7 +3595,7 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -int __net_init __ip_vs_control_init_sysctl(struct net *net) +int __net_init ip_vs_control_net_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3654,7 +3654,7 @@ int __net_init __ip_vs_control_init_sysctl(struct net *net) return 0; } -void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) +void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3665,8 +3665,8 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) #else -int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; } -void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } +int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } +void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { } #endif @@ -3674,7 +3674,7 @@ static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, }; -int __net_init __ip_vs_control_init(struct net *net) +int __net_init ip_vs_control_net_init(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3702,7 +3702,7 @@ int __net_init __ip_vs_control_init(struct net *net) proc_net_fops_create(net, "ip_vs_stats_percpu", 0, &ip_vs_stats_percpu_fops); - if (__ip_vs_control_init_sysctl(net)) + if (ip_vs_control_net_init_sysctl(net)) goto err; return 0; @@ -3712,13 +3712,13 @@ err: return -ENOMEM; } -void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); - __ip_vs_control_cleanup_sysctl(net); + ip_vs_control_net_cleanup_sysctl(net); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 508cce98777c..0fac6017b6fb 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init ip_vs_estimator_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,16 +203,7 @@ int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -void __net_exit __ip_vs_estimator_cleanup(struct net *net) +void __net_exit ip_vs_estimator_net_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } - -int __init ip_vs_estimator_init(void) -{ - return 0; -} - -void ip_vs_estimator_cleanup(void) -{ -} diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index af63553fa332..4490a32ad5b2 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -44,8 +44,8 @@ #include <net/ip_vs.h> -#define SERVER_STRING "227 Entering Passive Mode (" -#define CLIENT_STRING "PORT " +#define SERVER_STRING "227 " +#define CLIENT_STRING "PORT" /* @@ -79,14 +79,17 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) /* * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started - * with the "pattern" and terminated with the "term" character. + * with the "pattern", ignoring before "skip" and terminated with + * the "term" character. * <addr,port> is in network order. */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, - const char *pattern, size_t plen, char term, + const char *pattern, size_t plen, + char skip, char term, __be32 *addr, __be16 *port, char **start, char **end) { + char *s, c; unsigned char p[6]; int i = 0; @@ -101,19 +104,38 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (strnicmp(data, pattern, plen) != 0) { return 0; } - *start = data + plen; + s = data + plen; + if (skip) { + int found = 0; + + for (;; s++) { + if (s == data_limit) + return -1; + if (!found) { + if (*s == skip) + found = 1; + } else if (*s != skip) { + break; + } + } + } - for (data = *start; *data != term; data++) { + for (data = s; ; data++) { if (data == data_limit) return -1; + if (*data == term) + break; } *end = data; memset(p, 0, sizeof(p)); - for (data = *start; data != *end; data++) { - if (*data >= '0' && *data <= '9') { - p[i] = p[i]*10 + *data - '0'; - } else if (*data == ',' && i < 5) { + for (data = s; ; data++) { + c = *data; + if (c == term) + break; + if (c >= '0' && c <= '9') { + p[i] = p[i]*10 + c - '0'; + } else if (c == ',' && i < 5) { i++; } else { /* unexpected character */ @@ -124,8 +146,9 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (i != 5) return -1; - *addr = get_unaligned((__be32 *)p); - *port = get_unaligned((__be16 *)(p + 4)); + *start = s; + *addr = get_unaligned((__be32 *) p); + *port = get_unaligned((__be16 *) (p + 4)); return 1; } @@ -185,7 +208,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, - sizeof(SERVER_STRING)-1, ')', + sizeof(SERVER_STRING)-1, + '(', ')', &from.ip, &port, &start, &end) != 1) return 1; @@ -345,7 +369,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING, sizeof(CLIENT_STRING)-1, - '\r', &to.ip, &port, + ' ', '\r', &to.ip, &port, &start, &end) != 1) return 1; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index eb86028536fc..52d073c105e9 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init ip_vs_protocol_net_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit ip_vs_protocol_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e292e5bddc70..7ee7215b8ba0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -int __net_init __ip_vs_sync_init(struct net *net) +int __net_init ip_vs_sync_net_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1677,7 +1677,7 @@ int __net_init __ip_vs_sync_init(struct net *net) return 0; } -void __ip_vs_sync_cleanup(struct net *net) +void ip_vs_sync_net_cleanup(struct net *net) { int retc; @@ -1689,12 +1689,3 @@ void __ip_vs_sync_cleanup(struct net *net) if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); } - -int __init ip_vs_sync_init(void) -{ - return 0; -} - -void ip_vs_sync_cleanup(void) -{ -} diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2e1c11f78419..f7af8b866017 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -850,7 +850,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + *ctinfo = IP_CT_ESTABLISHED_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { @@ -922,6 +922,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, ret = -ret; goto out; } + /* ICMP[v6] protocol trackers may assign one conntrack. */ + if (skb->nfct) + goto out; } ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, @@ -1143,7 +1146,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index e17cb7c7dd8f..6f5801eac999 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -368,7 +368,7 @@ static int help(struct sk_buff *skb, /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + ctinfo != IP_CT_ESTABLISHED_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 18b2ce5c8ced..f03c2d4539f6 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -571,10 +571,9 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_h245: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); @@ -1125,10 +1124,9 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, int ret; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; - } + pr_debug("nf_ct_q931: skblen = %u\n", skb->len); spin_lock_bh(&nf_h323_lock); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b394aa318776..4f9390b98697 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -125,8 +125,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 482e90c61850..7dec88a1755b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -970,7 +970,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, - ctnetlink_done); + ctnetlink_done, 0); err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) @@ -1840,7 +1840,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, ctnetlink_exp_dump_table, - ctnetlink_exp_done); + ctnetlink_exp_done, 0); } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 088944824e13..2fd4565144de 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -519,8 +519,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; nexthdr_off = protoff; diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index d9e27734b2a2..8501823b3f9b 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -78,7 +78,7 @@ static int help(struct sk_buff *skb, ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* Not a full tcp header? */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index cb5a28581782..93faf6a3a637 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1423,7 +1423,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && - ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + ctinfo != IP_CT_ESTABLISHED_REPLY) return NF_ACCEPT; /* No Data ? */ diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b4a4532823e8..1905976b5135 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -37,7 +37,7 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); void nfnl_lock(void) @@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) nfnl_unlock(); return -EBUSY; } - subsys_table[n->subsys_id] = n; + rcu_assign_pointer(subsys_table[n->subsys_id], n); nfnl_unlock(); return 0; @@ -71,7 +71,7 @@ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) nfnl_lock(); subsys_table[n->subsys_id] = NULL; nfnl_unlock(); - + synchronize_rcu(); return 0; } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); @@ -83,7 +83,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; - return subsys_table[subsys_id]; + return rcu_dereference(subsys_table[subsys_id]); } static inline const struct nfnl_callback * @@ -139,21 +139,27 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) type = nlh->nlmsg_type; replay: + rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_MODULES - nfnl_unlock(); + rcu_read_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); - nfnl_lock(); + rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) #endif + { + rcu_read_unlock(); return -EINVAL; + } } nc = nfnetlink_find_client(type, ss); - if (!nc) + if (!nc) { + rcu_read_unlock(); return -EINVAL; + } { int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); @@ -167,7 +173,23 @@ replay: if (err < 0) return err; - err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); + if (nc->call_rcu) { + err = nc->call_rcu(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + nfnl_lock(); + if (rcu_dereference_protected( + subsys_table[NFNL_SUBSYS_ID(type)], + lockdep_is_held(&nfnl_mutex)) != ss || + nfnetlink_find_client(type, ss) != nc) + err = -EAGAIN; + else + err = nc->call(net->nfnl, skb, nlh, + (const struct nlattr **)cda); + nfnl_unlock(); + } if (err == -EAGAIN) goto replay; return err; @@ -176,9 +198,7 @@ replay: static void nfnetlink_rcv(struct sk_buff *skb) { - nfnl_lock(); netlink_rcv_skb(skb, &nfnetlink_rcv_msg); - nfnl_unlock(); } static int __net_init nfnetlink_net_init(struct net *net) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index e0ee010935e7..2d8158acf6fa 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -33,7 +33,7 @@ #include <net/netfilter/nf_log.h> #include <net/netfilter/nfnetlink_log.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #ifdef CONFIG_BRIDGE_NETFILTER #include "../bridge/br_private.h" @@ -456,7 +456,8 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->mark) NLA_PUT_BE32(inst->skb, NFULA_MARK, htonl(skb->mark)); - if (indev && skb->dev) { + if (indev && skb->dev && + skb->mac_header != skb->network_header) { struct nfulnl_msg_packet_hw phw; int len = dev_parse_header(skb, phw.hw_addr); if (len > 0) { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b83123f12b42..00bd475eab4b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -31,7 +31,7 @@ #include <net/sock.h> #include <net/netfilter/nf_queue.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #ifdef CONFIG_BRIDGE_NETFILTER #include "../bridge/br_private.h" @@ -58,7 +58,7 @@ struct nfqnl_instance { */ spinlock_t lock; unsigned int queue_total; - atomic_t id_sequence; /* 'sequence' of pkt ids */ + unsigned int id_sequence; /* 'sequence' of pkt ids */ struct list_head queue_list; /* packets in queue */ }; @@ -171,6 +171,13 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) queue->queue_total++; } +static void +__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) +{ + list_del(&entry->list); + queue->queue_total--; +} + static struct nf_queue_entry * find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) { @@ -185,10 +192,8 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) } } - if (entry) { - list_del(&entry->list); - queue->queue_total--; - } + if (entry) + __dequeue_entry(queue, entry); spin_unlock_bh(&queue->lock); @@ -213,13 +218,15 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, - struct nf_queue_entry *entry) + struct nf_queue_entry *entry, + __be32 **packet_id_ptr) { sk_buff_data_t old_tail; size_t size; size_t data_len = 0; struct sk_buff *skb; - struct nfqnl_msg_packet_hdr pmsg; + struct nlattr *nla; + struct nfqnl_msg_packet_hdr *pmsg; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct sk_buff *entskb = entry->skb; @@ -272,12 +279,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(queue->queue_num); - entry->id = atomic_inc_return(&queue->id_sequence); - pmsg.packet_id = htonl(entry->id); - pmsg.hw_protocol = entskb->protocol; - pmsg.hook = entry->hook; - - NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); + pmsg = nla_data(nla); + pmsg->hw_protocol = entskb->protocol; + pmsg->hook = entry->hook; + *packet_id_ptr = &pmsg->packet_id; indev = entry->indev; if (indev) { @@ -335,7 +341,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->mark) NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); - if (indev && entskb->dev) { + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { struct nfqnl_msg_packet_hw phw; int len = dev_parse_header(entskb, phw.hw_addr); if (len) { @@ -388,6 +395,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct sk_buff *nskb; struct nfqnl_instance *queue; int err = -ENOBUFS; + __be32 *packet_id_ptr; /* rcu_read_lock()ed by nf_hook_slow() */ queue = instance_lookup(queuenum); @@ -401,7 +409,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) goto err_out; } - nskb = nfqnl_build_packet_message(queue, entry); + nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; goto err_out; @@ -420,6 +428,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) queue->queue_total); goto err_out_free_nskb; } + entry->id = ++queue->id_sequence; + *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); @@ -607,6 +617,92 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, }; +static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, +}; + +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +{ + struct nfqnl_instance *queue; + + queue = instance_lookup(queue_num); + if (!queue) + return ERR_PTR(-ENODEV); + + if (queue->peer_pid != nlpid) + return ERR_PTR(-EPERM); + + return queue; +} + +static struct nfqnl_msg_verdict_hdr* +verdicthdr_get(const struct nlattr * const nfqa[]) +{ + struct nfqnl_msg_verdict_hdr *vhdr; + unsigned int verdict; + + if (!nfqa[NFQA_VERDICT_HDR]) + return NULL; + + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); + verdict = ntohl(vhdr->verdict); + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + return NULL; + return vhdr; +} + +static int nfq_id_after(unsigned int id, unsigned int max) +{ + return (int)(id - max) > 0; +} + +static int +nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nf_queue_entry *entry, *tmp; + unsigned int verdict, maxid; + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + LIST_HEAD(batch_list); + u16 queue_num = ntohs(nfmsg->res_id); + + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; + + verdict = ntohl(vhdr->verdict); + maxid = ntohl(vhdr->id); + + spin_lock_bh(&queue->lock); + + list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { + if (nfq_id_after(entry->id, maxid)) + break; + __dequeue_entry(queue, entry); + list_add_tail(&entry->list, &batch_list); + } + + spin_unlock_bh(&queue->lock); + + if (list_empty(&batch_list)) + return -ENOENT; + + list_for_each_entry_safe(entry, tmp, &batch_list, list) { + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + nf_reinject(entry, verdict); + } + return 0; +} + static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -619,39 +715,23 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; unsigned int verdict; struct nf_queue_entry *entry; - int err; - rcu_read_lock(); queue = instance_lookup(queue_num); - if (!queue) { - err = -ENODEV; - goto err_out_unlock; - } + if (!queue) - if (queue->peer_pid != NETLINK_CB(skb).pid) { - err = -EPERM; - goto err_out_unlock; - } + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); - if (!nfqa[NFQA_VERDICT_HDR]) { - err = -EINVAL; - goto err_out_unlock; - } + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; - vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { - err = -EINVAL; - goto err_out_unlock; - } - entry = find_dequeue_entry(queue, ntohl(vhdr->id)); - if (entry == NULL) { - err = -ENOENT; - goto err_out_unlock; - } - rcu_read_unlock(); + if (entry == NULL) + return -ENOENT; if (nfqa[NFQA_PAYLOAD]) { if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), @@ -664,10 +744,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, nf_reinject(entry, verdict); return 0; - -err_out_unlock: - rcu_read_unlock(); - return err; } static int @@ -780,14 +856,17 @@ err_out_unlock: } static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { - [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, .attr_count = NFQA_MAX, }, - [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, .attr_count = NFQA_MAX, .policy = nfqa_verdict_policy }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, .attr_count = NFQA_CFG_MAX, .policy = nfqa_cfg_policy }, + [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_batch_policy }, }; static const struct nfnetlink_subsystem nfqnl_subsys = { @@ -869,7 +948,7 @@ static int seq_show(struct seq_file *s, void *v) inst->peer_pid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, - atomic_read(&inst->id_sequence), 1); + inst->id_sequence, 1); } static const struct seq_operations nfqnl_seq_ops = { diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 363a99ec0637..4bca15a0c385 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -163,6 +163,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) break; } +#ifdef CONFIG_NETWORK_SECMARK + if (skb->secmark) + audit_log_secctx(ab, skb->secmark); +#endif + audit_log_end(ab); errout: diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 782e51986a6f..0221d10de75a 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -5,7 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/gfp.h> #include <linux/skbuff.h> @@ -95,8 +95,11 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) if (info->helper[0]) { ret = -ENOENT; proto = xt_ct_find_proto(par); - if (!proto) + if (!proto) { + pr_info("You must specify a L4 protocol, " + "and not use inversions on it.\n"); goto err3; + } ret = -ENOMEM; help = nf_ct_helper_ext_add(ct, GFP_KERNEL); @@ -107,8 +110,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) help->helper = nf_conntrack_helper_try_module_get(info->helper, par->family, proto); - if (help->helper == NULL) + if (help->helper == NULL) { + pr_info("No such helper \"%s\"\n", info->helper); goto err3; + } } __set_bit(IPS_TEMPLATE_BIT, &ct->status); diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 95b084800fcc..1535e87ed9bd 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -38,22 +38,22 @@ ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) iph = ip_hdr(skb); switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; } if (new_ttl != iph->ttl) { @@ -78,22 +78,22 @@ hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) ip6h = ipv6_hdr(skb); switch (info->mode) { - case IP6T_HL_SET: - new_hl = info->hop_limit; - break; - case IP6T_HL_INC: - new_hl = ip6h->hop_limit + info->hop_limit; - if (new_hl > 255) - new_hl = 255; - break; - case IP6T_HL_DEC: - new_hl = ip6h->hop_limit - info->hop_limit; - if (new_hl < 0) - new_hl = 0; - break; - default: - new_hl = ip6h->hop_limit; - break; + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; } ip6h->hop_limit = new_hl; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index de079abd5bc8..f264032b8c56 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -60,11 +60,6 @@ struct xt_rateest *xt_rateest_lookup(const char *name) } EXPORT_SYMBOL_GPL(xt_rateest_lookup); -static void xt_rateest_free_rcu(struct rcu_head *head) -{ - kfree(container_of(head, struct xt_rateest, rcu)); -} - void xt_rateest_put(struct xt_rateest *est) { mutex_lock(&xt_rateest_mutex); @@ -75,7 +70,7 @@ void xt_rateest_put(struct xt_rateest *est) * gen_estimator est_timer() might access est->lock or bstats, * wait a RCU grace period before freeing 'est' */ - call_rcu(&est->rcu, xt_rateest_free_rcu); + kfree_rcu(est, rcu); } mutex_unlock(&xt_rateest_mutex); } @@ -188,7 +183,6 @@ static int __init xt_rateest_tg_init(void) static void __exit xt_rateest_tg_fini(void) { xt_unregister_target(&xt_rateest_tg_reg); - rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */ } diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index 7d12221ead89..003951149c9e 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -31,14 +31,14 @@ static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; + case IPT_TTL_EQ: + return ttl == info->ttl; + case IPT_TTL_NE: + return ttl != info->ttl; + case IPT_TTL_LT: + return ttl < info->ttl; + case IPT_TTL_GT: + return ttl > info->ttl; } return false; @@ -50,14 +50,14 @@ static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par) const struct ipv6hdr *ip6h = ipv6_hdr(skb); switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; + case IP6T_HL_EQ: + return ip6h->hop_limit == info->hop_limit; + case IP6T_HL_NE: + return ip6h->hop_limit != info->hop_limit; + case IP6T_HL_LT: + return ip6h->hop_limit < info->hop_limit; + case IP6T_HL_GT: + return ip6h->hop_limit > info->hop_limit; } return false; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b3babaed7719..0ec8138aa470 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/skbuff.h> -#include <linux/version.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_set.h> @@ -29,23 +28,33 @@ MODULE_ALIAS("ip6t_SET"); static inline int match_set(ip_set_id_t index, const struct sk_buff *skb, - u8 pf, u8 dim, u8 flags, int inv) + const struct xt_action_param *par, + const struct ip_set_adt_opt *opt, int inv) { - if (ip_set_test(index, skb, pf, dim, flags)) + if (ip_set_test(index, skb, par, opt)) inv = !inv; return inv; } +#define ADT_OPT(n, f, d, fs, cfs, t) \ +const struct ip_set_adt_opt n = { \ + .family = f, \ + .dim = d, \ + .flags = fs, \ + .cmdflags = cfs, \ + .timeout = t, \ +} + /* Revision 0 interface: backward compatible with netfilter/iptables */ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + info->match_set.u.compat.flags, 0, UINT_MAX); - return match_set(info->match_set.index, skb, par->family, - info->match_set.u.compat.dim, - info->match_set.u.compat.flags, + return match_set(info->match_set.index, skb, par, &opt, info->match_set.u.compat.flags & IPSET_INV_MATCH); } @@ -103,15 +112,15 @@ static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, + info->add_set.u.compat.flags, 0, UINT_MAX); + ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_add(info->add_set.index, skb, par->family, - info->add_set.u.compat.dim, - info->add_set.u.compat.flags); + ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_del(info->del_set.index, skb, par->family, - info->del_set.u.compat.dim, - info->del_set.u.compat.flags); + ip_set_del(info->del_set.index, skb, par, &del_opt); return XT_CONTINUE; } @@ -170,23 +179,23 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } -/* Revision 1: current interface to netfilter/iptables */ +/* Revision 1 match and target */ static bool -set_match(const struct sk_buff *skb, struct xt_action_param *par) +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_set_info_match *info = par->matchinfo; + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); - return match_set(info->match_set.index, skb, par->family, - info->match_set.dim, - info->match_set.flags, + return match_set(info->match_set.index, skb, par, &opt, info->match_set.flags & IPSET_INV_MATCH); } static int -set_match_checkentry(const struct xt_mtchk_param *par) +set_match_v1_checkentry(const struct xt_mtchk_param *par) { - struct xt_set_info_match *info = par->matchinfo; + struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_id_t index; index = ip_set_nfnl_get_byindex(info->match_set.index); @@ -207,36 +216,34 @@ set_match_checkentry(const struct xt_mtchk_param *par) } static void -set_match_destroy(const struct xt_mtdtor_param *par) +set_match_v1_destroy(const struct xt_mtdtor_param *par) { - struct xt_set_info_match *info = par->matchinfo; + struct xt_set_info_match_v1 *info = par->matchinfo; ip_set_nfnl_put(info->match_set.index); } static unsigned int -set_target(struct sk_buff *skb, const struct xt_action_param *par) +set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, 0, UINT_MAX); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_add(info->add_set.index, - skb, par->family, - info->add_set.dim, - info->add_set.flags); + ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_del(info->del_set.index, - skb, par->family, - info->del_set.dim, - info->del_set.flags); + ip_set_del(info->del_set.index, skb, par, &del_opt); return XT_CONTINUE; } static int -set_target_checkentry(const struct xt_tgchk_param *par) +set_target_v1_checkentry(const struct xt_tgchk_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { @@ -273,9 +280,9 @@ set_target_checkentry(const struct xt_tgchk_param *par) } static void -set_target_destroy(const struct xt_tgdtor_param *par) +set_target_v1_destroy(const struct xt_tgdtor_param *par) { - const struct xt_set_info_target *info = par->targinfo; + const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(info->add_set.index); @@ -283,6 +290,28 @@ set_target_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(info->del_set.index); } +/* Revision 2 target */ + +static unsigned int +set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, + info->add_set.flags, info->flags, info->timeout); + ADT_OPT(del_opt, par->family, info->del_set.dim, + info->del_set.flags, 0, UINT_MAX); + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par, &add_opt); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par, &del_opt); + + return XT_CONTINUE; +} + +#define set_target_v2_checkentry set_target_v1_checkentry +#define set_target_v2_destroy set_target_v1_destroy + static struct xt_match set_matches[] __read_mostly = { { .name = "set", @@ -298,20 +327,20 @@ static struct xt_match set_matches[] __read_mostly = { .name = "set", .family = NFPROTO_IPV4, .revision = 1, - .match = set_match, - .matchsize = sizeof(struct xt_set_info_match), - .checkentry = set_match_checkentry, - .destroy = set_match_destroy, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, .me = THIS_MODULE }, { .name = "set", .family = NFPROTO_IPV6, .revision = 1, - .match = set_match, - .matchsize = sizeof(struct xt_set_info_match), - .checkentry = set_match_checkentry, - .destroy = set_match_destroy, + .match = set_match_v1, + .matchsize = sizeof(struct xt_set_info_match_v1), + .checkentry = set_match_v1_checkentry, + .destroy = set_match_v1_destroy, .me = THIS_MODULE }, }; @@ -331,20 +360,40 @@ static struct xt_target set_targets[] __read_mostly = { .name = "SET", .revision = 1, .family = NFPROTO_IPV4, - .target = set_target, - .targetsize = sizeof(struct xt_set_info_target), - .checkentry = set_target_checkentry, - .destroy = set_target_destroy, + .target = set_target_v1, + .targetsize = sizeof(struct xt_set_info_target_v1), + .checkentry = set_target_v1_checkentry, + .destroy = set_target_v1_destroy, .me = THIS_MODULE }, { .name = "SET", .revision = 1, .family = NFPROTO_IPV6, - .target = set_target, - .targetsize = sizeof(struct xt_set_info_target), - .checkentry = set_target_checkentry, - .destroy = set_target_destroy, + .target = set_target_v1, + .targetsize = sizeof(struct xt_set_info_target_v1), + .checkentry = set_target_v1_checkentry, + .destroy = set_target_v1_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 2, + .family = NFPROTO_IPV4, + .target = set_target_v2, + .targetsize = sizeof(struct xt_set_info_target_v2), + .checkentry = set_target_v2_checkentry, + .destroy = set_target_v2_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 2, + .family = NFPROTO_IPV6, + .target = set_target_v2, + .targetsize = sizeof(struct xt_set_info_target_v2), + .checkentry = set_target_v2_checkentry, + .destroy = set_target_v2_destroy, .me = THIS_MODULE }, }; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 9cc46356b577..fe39f7e913df 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,9 +143,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && - ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index bae5756b1626..dd53a36d89af 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -39,7 +39,7 @@ #include <net/genetlink.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_cipso_v4.h" diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index de0d8e4cbfb6..2aa975e5452d 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -55,8 +55,7 @@ struct netlbl_domhsh_tbl { * should be okay */ static DEFINE_SPINLOCK(netlbl_domhsh_lock); #define netlbl_domhsh_rcu_deref(p) \ - rcu_dereference_check(p, rcu_read_lock_held() || \ - lockdep_is_held(&netlbl_domhsh_lock)) + rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock)) static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; static struct netlbl_dom_map *netlbl_domhsh_def = NULL; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 1b83e0009d8d..b528dd928d3c 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -39,7 +39,7 @@ #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <asm/bug.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4f251b19fbcc..dff8a0809245 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -42,7 +42,7 @@ #include <net/ipv6.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "netlabel_domainhash.h" #include "netlabel_user.h" diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 0a25838bcf45..8db37f4c10f7 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -32,7 +32,7 @@ #define _NETLABEL_MGMT_H #include <net/netlabel.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * The following NetLabel payloads are supported by the management interface. diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 9c38658fba8b..f1ecf848e3ac 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -52,7 +52,7 @@ #include <net/net_namespace.h> #include <net/netlabel.h> #include <asm/bug.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "netlabel_user.h" #include "netlabel_addrlist.h" @@ -116,8 +116,7 @@ struct netlbl_unlhsh_walk_arg { * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ - rcu_dereference_check(p, rcu_read_lock_held() || \ - lockdep_is_held(&netlbl_unlhsh_lock)) + rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock)) static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; @@ -426,10 +425,9 @@ int netlbl_unlhsh_add(struct net *net, audit_info); switch (addr_len) { case sizeof(struct in_addr): { - struct in_addr *addr4, *mask4; + const struct in_addr *addr4 = addr; + const struct in_addr *mask4 = mask; - addr4 = (struct in_addr *)addr; - mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) netlbl_af4list_audit_addr(audit_buf, 1, @@ -440,10 +438,9 @@ int netlbl_unlhsh_add(struct net *net, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case sizeof(struct in6_addr): { - struct in6_addr *addr6, *mask6; + const struct in6_addr *addr6 = addr; + const struct in6_addr *mask6 = mask; - addr6 = (struct in6_addr *)addr; - mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) netlbl_af6list_audit_addr(audit_buf, 1, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a7ec8512f552..0a4db0211da0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1659,13 +1659,10 @@ static int netlink_dump(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_callback *cb; - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct nlmsghdr *nlh; int len, err = -ENOBUFS; - - skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); - if (!skb) - goto errout; + int alloc_size; mutex_lock(nlk->cb_mutex); @@ -1675,6 +1672,12 @@ static int netlink_dump(struct sock *sk) goto errout_skb; } + alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + + skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); + if (!skb) + goto errout_skb; + len = cb->dump(skb, cb); if (len > 0) { @@ -1715,7 +1718,6 @@ static int netlink_dump(struct sock *sk) errout_skb: mutex_unlock(nlk->cb_mutex); kfree_skb(skb); -errout: return err; } @@ -1723,7 +1725,8 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback *), - int (*done)(struct netlink_callback *)) + int (*done)(struct netlink_callback *), + u16 min_dump_alloc) { struct netlink_callback *cb; struct sock *sk; @@ -1737,6 +1740,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->dump = dump; cb->done = done; cb->nlh = nlh; + cb->min_dump_alloc = min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1781d99145e2..482fa571b4ee 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -525,7 +525,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) genl_unlock(); err = netlink_dump_start(net->genl_sock, skb, nlh, - ops->dumpit, ops->done); + ops->dumpit, ops->done, 0); genl_lock(); return err; } diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 44059d0c8dd1..cd5ddb2ebc43 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -257,9 +257,12 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, case 3: if (nr_node->routes[1].quality > nr_node->routes[0].quality) { switch (nr_node->which) { - case 0: nr_node->which = 1; break; - case 1: nr_node->which = 0; break; - default: break; + case 0: + nr_node->which = 1; + break; + case 1: + nr_node->which = 0; + break; } nr_route = nr_node->routes[0]; nr_node->routes[0] = nr_node->routes[1]; @@ -505,12 +508,13 @@ static int nr_dec_obs(void) s->count--; switch (i) { - case 0: - s->routes[0] = s->routes[1]; - case 1: - s->routes[1] = s->routes[2]; - case 2: - break; + case 0: + s->routes[0] = s->routes[1]; + /* Fallthrough */ + case 1: + s->routes[1] = s->routes[2]; + case 2: + break; } break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 925f715686a5..c698cec0a445 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -187,9 +187,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); static void packet_flush_mclist(struct sock *sk); +struct packet_fanout; struct packet_sock { /* struct sock has to be the first member of packet_sock */ struct sock sk; + struct packet_fanout *fanout; struct tpacket_stats stats; struct packet_ring_buffer rx_ring; struct packet_ring_buffer tx_ring; @@ -212,6 +214,24 @@ struct packet_sock { struct packet_type prot_hook ____cacheline_aligned_in_smp; }; +#define PACKET_FANOUT_MAX 256 + +struct packet_fanout { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + unsigned int num_members; + u16 id; + u8 type; + u8 defrag; + atomic_t rr_cur; + struct list_head list; + struct sock *arr[PACKET_FANOUT_MAX]; + spinlock_t lock; + atomic_t sk_ref; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + struct packet_skb_cb { unsigned int origlen; union { @@ -222,6 +242,64 @@ struct packet_skb_cb { #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +static inline struct packet_sock *pkt_sk(struct sock *sk) +{ + return (struct packet_sock *)sk; +} + +static void __fanout_unlink(struct sock *sk, struct packet_sock *po); +static void __fanout_link(struct sock *sk, struct packet_sock *po); + +/* register_prot_hook must be invoked with the po->bind_lock held, + * or from a context in which asynchronous accesses to the packet + * socket is not possible (packet_create()). + */ +static void register_prot_hook(struct sock *sk) +{ + struct packet_sock *po = pkt_sk(sk); + if (!po->running) { + if (po->fanout) + __fanout_link(sk, po); + else + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } +} + +/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock + * held. If the sync parameter is true, we will temporarily drop + * the po->bind_lock and do a synchronize_net to make sure no + * asynchronous packet processing paths still refer to the elements + * of po->prot_hook. If the sync parameter is false, it is the + * callers responsibility to take care of this. + */ +static void __unregister_prot_hook(struct sock *sk, bool sync) +{ + struct packet_sock *po = pkt_sk(sk); + + po->running = 0; + if (po->fanout) + __fanout_unlink(sk, po); + else + __dev_remove_pack(&po->prot_hook); + __sock_put(sk); + + if (sync) { + spin_unlock(&po->bind_lock); + synchronize_net(); + spin_lock(&po->bind_lock); + } +} + +static void unregister_prot_hook(struct sock *sk, bool sync) +{ + struct packet_sock *po = pkt_sk(sk); + + if (po->running) + __unregister_prot_hook(sk, sync); +} + static inline __pure struct page *pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) @@ -324,11 +402,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } -static inline struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -344,6 +417,240 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } +static int fanout_rr_next(struct packet_fanout *f, unsigned int num) +{ + int x = atomic_read(&f->rr_cur) + 1; + + if (x >= num) + x = 0; + + return x; +} + +static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +{ + u32 idx, hash = skb->rxhash; + + idx = ((u64)hash * num) >> 32; + + return f->arr[idx]; +} + +static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +{ + int cur, old; + + cur = atomic_read(&f->rr_cur); + while ((old = atomic_cmpxchg(&f->rr_cur, cur, + fanout_rr_next(f, num))) != cur) + cur = old; + return f->arr[cur]; +} + +static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +{ + unsigned int cpu = smp_processor_id(); + + return f->arr[cpu % num]; +} + +static struct sk_buff *fanout_check_defrag(struct sk_buff *skb) +{ +#ifdef CONFIG_INET + const struct iphdr *iph; + u32 len; + + if (skb->protocol != htons(ETH_P_IP)) + return skb; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return skb; + if (!pskb_may_pull(skb, iph->ihl*4)) + return skb; + iph = ip_hdr(skb); + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + return skb; + + if (ip_is_fragment(ip_hdr(skb))) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb) { + if (pskb_trim_rcsum(skb, len)) + return skb; + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (ip_defrag(skb, IP_DEFRAG_AF_PACKET)) + return NULL; + skb->rxhash = 0; + } + } +#endif + return skb; +} + +static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct packet_fanout *f = pt->af_packet_priv; + unsigned int num = f->num_members; + struct packet_sock *po; + struct sock *sk; + + if (!net_eq(dev_net(dev), read_pnet(&f->net)) || + !num) { + kfree_skb(skb); + return 0; + } + + switch (f->type) { + case PACKET_FANOUT_HASH: + default: + if (f->defrag) { + skb = fanout_check_defrag(skb); + if (!skb) + return 0; + } + skb_get_rxhash(skb); + sk = fanout_demux_hash(f, skb, num); + break; + case PACKET_FANOUT_LB: + sk = fanout_demux_lb(f, skb, num); + break; + case PACKET_FANOUT_CPU: + sk = fanout_demux_cpu(f, skb, num); + break; + } + + po = pkt_sk(sk); + + return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); +} + +static DEFINE_MUTEX(fanout_mutex); +static LIST_HEAD(fanout_list); + +static void __fanout_link(struct sock *sk, struct packet_sock *po) +{ + struct packet_fanout *f = po->fanout; + + spin_lock(&f->lock); + f->arr[f->num_members] = sk; + smp_wmb(); + f->num_members++; + spin_unlock(&f->lock); +} + +static void __fanout_unlink(struct sock *sk, struct packet_sock *po) +{ + struct packet_fanout *f = po->fanout; + int i; + + spin_lock(&f->lock); + for (i = 0; i < f->num_members; i++) { + if (f->arr[i] == sk) + break; + } + BUG_ON(i >= f->num_members); + f->arr[i] = f->arr[f->num_members - 1]; + f->num_members--; + spin_unlock(&f->lock); +} + +static int fanout_add(struct sock *sk, u16 id, u16 type_flags) +{ + struct packet_sock *po = pkt_sk(sk); + struct packet_fanout *f, *match; + u8 type = type_flags & 0xff; + u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; + int err; + + switch (type) { + case PACKET_FANOUT_HASH: + case PACKET_FANOUT_LB: + case PACKET_FANOUT_CPU: + break; + default: + return -EINVAL; + } + + if (!po->running) + return -EINVAL; + + if (po->fanout) + return -EALREADY; + + mutex_lock(&fanout_mutex); + match = NULL; + list_for_each_entry(f, &fanout_list, list) { + if (f->id == id && + read_pnet(&f->net) == sock_net(sk)) { + match = f; + break; + } + } + err = -EINVAL; + if (match && match->defrag != defrag) + goto out; + if (!match) { + err = -ENOMEM; + match = kzalloc(sizeof(*match), GFP_KERNEL); + if (!match) + goto out; + write_pnet(&match->net, sock_net(sk)); + match->id = id; + match->type = type; + match->defrag = defrag; + atomic_set(&match->rr_cur, 0); + INIT_LIST_HEAD(&match->list); + spin_lock_init(&match->lock); + atomic_set(&match->sk_ref, 0); + match->prot_hook.type = po->prot_hook.type; + match->prot_hook.dev = po->prot_hook.dev; + match->prot_hook.func = packet_rcv_fanout; + match->prot_hook.af_packet_priv = match; + dev_add_pack(&match->prot_hook); + list_add(&match->list, &fanout_list); + } + err = -EINVAL; + if (match->type == type && + match->prot_hook.type == po->prot_hook.type && + match->prot_hook.dev == po->prot_hook.dev) { + err = -ENOSPC; + if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) { + __dev_remove_pack(&po->prot_hook); + po->fanout = match; + atomic_inc(&match->sk_ref); + __fanout_link(sk, po); + err = 0; + } + } +out: + mutex_unlock(&fanout_mutex); + return err; +} + +static void fanout_release(struct sock *sk) +{ + struct packet_sock *po = pkt_sk(sk); + struct packet_fanout *f; + + f = po->fanout; + if (!f) + return; + + po->fanout = NULL; + + mutex_lock(&fanout_mutex); + if (atomic_dec_and_test(&f->sk_ref)) { + list_del(&f->list); + dev_remove_pack(&f->prot_hook); + kfree(f); + } + mutex_unlock(&fanout_mutex); +} static const struct proto_ops packet_ops; @@ -798,7 +1105,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (vlan_tx_tag_present(skb)) { + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + status |= TP_STATUS_VLAN_VALID; + } else { + h.h2->tp_vlan_tci = 0; + } + h.h2->tp_padding = 0; hdrlen = sizeof(*h.h2); break; default: @@ -816,7 +1129,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, else sll->sll_ifindex = dev->ifindex; - __packet_set_status(po, h.raw, status); smp_mb(); #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 { @@ -825,8 +1137,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen); for (start = h.raw; start < end; start += PAGE_SIZE) flush_dcache_page(pgv_to_page(start)); + smp_wmb(); } #endif + __packet_set_status(po, h.raw, status); sk->sk_data_ready(sk, 0); @@ -969,7 +1283,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; __be16 proto; - int ifindex, err, reserve = 0; + bool need_rls_dev = false; + int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; int tp_len, size_max; @@ -981,7 +1296,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) err = -EBUSY; if (saddr == NULL) { - ifindex = po->ifindex; + dev = po->prot_hook.dev; proto = po->num; addr = NULL; } else { @@ -992,12 +1307,12 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) + offsetof(struct sockaddr_ll, sll_addr))) goto out; - ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); + need_rls_dev = true; } - dev = dev_get_by_index(sock_net(&po->sk), ifindex); err = -ENXIO; if (unlikely(dev == NULL)) goto out; @@ -1083,7 +1398,8 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - dev_put(dev); + if (need_rls_dev) + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -1121,8 +1437,9 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; + bool need_rls_dev = false; unsigned char *addr; - int ifindex, err, reserve = 0; + int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; int vnet_hdr_len; @@ -1134,7 +1451,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - ifindex = po->ifindex; + dev = po->prot_hook.dev; proto = po->num; addr = NULL; } else { @@ -1143,13 +1460,12 @@ static int packet_snd(struct socket *sock, goto out; if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; - ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; + dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); + need_rls_dev = true; } - - dev = dev_get_by_index(sock_net(sk), ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -1280,14 +1596,15 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - dev_put(dev); + if (need_rls_dev) + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev) + if (dev && need_rls_dev) dev_put(dev); out: return err; @@ -1328,14 +1645,10 @@ static int packet_release(struct socket *sock) spin_unlock_bh(&net->packet.sklist_lock); spin_lock(&po->bind_lock); - if (po->running) { - /* - * Remove from protocol table - */ - po->running = 0; - po->num = 0; - __dev_remove_pack(&po->prot_hook); - __sock_put(sk); + unregister_prot_hook(sk, false); + if (po->prot_hook.dev) { + dev_put(po->prot_hook.dev); + po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -1349,6 +1662,8 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + fanout_release(sk); + synchronize_net(); /* * Now the socket is dead. No more input will appear. @@ -1372,24 +1687,18 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) { struct packet_sock *po = pkt_sk(sk); - /* - * Detach an existing hook if present. - */ + + if (po->fanout) + return -EINVAL; lock_sock(sk); spin_lock(&po->bind_lock); - if (po->running) { - __sock_put(sk); - po->running = 0; - po->num = 0; - spin_unlock(&po->bind_lock); - dev_remove_pack(&po->prot_hook); - spin_lock(&po->bind_lock); - } - + unregister_prot_hook(sk, true); po->num = protocol; po->prot_hook.type = protocol; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; @@ -1398,9 +1707,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc goto out_unlock; if (!dev || (dev->flags & IFF_UP)) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) @@ -1434,10 +1741,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, strlcpy(name, uaddr->sa_data, sizeof(name)); dev = dev_get_by_name(sock_net(sk), name); - if (dev) { + if (dev) err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - dev_put(dev); - } return err; } @@ -1465,8 +1770,6 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len goto out; } err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - if (dev) - dev_put(dev); out: return err; @@ -1531,9 +1834,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (proto) { po->prot_hook.type = proto; - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + register_prot_hook(sk); } spin_lock_bh(&net->packet.sklist_lock); @@ -1675,6 +1976,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; vnet_hdr.csum_start = skb_checksum_start_offset(skb); vnet_hdr.csum_offset = skb->csum_offset; + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, @@ -1725,8 +2028,13 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = vlan_tx_tag_get(skb); - + if (vlan_tx_tag_present(skb)) { + aux.tp_vlan_tci = vlan_tx_tag_get(skb); + aux.tp_status |= TP_STATUS_VLAN_VALID; + } else { + aux.tp_vlan_tci = 0; + } + aux.tp_padding = 0; put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -2091,6 +2399,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->tp_tstamp = val; return 0; } + case PACKET_FANOUT: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + return fanout_add(sk, val & 0xffff, val >> 16); + } default: return -ENOPROTOOPT; } @@ -2189,6 +2508,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, val = po->tp_tstamp; data = &val; break; + case PACKET_FANOUT: + if (len > sizeof(int)) + len = sizeof(int); + val = (po->fanout ? + ((u32)po->fanout->id | + ((u32)po->fanout->type << 16)) : + 0); + data = &val; + break; default: return -ENOPROTOOPT; } @@ -2222,15 +2550,15 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); if (po->running) { - __dev_remove_pack(&po->prot_hook); - __sock_put(sk); - po->running = 0; + __unregister_prot_hook(sk, false); sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { po->ifindex = -1; + if (po->prot_hook.dev) + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); @@ -2239,11 +2567,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void case NETDEV_UP: if (dev->ifindex == po->ifindex) { spin_lock(&po->bind_lock); - if (po->num && !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } + if (po->num) + register_prot_hook(sk); spin_unlock(&po->bind_lock); } break; @@ -2510,10 +2835,8 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, was_running = po->running; num = po->num; if (was_running) { - __dev_remove_pack(&po->prot_hook); po->num = 0; - po->running = 0; - __sock_put(sk); + __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@ -2544,11 +2867,9 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, mutex_unlock(&po->pg_vec_lock); spin_lock(&po->bind_lock); - if (was_running && !po->running) { - sock_hold(sk); - po->running = 1; + if (was_running) { po->num = num; - dev_add_pack(&po->prot_hook); + register_prot_hook(sk); } spin_unlock(&po->bind_lock); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 438accb7a5a8..d61f6761777d 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -289,15 +289,16 @@ out: int __init phonet_netlink_register(void) { - int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, + NULL, NULL); if (err) return err; /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit); + __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL); return 0; } diff --git a/net/rds/bind.c b/net/rds/bind.c index 2f6b3fcc79f8..637bde56c9db 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -35,6 +35,7 @@ #include <linux/in.h> #include <linux/if_arp.h> #include <linux/jhash.h> +#include <linux/ratelimit.h> #include "rds.h" #define BIND_HASH_SIZE 1024 @@ -185,8 +186,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!trans) { ret = -EADDRNOTAVAIL; rds_remove_bound(rs); - if (printk_ratelimit()) - printk(KERN_INFO "RDS: rds_bind() could not find a transport, " + printk_ratelimited(KERN_INFO "RDS: rds_bind() could not find a transport, " "load rds_tcp or rds_rdma?\n"); goto out; } diff --git a/net/rds/ib.c b/net/rds/ib.c index cce19f95c624..3b83086bcc30 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -325,7 +325,7 @@ static int rds_ib_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib.h b/net/rds/ib.h index 4297d92788dc..edfaaaf164eb 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -3,6 +3,7 @@ #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#include <linux/interrupt.h> #include <linux/pci.h> #include <linux/slab.h> #include "rds.h" diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ee369d201a65..cd67026be2d5 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -435,13 +436,12 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " + } + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", &dp->dp_saddr, dp->dp_protocol_major, dp->dp_protocol_minor); - } return version; } @@ -587,7 +587,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 7c4dce8fa5e6..e59094981175 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "ib.h" @@ -207,8 +208,7 @@ static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic, } break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; diff --git a/net/rds/iw.c b/net/rds/iw.c index 5a9676fe594f..f7474844f096 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -226,7 +226,7 @@ static int rds_iw_laddr_check(__be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw.h b/net/rds/iw.h index 90151922178c..04ce3b193f79 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -1,6 +1,7 @@ #ifndef _RDS_IW_H #define _RDS_IW_H +#include <linux/interrupt.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> #include "rds.h" diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 3a60a15d1b4a..9556d2895f7a 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -258,8 +259,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn) */ rds_iwdev = ib_get_client_data(dev, &rds_iw_client); if (!rds_iwdev) { - if (printk_ratelimit()) - printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", + printk_ratelimited(KERN_NOTICE "RDS/IW: No client_data for device %s\n", dev->name); return -EOPNOTSUPP; } @@ -365,13 +365,12 @@ static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp) version = RDS_PROTOCOL_3_0; while ((common >>= 1) != 0) version++; - } else if (printk_ratelimit()) { - printk(KERN_NOTICE "RDS: Connection from %pI4 using " + } + printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", &dp->dp_saddr, dp->dp_protocol_major, dp->dp_protocol_minor); - } return version; } @@ -522,7 +521,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, - RDMA_PS_TCP); + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); ic->i_cm_id = NULL; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6deaa77495e3..8b77edbab272 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -32,6 +32,7 @@ */ #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -729,8 +730,8 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) failed_wr = &f_wr; ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); BUG_ON(failed_wr != &f_wr); - if (ret && printk_ratelimit()) - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); return ret; } @@ -751,8 +752,8 @@ static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr) failed_wr = &s_wr; ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr); - if (ret && printk_ratelimit()) { - printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + if (ret) { + printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); goto out; } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 545d8ee3efb1..e40c3c5db2c4 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -34,6 +34,7 @@ #include <linux/in.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/ratelimit.h> #include "rds.h" #include "iw.h" @@ -258,8 +259,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) * when the SEND completes. */ break; default: - if (printk_ratelimit()) - printk(KERN_NOTICE + printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", __func__, send->s_wr.opcode); break; diff --git a/net/rds/page.c b/net/rds/page.c index d8acdebe3c7c..b82d63e77b03 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -32,6 +32,7 @@ */ #include <linux/highmem.h> #include <linux/gfp.h> +#include <linux/cpu.h> #include "rds.h" diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 4195a0539829..f8760e1b6688 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -158,7 +158,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); + cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/rds/send.c b/net/rds/send.c index d58ae5f9339e..aa57e22539ef 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -35,6 +35,7 @@ #include <net/sock.h> #include <linux/in.h> #include <linux/list.h> +#include <linux/ratelimit.h> #include "rds.h" @@ -1006,16 +1007,14 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { - if (printk_ratelimit()) - printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", + printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", &rm->rdma, conn->c_trans->xmit_rdma); ret = -EOPNOTSUPP; goto out; } if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) { - if (printk_ratelimit()) - printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", + printk_ratelimited(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n", &rm->atomic, conn->c_trans->xmit_atomic); ret = -EOPNOTSUPP; goto out; diff --git a/net/rds/tcp_stats.c b/net/rds/tcp_stats.c index d5898d03cd68..f8a7954f1f59 100644 --- a/net/rds/tcp_stats.c +++ b/net/rds/tcp_stats.c @@ -40,7 +40,7 @@ DEFINE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats) ____cacheline_aligned; -static const char const *rds_tcp_stat_names[] = { +static const char * const rds_tcp_stat_names[] = { "tcp_data_ready_calls", "tcp_write_space_calls", "tcp_sndbuf_full", diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index fa5f5641a2c2..7a02bd1cc5a0 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -266,13 +266,6 @@ void rose_transmit_link(struct sk_buff *skb, struct rose_neigh *neigh) { unsigned char *dptr; -#if 0 - if (call_fw_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) { - kfree_skb(skb); - return; - } -#endif - if (neigh->loopback) { rose_loopback_queue(skb, neigh); return; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 479cae57d187..d389de197089 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -864,11 +864,6 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) int res = 0; char buf[11]; -#if 0 - if (call_in_firewall(PF_ROSE, skb->dev, skb->data, NULL, &skb) != FW_ACCEPT) - return res; -#endif - if (skb->len < ROSE_MIN_LEN) return res; frametype = skb->data[2]; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a606025814a1..f2fb67e701a3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -365,10 +365,10 @@ static struct tc_action_ops *tc_lookup_action_id(u32 type) } #endif -int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, +int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { - struct tc_action *a; + const struct tc_action *a; int ret = -1; if (skb->tc_verd & TC_NCLS) { @@ -1115,9 +1115,10 @@ nlmsg_failure: static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL); - rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action); + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, + NULL); return 0; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 6cdf9abe475f..453a73431ac4 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -500,7 +500,7 @@ fail: } static int tcf_csum(struct sk_buff *skb, - struct tc_action *a, struct tcf_result *res) + const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = a->priv; int action; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 2b4ab4b05ce8..b77f5a06a658 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -125,7 +125,8 @@ static int tcf_gact_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_gact *gact = a->priv; int action = TC_ACT_SHOT; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9fc211a1b20e..60f8f616e8fa 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -195,7 +195,7 @@ static int tcf_ipt_cleanup(struct tc_action *a, int bind) return tcf_ipt_release(ipt, bind); } -static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, +static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 961386e2f2c0..102fc212cd64 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -154,7 +154,7 @@ static int tcf_mirred_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, +static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *m = a->priv; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 762b027650a9..001d1b354869 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -102,7 +102,7 @@ static int tcf_nat_cleanup(struct tc_action *a, int bind) return tcf_hash_release(&p->common, bind, &nat_hash_info); } -static int tcf_nat(struct sk_buff *skb, struct tc_action *a, +static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_nat *p = a->priv; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7affe9a92757..10d3aed86560 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -120,7 +120,7 @@ static int tcf_pedit_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, +static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = a->priv; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b3b9b32f4e00..6fb3f5af0f85 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -282,7 +282,7 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) return ret; } -static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, +static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_police *police = a->priv; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a34a22de60b3..73e0a3ab4d55 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -36,7 +36,8 @@ static struct tcf_hashinfo simp_hash_info = { }; #define SIMP_MAX_DATA 32 -static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_simp(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_defact *d = a->priv; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 5f6f0c7c3905..35dbbe91027e 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -39,7 +39,7 @@ static struct tcf_hashinfo skbedit_hash_info = { .lock = &skbedit_lock, }; -static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, +static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_skbedit *d = a->priv; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bb2c523f8158..a69d44f1dac5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -40,9 +40,9 @@ static DEFINE_RWLOCK(cls_mod_lock); /* Find classifier type by string name */ -static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) +static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) { - struct tcf_proto_ops *t = NULL; + const struct tcf_proto_ops *t = NULL; if (kind) { read_lock(&cls_mod_lock); @@ -132,7 +132,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q; struct tcf_proto **back, **chain; struct tcf_proto *tp; - struct tcf_proto_ops *tp_ops; + const struct tcf_proto_ops *tp_ops; const struct Qdisc_class_ops *cops; unsigned long cl; unsigned long fh; @@ -610,10 +610,10 @@ EXPORT_SYMBOL(tcf_exts_dump_stats); static int __init tc_filter_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, - tc_dump_tfilter); + tc_dump_tfilter, NULL); return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 8be8872dd571..ea1f70b5a5f4 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -39,7 +39,7 @@ static const struct tcf_ext_map basic_ext_map = { .police = TCA_BASIC_POLICE }; -static int basic_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 32a335194ca5..f84fdc3a7f27 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -101,7 +101,7 @@ struct cls_cgroup_head { struct tcf_ematch_tree ematches; }; -static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = tp->root; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 8ec01391d988..6994214db8f8 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && @@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && @@ -356,7 +356,7 @@ static u32 flow_key_get(struct sk_buff *skb, int key) } } -static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct flow_head *head = tp->root; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 26e7bc4ffb79..389af152ec45 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -77,7 +77,7 @@ static inline int fw_hash(u32 handle) return handle & (HTSIZE - 1); } -static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct fw_head *head = (struct fw_head *)tp->root; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a907905376df..13ab66e9df58 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -125,7 +125,7 @@ static inline int route4_hash_wild(void) return 0; \ } -static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct route4_head *head = (struct route4_head *)tp->root; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 402c44b241a3..be4505ee67a9 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -130,7 +130,7 @@ static struct tcf_ext_map rsvp_ext_map = { return r; \ } -static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; @@ -167,7 +167,7 @@ restart: dst = &nhptr->daddr; protocol = nhptr->protocol; xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (nhptr->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(nhptr)) return -1; #endif diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 36667fa64237..dbe199234c63 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -79,7 +79,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) } -static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp, +static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct tcindex_data *p = PRIV(tp); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 3b93fc0c8955..939b627b4795 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -93,7 +93,7 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct { struct tc_u_knode *knode; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 49130e8abff0..1363bf14e61b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -404,12 +404,6 @@ META_COLLECTOR(int_sk_alloc) dst->value = (__force int) skb->sk->sk_allocation; } -META_COLLECTOR(int_sk_route_caps) -{ - SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_route_caps; -} - META_COLLECTOR(int_sk_hash) { SKIP_NONLOCAL(skb); @@ -530,7 +524,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen), [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), - [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), [META_ID(SK_HASH)] = META_FUNC(int_sk_hash), [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6b8627661c98..dca6c1a576f7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1644,7 +1644,7 @@ done: * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, +int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { __be16 protocol = skb->protocol; @@ -1668,12 +1668,12 @@ int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, } EXPORT_SYMBOL(tc_classify_compat); -int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, +int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int err = 0; #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto *otp = tp; + const struct tcf_proto *otp = tp; reclassify: #endif @@ -1792,12 +1792,12 @@ static int __init pktsched_init(void) register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 3f08158b8688..e25e49061a0d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 06afbaeb4c88..3422b25df9e4 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -181,7 +181,7 @@ static bool choke_match_flow(struct sk_buff *skb1, ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) return false; - if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip1) | ip_is_fragment(ip2)) ip_proto = 0; off1 += ip1->ihl * 4; off2 += ip2->ihl * 4; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1721d71c27c..69fca2798804 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -189,15 +189,15 @@ static inline int qdisc_restart(struct Qdisc *q) void __qdisc_run(struct Qdisc *q) { - unsigned long start_time = jiffies; + int quota = weight_p; while (qdisc_restart(q)) { /* - * Postpone processing if - * 1. another process needs the CPU; - * 2. we've been doing it for too long. + * Ordered by possible occurrence: Postpone processing if + * 1. we've exceeded packet quota + * 2. another process needs the CPU; */ - if (need_resched() || jiffies != start_time) { + if (--quota <= 0 || need_resched()) { __netif_schedule(q); break; } @@ -251,9 +251,8 @@ static void dev_watchdog(unsigned long arg) } if (some_queue_timedout) { - char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", - dev->name, netdev_drivername(dev, drivername, 64), i); + dev->name, netdev_drivername(dev), i); dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 69c35f6cd13f..eb3b9a86c6ed 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -13,6 +13,7 @@ * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> */ +#include <linux/mm.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b6ea6afa55b0..4536ee64383e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -157,7 +157,7 @@ static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) iph = ip_hdr(skb); h = (__force u32)iph->daddr; h2 = (__force u32)iph->saddr ^ iph->protocol; - if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(iph)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 45cd30098e34..a3b7120fcc74 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -229,7 +229,7 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); - struct neighbour *mn = skb_dst(skb)->neighbour; + struct neighbour *mn = dst_get_neighbour(skb_dst(skb)); struct neighbour *n = q->ncache; if (mn->tbl == NULL) @@ -270,7 +270,7 @@ static inline int teql_resolve(struct sk_buff *skb, if (dev->header_ops == NULL || skb_dst(skb) == NULL || - skb_dst(skb)->neighbour == NULL) + dst_get_neighbour(skb_dst(skb)) == NULL) return 0; return __teql_resolve(skb, skb_res, dev); } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 525f97c467e9..dc16b90ddb6f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -280,6 +280,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.asconf_capable = 0; if (sctp_addip_noauth) asoc->peer.asconf_capable = 1; + asoc->asconf_addr_del_pending = NULL; + asoc->src_out_of_asoc_ok = 0; /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); @@ -444,15 +446,11 @@ void sctp_association_free(struct sctp_association *asoc) asoc->peer.transport_count = 0; - /* Free any cached ASCONF_ACK chunk. */ - sctp_assoc_free_asconf_acks(asoc); - - /* Free the ASCONF queue. */ - sctp_assoc_free_asconf_queue(asoc); + sctp_asconf_queue_teardown(asoc); - /* Free any cached ASCONF chunk. */ - if (asoc->addip_last_asconf) - sctp_chunk_free(asoc->addip_last_asconf); + /* Free pending address space being deleted */ + if (asoc->asconf_addr_del_pending != NULL) + kfree(asoc->asconf_addr_del_pending); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1646,3 +1644,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( return NULL; } + +void sctp_asconf_queue_teardown(struct sctp_association *asoc) +{ + /* Free any cached ASCONF_ACK chunk. */ + sctp_assoc_free_asconf_acks(asoc); + + /* Free the ASCONF queue. */ + sctp_assoc_free_asconf_queue(asoc); + + /* Free any cached ASCONF chunk. */ + if (asoc->addip_last_asconf) + sctp_chunk_free(asoc->addip_last_asconf); +} diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 83e3011c19ca..4ece451c8d27 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -430,7 +430,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) break; @@ -534,6 +534,21 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope) return 0; } +int sctp_is_ep_boundall(struct sock *sk) +{ + struct sctp_bind_addr *bp; + struct sctp_sockaddr_entry *addr; + + bp = &sctp_sk(sk)->ep->base.bind_addr; + if (sctp_list_single_entry(&bp->address_list)) { + addr = list_entry(bp->address_list.next, + struct sctp_sockaddr_entry, list); + if (sctp_is_any(sk, &addr->a)) + return 1; + } + return 0; +} + /******************************************************************** * 3rd Level Abstractions ********************************************************************/ diff --git a/net/sctp/input.c b/net/sctp/input.c index 741ed1648838..b7692aab6e9c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -510,8 +510,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb, * discard the packet. */ if (vtag == 0) { - chunkhdr = (struct sctp_init_chunk *)((void *)sctphdr - + sizeof(struct sctphdr)); + chunkhdr = (void *)sctphdr + sizeof(struct sctphdr); if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t) + sizeof(__be32) || chunkhdr->chunk_hdr.type != SCTP_CID_INIT || diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 0bb0d7cb9f10..aabaee41dd3e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -112,6 +112,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -122,6 +123,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); diff --git a/net/sctp/output.c b/net/sctp/output.c index b4f3cf06d8da..08b3cead6503 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -500,23 +500,20 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: Adler-32 is no longer applicable, as has been replaced * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ - if (!sctp_checksum_disable && - !(dst->dev->features & (NETIF_F_NO_CSUM | NETIF_F_SCTP_CSUM))) { - __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + if (!sctp_checksum_disable) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - /* 3) Put the resultant value into the checksum field in the - * common header, and leave the rest of the bits unchanged. - */ - sh->checksum = sctp_end_cksum(crc32); - } else { - if (dst->dev->features & NETIF_F_SCTP_CSUM) { + /* 3) Put the resultant value into the checksum field in the + * common header, and leave the rest of the bits unchanged. + */ + sh->checksum = sctp_end_cksum(crc32); + } else { /* no need to seed pseudo checksum for SCTP */ nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (skb_transport_header(nskb) - nskb->head); nskb->csum_offset = offsetof(struct sctphdr, checksum); - } else { - nskb->ip_summed = CHECKSUM_UNNECESSARY; } } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 1c88c8911dc5..a6d27bf563a5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -754,6 +754,16 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) */ list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) { + /* RFC 5061, 5.3 + * F1) This means that until such time as the ASCONF + * containing the add is acknowledged, the sender MUST + * NOT use the new IP address as a source for ANY SCTP + * packet except on carrying an ASCONF Chunk. + */ + if (asoc->src_out_of_asoc_ok && + chunk->chunk_hdr->type != SCTP_CID_ASCONF) + continue; + list_del_init(&chunk->list); /* Pick the right transport to use. */ @@ -881,6 +891,9 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) } } + if (q->asoc->src_out_of_asoc_ok) + goto sctp_flush_out; + /* Is it OK to send data chunks? */ switch (asoc->state) { case SCTP_STATE_COOKIE_ECHOED: @@ -1582,6 +1595,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, #endif /* SCTP_DEBUG */ if (transport) { if (bytes_acked) { + struct sctp_association *asoc = transport->asoc; + /* We may have counted DATA that was migrated * to this transport due to DEL-IP operation. * Subtract those bytes, since the were never @@ -1600,6 +1615,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, transport->error_count = 0; transport->asoc->overall_error_count = 0; + /* + * While in SHUTDOWN PENDING, we may have started + * the T5 shutdown guard timer after reaching the + * retransmission limit. Stop that timer as soon + * as the receiver acknowledged any data. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING && + del_timer(&asoc->timers + [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD])) + sctp_association_put(asoc); + /* Mark the destination transport address as * active if it is not so marked. */ @@ -1629,10 +1655,15 @@ static void sctp_check_transmitted(struct sctp_outq *q, * A sender is doing zero window probing when the * receiver's advertised window is zero, and there is * only one data chunk in flight to the receiver. + * + * Allow the association to timeout while in SHUTDOWN + * PENDING or SHUTDOWN RECEIVED in case the receiver + * stays in zero window mode forever. */ if (!q->asoc->peer.rwnd && !list_empty(&tlist) && - (sack_ctsn+2 == q->asoc->next_tsn)) { + (sack_ctsn+2 == q->asoc->next_tsn) && + q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { SCTP_DEBUG_PRINTK("%s: SACK received for zero " "window probe: %u\n", __func__, sack_ctsn); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 67380a29e2e9..91784f44a2e2 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -503,7 +503,9 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v4_dst_saddr(&dst_saddr, fl4, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || (laddr->state == SCTP_ADDR_DEL) || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; @@ -623,6 +625,143 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } +void sctp_addr_wq_timeout_handler(unsigned long arg) +{ + struct sctp_sockaddr_entry *addrw, *temp; + struct sctp_sock *sp; + + spin_lock_bh(&sctp_addr_wq_lock); + + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", + " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + addrw); + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + /* Now we send an ASCONF for each association */ + /* Note. we currently don't handle link local IPv6 addressees */ + if (addrw->a.sa.sa_family == AF_INET6) { + struct in6_addr *in6; + + if (ipv6_addr_type(&addrw->a.v6.sin6_addr) & + IPV6_ADDR_LINKLOCAL) + goto free_next; + + in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; + if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + addrw->state == SCTP_ADDR_NEW) { + unsigned long timeo_val; + + SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + break; + } + } +#endif + list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + struct sock *sk; + + sk = sctp_opt2sk(sp); + /* ignore bound-specific endpoints */ + if (!sctp_is_ep_boundall(sk)) + continue; + sctp_bh_lock_sock(sk); + if (sctp_asconf_mgmt(sp, addrw) < 0) + SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + sctp_bh_unlock_sock(sk); + } +free_next: + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +static void sctp_free_addr_wq(void) +{ + struct sctp_sockaddr_entry *addrw; + struct sctp_sockaddr_entry *temp; + + spin_lock_bh(&sctp_addr_wq_lock); + del_timer(&sctp_addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + +/* lookup the entry for the same address in the addr_waitq + * sctp_addr_wq MUST be locked + */ +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +{ + struct sctp_sockaddr_entry *addrw; + + list_for_each_entry(addrw, &sctp_addr_waitq, list) { + if (addrw->a.sa.sa_family != addr->a.sa.sa_family) + continue; + if (addrw->a.sa.sa_family == AF_INET) { + if (addrw->a.v4.sin_addr.s_addr == + addr->a.v4.sin_addr.s_addr) + return addrw; + } else if (addrw->a.sa.sa_family == AF_INET6) { + if (ipv6_addr_equal(&addrw->a.v6.sin6_addr, + &addr->a.v6.sin6_addr)) + return addrw; + } + } + return NULL; +} + +void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +{ + struct sctp_sockaddr_entry *addrw; + unsigned long timeo_val; + + /* first, we check if an opposite message already exist in the queue. + * If we found such message, it is removed. + * This operation is a bit stupid, but the DHCP client attaches the + * new address after a couple of addition and deletion of that address + */ + + spin_lock_bh(&sctp_addr_wq_lock); + /* Offsets existing events in addr_wq */ + addrw = sctp_addr_wq_lookup(addr); + if (addrw) { + if (addrw->state != cmd) { + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", + " in wq %p\n", addrw->state, &addrw->a, + &sctp_addr_waitq); + list_del(&addrw->list); + kfree(addrw); + } + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + + /* OK, we have to add the new address to the wait queue */ + addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + if (addrw == NULL) { + spin_unlock_bh(&sctp_addr_wq_lock); + return; + } + addrw->state = cmd; + list_add_tail(&addrw->list, &sctp_addr_waitq); + SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", + " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + + if (!timer_pending(&sctp_addr_wq_timer)) { + timeo_val = jiffies; + timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); + mod_timer(&sctp_addr_wq_timer, timeo_val); + } + spin_unlock_bh(&sctp_addr_wq_lock); +} + /* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -650,6 +789,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); spin_unlock_bh(&sctp_local_addr_lock); } break; @@ -660,6 +800,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); @@ -1058,7 +1199,6 @@ SCTP_STATIC __init int sctp_init(void) int status = -EINVAL; unsigned long goal; unsigned long limit; - unsigned long nr_pages; int max_share; int order; @@ -1148,15 +1288,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); - /* Set the pressure threshold to be a fraction of global memory that - * is up to 1/2 at 256 MB, decreasing toward zero with the amount of - * memory, with a floor of 128 pages. - * Note this initializes the data in sctpv6_prot too - * Unabashedly stolen from tcp_init - */ - nr_pages = totalram_pages - totalhigh_pages; - limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = nr_free_buffer_pages() / 8; limit = max(limit, 128UL); sysctl_sctp_mem[0] = limit / 4 * 3; sysctl_sctp_mem[1] = limit; @@ -1242,6 +1374,7 @@ SCTP_STATIC __init int sctp_init(void) /* Disable ADDIP by default. */ sctp_addip_enable = 0; sctp_addip_noauth = 0; + sctp_default_auto_asconf = 0; /* Enable PR-SCTP by default. */ sctp_prsctp_enable = 1; @@ -1266,6 +1399,13 @@ SCTP_STATIC __init int sctp_init(void) spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); + /* Initialize the address event list */ + INIT_LIST_HEAD(&sctp_addr_waitq); + INIT_LIST_HEAD(&sctp_auto_asconf_splist); + spin_lock_init(&sctp_addr_wq_lock); + sctp_addr_wq_timer.expires = 0; + setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); + status = sctp_v4_protosw_init(); if (status) @@ -1337,6 +1477,7 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); + sctp_free_addr_wq(); /* Free the control endpoint. */ inet_ctl_sock_destroy(sctp_ctl_sock); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 58eb27fed4b4..81db4e385352 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2768,11 +2768,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, int addr_param_len = 0; int totallen = 0; int i; + int del_pickup = 0; /* Get total length of all the address parameters. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); @@ -2780,6 +2781,13 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += addr_param_len; addr_buf += af->sockaddr_len; + if (asoc->asconf_addr_del_pending && !del_pickup) { + /* reuse the parameter length from the same scope one */ + totallen += paramlen; + totallen += addr_param_len; + del_pickup = 1; + SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + } } /* Create an asconf chunk with the required length. */ @@ -2790,7 +2798,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, /* Add the address parameters to the asconf chunk. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; @@ -2802,6 +2810,17 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_buf += af->sockaddr_len; } + if (flags == SCTP_PARAM_ADD_IP && del_pickup) { + addr = asoc->asconf_addr_del_pending; + af = sctp_get_af_specific(addr->v4.sin_family); + addr_param_len = af->to_addr_param(addr, &addr_param); + param.param_hdr.type = SCTP_PARAM_DEL_IP; + param.param_hdr.length = htons(paramlen + addr_param_len); + param.crr_id = i; + + sctp_addto_chunk(retval, paramlen, ¶m); + sctp_addto_chunk(retval, addr_param_len, &addr_param); + } return retval; } @@ -2939,8 +2958,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, union sctp_addr addr; union sctp_addr_param *addr_param; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && @@ -3014,7 +3032,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, * an Error Cause TLV set to the new error code 'Request to * Delete Source IP Address' */ - if (sctp_cmp_addr_exact(sctp_source(asconf), &addr)) + if (sctp_cmp_addr_exact(&asconf->source, &addr)) return SCTP_ERROR_DEL_SRC_IP; /* Section 4.2.2 @@ -3125,7 +3143,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; chunk_len -= length; /* create an ASCONF_ACK chunk. @@ -3166,8 +3184,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, /* Move to the next ASCONF param. */ length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); + asconf_param = (void *)asconf_param + length; chunk_len -= length; } @@ -3197,8 +3214,7 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; - addr_param = (union sctp_addr_param *) - ((void *)asconf_param + sizeof(sctp_addip_param_t)); + addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); @@ -3224,6 +3240,11 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, case SCTP_PARAM_DEL_IP: local_bh_disable(); sctp_del_bind_addr(bp, &addr); + if (asoc->asconf_addr_del_pending != NULL && + sctp_cmp_addr_exact(asoc->asconf_addr_del_pending, &addr)) { + kfree(asoc->asconf_addr_del_pending); + asoc->asconf_addr_del_pending = NULL; + } local_bh_enable(); list_for_each_entry(transport, &asoc->peer.transport_addr_list, transports) { @@ -3278,8 +3299,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: length = sizeof(sctp_addip_param_t); - err_param = (sctp_errhdr_t *) - ((void *)asconf_ack_param + length); + err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) return err_param->cause; @@ -3292,8 +3312,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, } length = ntohs(asconf_ack_param->param_hdr.length); - asconf_ack_param = (sctp_addip_param_t *) - ((void *)asconf_ack_param + length); + asconf_ack_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; } @@ -3325,7 +3344,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * pointer to the first asconf parameter. */ length = ntohs(addr_param->p.length); - asconf_param = (sctp_addip_param_t *)((void *)addr_param + length); + asconf_param = (void *)addr_param + length; asconf_len -= length; /* ADDIP 4.1 @@ -3376,11 +3395,13 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * one. */ length = ntohs(asconf_param->param_hdr.length); - asconf_param = (sctp_addip_param_t *)((void *)asconf_param + - length); + asconf_param = (void *)asconf_param + length; asconf_len -= length; } + if (no_err && asoc->src_out_of_asoc_ok) + asoc->src_out_of_asoc_ok = 0; + /* Free the cached last sent asconf chunk. */ list_del_init(&asconf->transmitted_list); sctp_chunk_free(asconf); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d612ca1ca6c0..167c880cf8da 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -670,10 +670,19 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the * HEARTBEAT should clear the error counter of the destination * transport address to which the HEARTBEAT was sent. - * The association's overall error count is also cleared. */ t->error_count = 0; - t->asoc->overall_error_count = 0; + + /* + * Although RFC4960 specifies that the overall error count must + * be cleared when a HEARTBEAT ACK is received, we make an + * exception while in SHUTDOWN PENDING. If the peer keeps its + * window shut forever, we may never be able to transmit our + * outstanding data and rely on the retransmission limit be reached + * to shutdown the association. + */ + if (t->asoc->state != SCTP_STATE_SHUTDOWN_PENDING) + t->asoc->overall_error_count = 0; /* Clear the hb_sent flag to signal that we had a good * acknowledgement. @@ -1201,7 +1210,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, int local_cork = 0; if (SCTP_EVENT_T_TIMEOUT != event_type) - chunk = (struct sctp_chunk *) event_arg; + chunk = event_arg; /* Note: This whole file is a huge candidate for rework. * For example, each command could either have its own handler, so @@ -1437,6 +1446,13 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); break; + case SCTP_CMD_TIMER_START_ONCE: + timer = &asoc->timers[cmd->obj.to]; + + if (timer_pending(timer)) + break; + /* fall through */ + case SCTP_CMD_TIMER_START: timer = &asoc->timers[cmd->obj.to]; timeout = asoc->timeouts[cmd->obj.to]; @@ -1670,6 +1686,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); break; + case SCTP_CMD_PURGE_ASCONF_QUEUE: + sctp_asconf_queue_teardown(asoc); + break; default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f4a4f8368ee..49b847b00f99 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_CONSUME; } - /* For now, fail any unsent/unacked data. Consider the optional - * choice of resending of this data. + /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked + * data. Consider the optional choice of resending of this data. */ + sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); + /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue + * and ASCONF-ACK cache. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; @@ -3998,31 +4008,32 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, auth_hdr = (struct sctp_authhdr *)chunk->skb->data; error = sctp_sf_authenticate(ep, asoc, type, chunk); switch (error) { - case SCTP_IERROR_AUTH_BAD_HMAC: - /* Generate the ERROR chunk and discard the rest - * of the packet - */ - err_chunk = sctp_make_op_error(asoc, chunk, - SCTP_ERROR_UNSUP_HMAC, - &auth_hdr->hmac_id, - sizeof(__u16), 0); - if (err_chunk) { - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, - SCTP_CHUNK(err_chunk)); - } - /* Fall Through */ - case SCTP_IERROR_AUTH_BAD_KEYID: - case SCTP_IERROR_BAD_SIG: - return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - break; - case SCTP_IERROR_PROTO_VIOLATION: - return sctp_sf_violation_chunklen(ep, asoc, type, arg, - commands); - break; - case SCTP_IERROR_NOMEM: - return SCTP_DISPOSITION_NOMEM; - default: - break; + case SCTP_IERROR_AUTH_BAD_HMAC: + /* Generate the ERROR chunk and discard the rest + * of the packet + */ + err_chunk = sctp_make_op_error(asoc, chunk, + SCTP_ERROR_UNSUP_HMAC, + &auth_hdr->hmac_id, + sizeof(__u16), 0); + if (err_chunk) { + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, + SCTP_CHUNK(err_chunk)); + } + /* Fall Through */ + case SCTP_IERROR_AUTH_BAD_KEYID: + case SCTP_IERROR_BAD_SIG: + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + case SCTP_IERROR_PROTO_VIOLATION: + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + case SCTP_IERROR_NOMEM: + return SCTP_DISPOSITION_NOMEM; + + default: /* Prevent gcc warnings */ + break; } if (asoc->active_key_id != ntohs(auth_hdr->shkey_id)) { @@ -5144,7 +5155,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( * The sender of the SHUTDOWN MAY also start an overall guard timer * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); if (asoc->autoclose) @@ -5289,14 +5300,28 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { - sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, - SCTP_ERROR(ETIMEDOUT)); - /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ - sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, - SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - return SCTP_DISPOSITION_DELETE_TCB; + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { + /* + * We are here likely because the receiver had its rwnd + * closed for a while and we have not been able to + * transmit the locally queued data within the maximum + * retransmission attempts limit. Start the T5 + * shutdown guard timer to give the receiver one last + * chance and some additional time to recover before + * aborting. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START_ONCE, + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + } else { + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ETIMEDOUT)); + /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_NO_ERROR)); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + return SCTP_DISPOSITION_DELETE_TCB; + } } /* E1) For the destination address for which the timer diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 0338dc6fdc9d..7c211a7f90f4 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -827,7 +827,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ + TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6766913a53e6..836aa63ee121 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -476,7 +476,7 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) /* The list may contain either IPv4 or IPv6 address; * determine the address length for walking thru the list. */ - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); if (!af) { retval = -EINVAL; @@ -555,7 +555,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -583,22 +583,35 @@ static int sctp_send_asconf_add_ip(struct sock *sk, goto out; } - retval = sctp_send_asconf(asoc, chunk); - if (retval) - goto out; - /* Add the new addresses to the bind address list with * use_as_src set to 0. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - addr = (union sctp_addr *)addr_buf; + addr = addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); memcpy(&saveaddr, addr, af->sockaddr_len); retval = sctp_add_bind_addr(bp, &saveaddr, SCTP_ADDR_NEW, GFP_ATOMIC); addr_buf += af->sockaddr_len; } + if (asoc->src_out_of_asoc_ok) { + struct sctp_transport *trans; + + list_for_each_entry(trans, + &asoc->peer.transport_addr_list, transports) { + /* Clear the source and route cache */ + dst_release(trans->dst); + trans->cwnd = min(4*asoc->pathmtu, max_t(__u32, + 2*asoc->pathmtu, 4380)); + trans->ssthresh = asoc->peer.i.a_rwnd; + trans->rto = asoc->rto_initial; + trans->rtt = trans->srtt = trans->rttvar = 0; + sctp_transport_route(trans, NULL, + sctp_sk(asoc->base.sk)); + } + } + retval = sctp_send_asconf(asoc, chunk); } out: @@ -646,7 +659,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) goto err_bindx_rem; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); if (!af) { retval = -EINVAL; @@ -715,7 +728,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_sockaddr_entry *saddr; int i; int retval = 0; + int stored = 0; + chunk = NULL; if (!sctp_addip_enable) return retval; @@ -743,7 +758,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); if (!af) { retval = -EINVAL; @@ -766,8 +781,37 @@ static int sctp_send_asconf_del_ip(struct sock *sk, bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - if (!laddr) - continue; + if ((laddr == NULL) && (addrcnt == 1)) { + if (asoc->asconf_addr_del_pending) + continue; + asoc->asconf_addr_del_pending = + kzalloc(sizeof(union sctp_addr), GFP_ATOMIC); + if (asoc->asconf_addr_del_pending == NULL) { + retval = -ENOMEM; + goto out; + } + asoc->asconf_addr_del_pending->sa.sa_family = + addrs->sa_family; + asoc->asconf_addr_del_pending->v4.sin_port = + htons(bp->port); + if (addrs->sa_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)addrs; + asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr; + } else if (addrs->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)addrs; + ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr); + } + SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", + " at %p\n", asoc, asoc->asconf_addr_del_pending, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; + stored = 1; + goto skip_mkasconf; + } /* We do not need RCU protection throughout this loop * because this is done under a socket lock from the @@ -780,12 +824,13 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto out; } +skip_mkasconf: /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ addr_buf = addrs; for (i = 0; i < addrcnt; i++) { - laddr = (union sctp_addr *)addr_buf; + laddr = addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) @@ -805,12 +850,37 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sctp_sk(asoc->base.sk)); } + if (stored) + /* We don't need to transmit ASCONF */ + continue; retval = sctp_send_asconf(asoc, chunk); } out: return retval; } +/* set addr events to assocs in the endpoint. ep and addr_wq must be locked */ +int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) +{ + struct sock *sk = sctp_opt2sk(sp); + union sctp_addr *addr; + struct sctp_af *af; + + /* It is safe to write port space in caller. */ + addr = &addrw->a; + addr->v4.sin_port = htons(sp->ep->base.bind_addr.port); + af = sctp_get_af_specific(addr->sa.sa_family); + if (!af) + return -EINVAL; + if (sctp_verify_addr(sk, addr, af->sockaddr_len)) + return -EINVAL; + + if (addrw->state == SCTP_ADDR_NEW) + return sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1); + else + return sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1); +} + /* Helper for tunneling sctp_bindx() requests through sctp_setsockopt() * * API 8.1 @@ -927,7 +997,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, return -EINVAL; } - sa_addr = (struct sockaddr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa_family); /* If the address family is not supported or if this address @@ -1018,7 +1088,7 @@ static int __sctp_connect(struct sock* sk, goto out_free; } - sa_addr = (union sctp_addr *)addr_buf; + sa_addr = addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); /* If the address family is not supported or if this address @@ -1384,6 +1454,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) struct sctp_endpoint *ep; struct sctp_association *asoc; struct list_head *pos, *temp; + unsigned int data_was_unread; SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); @@ -1393,6 +1464,10 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) ep = sctp_sk(sk)->ep; + /* Clean up any skbs sitting on the receive queue. */ + data_was_unread = sctp_queue_purge_ulpevents(&sk->sk_receive_queue); + data_was_unread += sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby); + /* Walk all associations on an endpoint. */ list_for_each_safe(pos, temp, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); @@ -1410,7 +1485,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) } } - if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || + !skb_queue_empty(&asoc->ulpq.reasm) || + (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); @@ -1420,10 +1497,6 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) sctp_primitive_SHUTDOWN(asoc, NULL); } - /* Clean up any skbs sitting on the receive queue. */ - sctp_queue_purge_ulpevents(&sk->sk_receive_queue); - sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby); - /* On a TCP-style socket, block for at most linger_time if set. */ if (sctp_style(sk, TCP) && timeout) sctp_wait_for_close(sk, timeout); @@ -2073,10 +2146,33 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk, static int sctp_setsockopt_events(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_association *asoc; + struct sctp_ulpevent *event; + if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; + + /* + * At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, + * if there is no data to be sent or retransmit, the stack will + * immediately send up this notification. + */ + if (sctp_ulpevent_type_enabled(SCTP_SENDER_DRY_EVENT, + &sctp_sk(sk)->subscribe)) { + asoc = sctp_id2assoc(sk, 0); + + if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_ATOMIC); + if (!event) + return -ENOMEM; + + sctp_ulpq_tail_event(&asoc->ulpq, event); + } + } + return 0; } @@ -3187,11 +3283,11 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, return -EFAULT; switch (val.sauth_chunk) { - case SCTP_CID_INIT: - case SCTP_CID_INIT_ACK: - case SCTP_CID_SHUTDOWN_COMPLETE: - case SCTP_CID_AUTH: - return -EINVAL; + case SCTP_CID_INIT: + case SCTP_CID_INIT_ACK: + case SCTP_CID_SHUTDOWN_COMPLETE: + case SCTP_CID_AUTH: + return -EINVAL; } /* add this chunk id to the endpoint */ @@ -3334,6 +3430,46 @@ static int sctp_setsockopt_del_key(struct sock *sk, } +/* + * 8.1.23 SCTP_AUTO_ASCONF + * + * This option will enable or disable the use of the automatic generation of + * ASCONF chunks to add and delete addresses to an existing association. Note + * that this option has two caveats namely: a) it only affects sockets that + * are bound to all addresses available to the SCTP stack, and b) the system + * administrator may have an overriding control that turns the ASCONF feature + * off no matter what setting the socket option may have. + * This option expects an integer boolean flag, where a non-zero value turns on + * the option, and a zero value turns off the option. + * Note. In this implementation, socket operation overrides default parameter + * being set by sysctl as well as FreeBSD implementation + */ +static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + int val; + struct sctp_sock *sp = sctp_sk(sk); + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(val, (int __user *)optval)) + return -EFAULT; + if (!sctp_is_ep_boundall(sk) && val) + return -EINVAL; + if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf)) + return 0; + + if (val == 0 && sp->do_auto_asconf) { + list_del(&sp->auto_asconf_list); + sp->do_auto_asconf = 0; + } else if (val && !sp->do_auto_asconf) { + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + } + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * @@ -3481,6 +3617,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTH_DELETE_KEY: retval = sctp_setsockopt_del_key(sk, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -3763,6 +3902,12 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + if (sctp_default_auto_asconf) { + list_add_tail(&sp->auto_asconf_list, + &sctp_auto_asconf_splist); + sp->do_auto_asconf = 1; + } else + sp->do_auto_asconf = 0; local_bh_enable(); return 0; @@ -3771,13 +3916,17 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Cleanup any SCTP per socket resources. */ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) { - struct sctp_endpoint *ep; + struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); /* Release our hold on the endpoint. */ - ep = sctp_sk(sk)->ep; - sctp_endpoint_free(ep); + sp = sctp_sk(sk); + if (sp->do_auto_asconf) { + sp->do_auto_asconf = 0; + list_del(&sp->auto_asconf_list); + } + sctp_endpoint_free(sp->ep); local_bh_disable(); percpu_counter_dec(&sctp_sockets_allocated); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); @@ -5277,6 +5426,28 @@ static int sctp_getsockopt_assoc_number(struct sock *sk, int len, } /* + * 8.1.23 SCTP_AUTO_ASCONF + * See the corresponding setsockopt entry as description + */ +static int sctp_getsockopt_auto_asconf(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + int val = 0; + + if (len < sizeof(int)) + return -EINVAL; + + len = sizeof(int); + if (sctp_sk(sk)->do_auto_asconf && sctp_is_ep_boundall(sk)) + val = 1; + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; +} + +/* * 8.2.6. Get the Current Identifiers of Associations * (SCTP_GET_ASSOC_ID_LIST) * @@ -5460,6 +5631,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_GET_ASSOC_ID_LIST: retval = sctp_getsockopt_assoc_ids(sk, len, optval, optlen); break; + case SCTP_AUTO_ASCONF: + retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6512,6 +6686,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; struct sctp_bind_hashbucket *head; + struct list_head tmplist; /* Migrate socket buffer sizes and all the socket level options to the * new socket. @@ -6519,7 +6694,12 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - inet_sk_copy_descendant(newsk, oldsk); + if (oldsp->do_auto_asconf) { + memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist)); + inet_sk_copy_descendant(newsk, oldsk); + memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist)); + } else + inet_sk_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 50cb57f0919e..6b3952961b85 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -183,6 +183,13 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "default_auto_asconf", + .data = &sctp_default_auto_asconf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index e70e5fc87890..8a84017834c2 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -1081,9 +1081,19 @@ void sctp_ulpevent_free(struct sctp_ulpevent *event) } /* Purge the skb lists holding ulpevents. */ -void sctp_queue_purge_ulpevents(struct sk_buff_head *list) +unsigned int sctp_queue_purge_ulpevents(struct sk_buff_head *list) { struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) - sctp_ulpevent_free(sctp_skb2event(skb)); + unsigned int data_unread = 0; + + while ((skb = skb_dequeue(list)) != NULL) { + struct sctp_ulpevent *event = sctp_skb2event(skb); + + if (!sctp_ulpevent_is_notification(event)) + data_unread += skb->len; + + sctp_ulpevent_free(event); + } + + return data_unread; } diff --git a/net/socket.c b/net/socket.c index 02dc82db3d23..b1cbbcd92558 100644 --- a/net/socket.c +++ b/net/socket.c @@ -467,7 +467,7 @@ static struct socket *sock_alloc(void) struct inode *inode; struct socket *sock; - inode = new_inode(sock_mnt->mnt_sb); + inode = new_inode_pseudo(sock_mnt->mnt_sb); if (!inode) return NULL; @@ -580,7 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(sock_sendmsg); -int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; struct sock_iocb siocb; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index b2198e65d8bb..ffd243d09188 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -4,6 +4,10 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_BACKCHANNEL + bool + depends on SUNRPC + config SUNRPC_XPRT_RDMA tristate depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 9d2fca5ad14a..8209a0411bca 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o -sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 67e31276682a..727e506cacda 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Run memory cache shrinker. */ static int -rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) { LIST_HEAD(free); int res; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return (nr_to_scan == 0) ? 0 : -1; @@ -624,7 +626,7 @@ rpcauth_refreshcred(struct rpc_task *task) if (err < 0) goto out; cred = task->tk_rqstp->rq_cred; - }; + } dprintk("RPC: %5u refreshing %s cred %p\n", task->tk_pid, cred->cr_auth->au_ops->au_name, cred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 339ba64cce1e..364eb45e989d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -577,13 +577,13 @@ retry: } inode = &gss_msg->inode->vfs_inode; for (;;) { - prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&inode->i_lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&inode->i_lock); - if (signalled()) { + if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } @@ -1421,18 +1421,16 @@ gss_wrap_req(struct rpc_task *task, goto out; } switch (gss_cred->gc_service) { - case RPC_GSS_SVC_NONE: - gss_wrap_req_encode(encode, rqstp, p, obj); - status = 0; - break; - case RPC_GSS_SVC_INTEGRITY: - status = gss_wrap_req_integ(cred, ctx, encode, - rqstp, p, obj); - break; - case RPC_GSS_SVC_PRIVACY: - status = gss_wrap_req_priv(cred, ctx, encode, - rqstp, p, obj); - break; + case RPC_GSS_SVC_NONE: + gss_wrap_req_encode(encode, rqstp, p, obj); + status = 0; + break; + case RPC_GSS_SVC_INTEGRITY: + status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj); + break; + case RPC_GSS_SVC_PRIVACY: + status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj); + break; } out: gss_put_ctx(ctx); @@ -1531,18 +1529,18 @@ gss_unwrap_resp(struct rpc_task *task, if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; switch (gss_cred->gc_service) { - case RPC_GSS_SVC_NONE: - break; - case RPC_GSS_SVC_INTEGRITY: - status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); - if (status) - goto out; - break; - case RPC_GSS_SVC_PRIVACY: - status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); - if (status) - goto out; - break; + case RPC_GSS_SVC_NONE: + break; + case RPC_GSS_SVC_INTEGRITY: + status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); + if (status) + goto out; + break; + case RPC_GSS_SVC_PRIVACY: + status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); + if (status) + goto out; + break; } /* take into account extra slack for integrity and privacy cases: */ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp) diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0a9a2ec2e469..8c67890de427 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -43,6 +43,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/xdr.h> #include <linux/crypto.h> +#include <linux/sunrpc/gss_krb5_enctypes.h> #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH @@ -743,6 +744,13 @@ static struct pf_desc gss_kerberos_pfs[] = { }, }; +MODULE_ALIAS("rpc-auth-gss-krb5"); +MODULE_ALIAS("rpc-auth-gss-krb5i"); +MODULE_ALIAS("rpc-auth-gss-krb5p"); +MODULE_ALIAS("rpc-auth-gss-390003"); +MODULE_ALIAS("rpc-auth-gss-390004"); +MODULE_ALIAS("rpc-auth-gss-390005"); + static struct gss_api_mech gss_kerberos_mech = { .gm_name = "krb5", .gm_owner = THIS_MODULE, @@ -750,7 +758,7 @@ static struct gss_api_mech gss_kerberos_mech = { .gm_ops = &gss_kerberos_ops, .gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs), .gm_pfs = gss_kerberos_pfs, - .gm_upcall_enctypes = "18,17,16,23,3,1,2", + .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES, }; static int __init init_kerberos_module(void) diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index e3c36a274412..ca8cad8251c7 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -141,7 +141,7 @@ gss_mech_get(struct gss_api_mech *gm) EXPORT_SYMBOL_GPL(gss_mech_get); struct gss_api_mech * -gss_mech_get_by_name(const char *name) +_gss_mech_get_by_name(const char *name) { struct gss_api_mech *pos, *gm = NULL; @@ -158,6 +158,17 @@ gss_mech_get_by_name(const char *name) } +struct gss_api_mech * gss_mech_get_by_name(const char *name) +{ + struct gss_api_mech *gm = NULL; + + gm = _gss_mech_get_by_name(name); + if (!gm) { + request_module("rpc-auth-gss-%s", name); + gm = _gss_mech_get_by_name(name); + } + return gm; +} EXPORT_SYMBOL_GPL(gss_mech_get_by_name); struct gss_api_mech * @@ -194,10 +205,9 @@ mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor) return 0; } -struct gss_api_mech * -gss_mech_get_by_pseudoflavor(u32 pseudoflavor) +struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) { - struct gss_api_mech *pos, *gm = NULL; + struct gss_api_mech *gm = NULL, *pos; spin_lock(®istered_mechs_lock); list_for_each_entry(pos, ®istered_mechs, gm_list) { @@ -213,6 +223,20 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) return gm; } +struct gss_api_mech * +gss_mech_get_by_pseudoflavor(u32 pseudoflavor) +{ + struct gss_api_mech *gm; + + gm = _gss_mech_get_by_pseudoflavor(pseudoflavor); + + if (!gm) { + request_module("rpc-auth-gss-%u", pseudoflavor); + gm = _gss_mech_get_by_pseudoflavor(pseudoflavor); + } + return gm; +} + EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index cf06af3b63c6..91eaa26e4c42 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_TRANS #endif -#if defined(CONFIG_NFS_V4_1) - /* * Helper routines that track the number of preallocation elements * on the transport. @@ -174,7 +172,7 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -1; } -EXPORT_SYMBOL(xprt_setup_backchannel); +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /* * Destroys the backchannel preallocated structures. @@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -EXPORT_SYMBOL(xprt_destroy_backchannel); +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); /* * One or more rpc_rqst structure have been preallocated during the @@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req) spin_unlock_bh(&xprt->bc_pa_lock); } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 1dd1a6890007..0b2eb388cbda 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * reply over an existing open connection previously established by the client. */ -#if defined(CONFIG_NFS_V4_1) - #include <linux/module.h> #include <linux/sunrpc/xprt.h> @@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req) return ret; } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d83f9d48713..c5347d29cfb7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -13,10 +13,6 @@ * and need to be refreshed, or when a packet was damaged in transit. * This may be have to be moved to the VFS layer. * - * NB: BSD uses a more intelligent approach to guessing when a request - * or reply has been lost by keeping the RTO estimate for each procedure. - * We currently make do with a constant timeout value. - * * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> */ @@ -32,7 +28,9 @@ #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> +#include <linux/in.h> #include <linux/in6.h> +#include <linux/un.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -66,9 +64,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static void call_bc_transmit(struct rpc_task *task); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -99,8 +97,7 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; - struct nameidata nd; - struct path path; + struct path path, dir; char name[15]; struct qstr q = { .name = name, @@ -115,7 +112,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) path.mnt = rpc_get_mount(); if (IS_ERR(path.mnt)) return PTR_ERR(path.mnt); - error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir); if (error) goto err; @@ -123,7 +120,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; q.hash = full_name_hash(q.name, q.len); - path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt); if (!IS_ERR(path.dentry)) break; error = PTR_ERR(path.dentry); @@ -134,11 +131,11 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) goto err_path_put; } } - path_put(&nd.path); + path_put(&dir); clnt->cl_path = path; return 0; err_path_put: - path_put(&nd.path); + path_put(&dir); err: rpc_put_mount(); return error; @@ -298,22 +295,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)args->address; + servername[0] = '\0'; switch (args->address->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = - (struct sockaddr_in *)args->address; + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; + case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); break; - } - case AF_INET6: { - struct sockaddr_in6 *sin = - (struct sockaddr_in6 *)args->address; + case AF_INET6: snprintf(servername, sizeof(servername), "%pI6", - &sin->sin6_addr); + &sin6->sin6_addr); break; - } default: /* caller wants default server name, but * address family isn't recognized. */ @@ -713,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it @@ -756,7 +758,7 @@ out: dprintk("RPC: rpc_run_bc_task: task= %p\n", task); return task; } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ void rpc_call_start(struct rpc_task *task) @@ -1058,7 +1060,7 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); - if (RPC_IS_ASYNC(task) || !signalled()) { + if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) { task->tk_action = call_allocate; rpc_delay(task, HZ>>4); return; @@ -1172,6 +1174,9 @@ call_bind_status(struct rpc_task *task) status = -EOPNOTSUPP; break; } + if (task->tk_rebind_retry == 0) + break; + task->tk_rebind_retry--; rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: @@ -1356,7 +1361,7 @@ call_transmit_status(struct rpc_task *task) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * 5b. Send the backchannel RPC reply. On error, drop the reply. In * addition, disconnect on connectivity errors. @@ -1420,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task) } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * 6. Sort out the RPC call status @@ -1545,8 +1550,7 @@ call_decode(struct rpc_task *task) kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; __be32 *p; - dprintk("RPC: %5u call_decode (status %d)\n", - task->tk_pid, task->tk_status); + dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { if (clnt->cl_chatty) @@ -1660,19 +1664,18 @@ rpc_verify_header(struct rpc_task *task) if (--len < 0) goto out_overflow; switch ((n = ntohl(*p++))) { - case RPC_AUTH_ERROR: - break; - case RPC_MISMATCH: - dprintk("RPC: %5u %s: RPC call version " - "mismatch!\n", - task->tk_pid, __func__); - error = -EPROTONOSUPPORT; - goto out_err; - default: - dprintk("RPC: %5u %s: RPC call rejected, " - "unknown error: %x\n", - task->tk_pid, __func__, n); - goto out_eio; + case RPC_AUTH_ERROR: + break; + case RPC_MISMATCH: + dprintk("RPC: %5u %s: RPC call version mismatch!\n", + task->tk_pid, __func__); + error = -EPROTONOSUPPORT; + goto out_err; + default: + dprintk("RPC: %5u %s: RPC call rejected, " + "unknown error: %x\n", + task->tk_pid, __func__, n); + goto out_eio; } if (--len < 0) goto out_overflow; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 72bc53683965..b181e3441323 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -456,13 +456,13 @@ rpc_get_inode(struct super_block *sb, umode_t mode) inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - switch(mode & S_IFMT) { - case S_IFDIR: - inode->i_fop = &simple_dir_operations; - inode->i_op = &simple_dir_inode_operations; - inc_nlink(inode); - default: - break; + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_fop = &simple_dir_operations; + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + default: + break; } return inode; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c652e4cc9fe9..e45d2fbbe5a8 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/un.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/kernel.h> @@ -32,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_BIND #endif +#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" + #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -158,20 +161,69 @@ static void rpcb_map_release(void *data) kfree(map); } -static const struct sockaddr_in rpcb_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(RPCBIND_PORT), -}; +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local_unix(void) +{ + static const struct sockaddr_un rpcb_localaddr_rpcbind = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = &init_net, + .protocol = XPRT_TRANSPORT_LOCAL, + .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, + .addrsize = sizeof(rpcb_localaddr_rpcbind), + .servername = "localhost", + .program = &rpcb_program, + .version = RPCBVERS_2, + .authflavor = RPC_AUTH_NULL, + }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + /* + * Because we requested an RPC PING at transport creation time, + * this works only if the user space portmapper is rpcbind, and + * it's listening on AF_LOCAL on the named socket. + */ + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to bind second program to " + "rpcbind v4 client (errno %ld).\n", + PTR_ERR(clnt4)); + clnt4 = NULL; + } + + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; -static DEFINE_MUTEX(rpcb_create_local_mutex); +out: + return result; +} /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local(void) +static int rpcb_create_local_net(void) { + static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), + }; struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, @@ -186,13 +238,6 @@ static int rpcb_create_local(void) struct rpc_clnt *clnt, *clnt4; int result = 0; - if (rpcb_local_clnt) - return result; - - mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) - goto out; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " @@ -214,10 +259,34 @@ static int rpcb_create_local(void) clnt4 = NULL; } + /* Protected by rpcb_create_local_mutex */ rpcb_local_clnt = clnt; rpcb_local_clnt4 = clnt4; out: + return result; +} + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) +{ + static DEFINE_MUTEX(rpcb_create_local_mutex); + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); + +out: mutex_unlock(&rpcb_create_local_mutex); return result; } @@ -528,7 +597,7 @@ void rpcb_getport_async(struct rpc_task *task) u32 bind_version; struct rpc_xprt *xprt; struct rpc_clnt *rpcb_clnt; - static struct rpcbind_args *map; + struct rpcbind_args *map; struct rpc_task *child; struct sockaddr_storage addr; struct sockaddr *sap = (struct sockaddr *)&addr; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 6b43ee7221d5..d12ffa545811 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) /* * Add new request to a priority queue. */ -static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { struct list_head *q; struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[task->tk_priority]; - if (unlikely(task->tk_priority > queue->maxpriority)) + q = &queue->tasks[queue_priority]; + if (unlikely(queue_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { @@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ -static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { BUG_ON (RPC_IS_QUEUED(task)); if (RPC_IS_PRIORITY(queue)) - __rpc_add_wait_queue_priority(queue, task); + __rpc_add_wait_queue_priority(queue, task, queue_priority); else if (RPC_IS_SWAPPER(task)) list_add(&task->u.tk_wait.list, &queue->tasks[0]); else @@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task) * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. */ -static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action) +static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, + struct rpc_task *task, + rpc_action action, + unsigned char queue_priority) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - __rpc_add_wait_queue(q, task); + __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); task->tk_callback = action; @@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action); + __rpc_sleep_on_priority(q, task, action, task->tk_priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); +void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action, int priority) +{ + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); + + /* + * Protect the queue operations. + */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); + spin_unlock_bh(&q->lock); +} + /** * __rpc_do_wake_up_task - wake up a single rpc_task * @queue: wait queue @@ -616,30 +636,25 @@ static void __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); for (;;) { + void (*do_action)(struct rpc_task *); /* - * Execute any pending callback. + * Execute any pending callback first. */ - if (task->tk_callback) { - void (*save_callback)(struct rpc_task *); - - /* - * We set tk_callback to NULL before calling it, - * in case it sets the tk_callback field itself: - */ - save_callback = task->tk_callback; - task->tk_callback = NULL; - save_callback(task); - } else { + do_action = task->tk_callback; + task->tk_callback = NULL; + if (do_action == NULL) { /* * Perform the next FSM step. - * tk_action may be NULL when the task has been killed - * by someone else. + * tk_action may be NULL if the task has been killed. + * In particular, note that rpc_killall_tasks may + * do this at any time, so beware when dereferencing. */ - if (task->tk_action == NULL) + do_action = task->tk_action; + if (do_action == NULL) break; - task->tk_action(task); } + do_action(task); /* * Lockless check for whether task is sleeping or not. @@ -792,6 +807,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; + task->tk_rebind_retry = 2; task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; task->tk_owner = current->tgid; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08e05a8ce025..6a69a1131fb7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv) if (progp->pg_vers[i]->vs_hidden) continue; + dprintk("svc: attempting to unregister %sv%u\n", + progp->pg_name, i); __svc_unregister(progp->pg_prog, i, progp->pg_name); } } @@ -1250,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Process a backchannel RPC request that arrived over an existing * outbound connection @@ -1298,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, return 0; } } -EXPORT_SYMBOL(bc_svc_process); -#endif /* CONFIG_NFS_V4_1 */ +EXPORT_SYMBOL_GPL(bc_svc_process); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Return (transport-specific) limit on the rpc payload. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ab86b7927f84..bd31208bbb61 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -902,12 +902,13 @@ void svc_delete_xprt(struct svc_xprt *xprt) if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) list_del_init(&xprt->xpt_list); /* - * We used to delete the transport from whichever list - * it's sk_xprt.xpt_ready node was on, but we don't actually - * need to. This is because the only time we're called - * while still attached to a queue, the queue itself - * is about to be destroyed (in svc_destroy). + * The only time we're called while xpt_ready is still on a list + * is while the list itself is about to be destroyed (in + * svc_destroy). BUT svc_xprt_enqueue could still be attempting + * to add new entries to the sp_sockets list, so we can't leave + * a freed xprt on it. */ + list_del_init(&xprt->xpt_ready); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index c8e10216c113..ce136323da8b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -30,12 +30,10 @@ struct unix_domain { struct auth_domain h; -#ifdef CONFIG_NFSD_DEPRECATED - int addr_changes; -#endif /* CONFIG_NFSD_DEPRECATED */ /* other stuff later */ }; +extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; static void svcauth_unix_domain_release(struct auth_domain *dom) @@ -74,9 +72,6 @@ struct auth_domain *unix_domain_find(char *name) return NULL; } new->h.flavour = &svcauth_unix; -#ifdef CONFIG_NFSD_DEPRECATED - new->addr_changes = 0; -#endif /* CONFIG_NFSD_DEPRECATED */ rv = auth_domain_lookup(name, &new->h); } } @@ -95,9 +90,6 @@ struct ip_map { char m_class[8]; /* e.g. "nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; -#ifdef CONFIG_NFSD_DEPRECATED - int m_add_change; -#endif /* CONFIG_NFSD_DEPRECATED */ }; static void ip_map_put(struct kref *kref) @@ -151,9 +143,6 @@ static void update(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->m_client->h.ref); new->m_client = item->m_client; -#ifdef CONFIG_NFSD_DEPRECATED - new->m_add_change = item->m_add_change; -#endif /* CONFIG_NFSD_DEPRECATED */ } static struct cache_head *ip_map_alloc(void) { @@ -338,16 +327,6 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.flags = 0; if (!udom) set_bit(CACHE_NEGATIVE, &ip.h.flags); -#ifdef CONFIG_NFSD_DEPRECATED - else { - ip.m_add_change = udom->addr_changes; - /* if this is from the legacy set_client system call, - * we need m_add_change to be one higher - */ - if (expiry == NEVER) - ip.m_add_change++; - } -#endif /* CONFIG_NFSD_DEPRECATED */ ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ @@ -367,62 +346,6 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm, return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); } -#ifdef CONFIG_NFSD_DEPRECATED -int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) -{ - struct unix_domain *udom; - struct ip_map *ipmp; - - if (dom->flavour != &svcauth_unix) - return -EINVAL; - udom = container_of(dom, struct unix_domain, h); - ipmp = ip_map_lookup(net, "nfsd", addr); - - if (ipmp) - return ip_map_update(net, ipmp, udom, NEVER); - else - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(auth_unix_add_addr); - -int auth_unix_forget_old(struct auth_domain *dom) -{ - struct unix_domain *udom; - - if (dom->flavour != &svcauth_unix) - return -EINVAL; - udom = container_of(dom, struct unix_domain, h); - udom->addr_changes++; - return 0; -} -EXPORT_SYMBOL_GPL(auth_unix_forget_old); - -struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) -{ - struct ip_map *ipm; - struct auth_domain *rv; - struct sunrpc_net *sn; - - sn = net_generic(net, sunrpc_net_id); - ipm = ip_map_lookup(net, "nfsd", addr); - - if (!ipm) - return NULL; - if (cache_check(sn->ip_map_cache, &ipm->h, NULL)) - return NULL; - - if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { - sunrpc_invalidate(&ipm->h, sn->ip_map_cache); - rv = NULL; - } else { - rv = &ipm->m_client->h; - kref_get(&rv->ref); - } - cache_put(&ipm->h, sn->ip_map_cache); - return rv; -} -EXPORT_SYMBOL_GPL(auth_unix_lookup); -#endif /* CONFIG_NFSD_DEPRECATED */ void svcauth_unix_purge(void) { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c3f19e..767d494de7a2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -51,6 +51,8 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/xprt.h> +#include "sunrpc.h" + #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -66,12 +68,12 @@ static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); static void svc_bc_sock_free(struct svc_xprt *xprt); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; @@ -387,6 +389,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -409,7 +438,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -884,6 +912,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -893,31 +971,15 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -927,7 +989,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -954,83 +1016,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. */ - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; - int len; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + __be32 xid; + __be32 calldir; - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; - - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the forseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. */ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; } +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; +} + + /* * Receive data from a TCP socket. */ @@ -1041,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; - struct rpc_rqst *req = NULL; + unsigned int want, base; + __be32 *p; + __be32 calldir; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1053,87 +1109,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. - */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); - pnum = 1; - while (vlen < len) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); - if (len < 0) - goto err_again; - - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; - goto out; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) + len = receive_cb_reply(svsk, rqstp); + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); + return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1201,7 +1243,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); @@ -1242,7 +1284,7 @@ static void svc_cleanup_bc_xprt_sock(void) { svc_unreg_xprt_class(&svc_tcp_bc_class); } -#else /* CONFIG_NFS_V4_1 */ +#else /* CONFIG_SUNRPC_BACKCHANNEL */ static void svc_init_bc_xprt_sock(void) { } @@ -1250,7 +1292,7 @@ static void svc_init_bc_xprt_sock(void) static void svc_cleanup_bc_xprt_sock(void) { } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, @@ -1304,18 +1346,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1379,8 +1413,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1562,8 +1602,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* @@ -1581,7 +1623,7 @@ static void svc_sock_free(struct svc_xprt *xprt) kfree(svsk); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Create a back channel svc_xprt which shares the fore channel socket. */ @@ -1620,4 +1662,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt) if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd674b81d..277ebd4bf095 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) kaddr[buf->page_base + len] = '\0'; kunmap_atomic(kaddr, KM_USER0); } -EXPORT_SYMBOL(xdr_terminate_string); +EXPORT_SYMBOL_GPL(xdr_terminate_string); void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_init_decode - Initialize an xdr_stream for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer from which to decode data + * @pages: list of pages to decode into + * @len: length in bytes of buffer in pages + */ +void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len) +{ + memset(buf, 0, sizeof(*buf)); + buf->pages = pages; + buf->page_len = len; + buf->buflen = len; + buf->len = len; + xdr_init_decode(xdr, buf, NULL); +} +EXPORT_SYMBOL_GPL(xdr_init_decode_pages); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ce5eb68a9664..9b6a4d1ea8f8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,7 @@ /* * Local functions */ +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); @@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); * transport connects from colliding with writes. No congestion control * is provided. */ -int xprt_reserve_xprt(struct rpc_task *task) +int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = req->rq_xprt; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - req->rq_bytes_sent = 0; - req->rq_ntrans++; + if (req != NULL) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } return 1; @@ -212,10 +215,13 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) * integrated into the decision of whether a request is allowed to be * woken up and given access to the transport. */ -int xprt_reserve_xprt_cong(struct rpc_task *task) +int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; goto out_sleep; } + if (req == NULL) { + xprt->snd_task = task; + return 1; + } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return 1; } xprt_clear_locked(xprt); @@ -262,10 +270,13 @@ out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) int retval; spin_lock_bh(&xprt->transport_lock); - retval = xprt->ops->reserve_xprt(task); + retval = xprt->ops->reserve_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return retval; } @@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (!task) - goto out_unlock; - } + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; req = task->tk_rqstp; xprt->snd_task = task; @@ -310,24 +318,25 @@ out_unlock: static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { struct rpc_task *task; + struct rpc_rqst *req; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (!task) - goto out_unlock; + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; + + req = task->tk_rqstp; + if (req == NULL) { + xprt->snd_task = task; + return; } if (__xprt_get_cong(xprt, task)) { - struct rpc_rqst *req = task->tk_rqstp; xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return; } out_unlock: @@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_reply_bytes_recvd; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) + if (!xprt->ops->reserve_xprt(xprt, task)) err = -EAGAIN; out_unlock: spin_unlock_bh(&xprt->transport_lock); @@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req = ERR_PTR(-EAGAIN); + + if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) + goto out; + req = kzalloc(sizeof(struct rpc_rqst), gfp_flags); + if (req != NULL) + goto out; + atomic_dec(&xprt->num_reqs); + req = ERR_PTR(-ENOMEM); +out: + return req; +} + +static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { + kfree(req); + return true; + } + return false; +} + static void xprt_alloc_slot(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req; - task->tk_status = 0; - if (task->tk_rqstp) - return; if (!list_empty(&xprt->free)) { - struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); - list_del_init(&req->rq_list); - task->tk_rqstp = req; - xprt_request_init(task, xprt); - return; + req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + goto out_init_req; + } + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); + if (!IS_ERR(req)) + goto out_init_req; + switch (PTR_ERR(req)) { + case -ENOMEM: + rpc_delay(task, HZ >> 2); + dprintk("RPC: dynamic allocation of request slot " + "failed! Retrying\n"); + break; + case -EAGAIN: + rpc_sleep_on(&xprt->backlog, task, NULL); + dprintk("RPC: waiting for request slot\n"); } - dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; - task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL); + return; +out_init_req: + task->tk_status = 0; + task->tk_rqstp = req; + xprt_request_init(task, xprt); } static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { + if (xprt_dynamic_free_slot(xprt, req)) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ spin_lock(&xprt->reserve_lock); @@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } -struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) +static void xprt_free_all_slots(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + while (!list_empty(&xprt->free)) { + req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + kfree(req); + } +} + +struct rpc_xprt *xprt_alloc(struct net *net, size_t size, + unsigned int num_prealloc, + unsigned int max_alloc) { struct rpc_xprt *xprt; + struct rpc_rqst *req; + int i; xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; - atomic_set(&xprt->count, 1); - xprt->max_reqs = max_req; - xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) + xprt_init(xprt, net); + + for (i = 0; i < num_prealloc; i++) { + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (!req) + break; + list_add(&req->rq_list, &xprt->free); + } + if (i < num_prealloc) goto out_free; + if (max_alloc > num_prealloc) + xprt->max_reqs = max_alloc; + else + xprt->max_reqs = num_prealloc; + xprt->min_reqs = num_prealloc; + atomic_set(&xprt->num_reqs, num_prealloc); - xprt->xprt_net = get_net(net); return xprt; out_free: - kfree(xprt); + xprt_free(xprt); out: return NULL; } @@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { put_net(xprt->xprt_net); - kfree(xprt->slot); + xprt_free_all_slots(xprt); kfree(xprt); } EXPORT_SYMBOL_GPL(xprt_free); @@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - task->tk_status = -EIO; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + /* Note: grabbing the xprt_lock_write() here is not strictly needed, + * but ensures that we throttle new slot allocation if the transport + * is congested (e.g. if reconnecting or if we're out of socket + * write buffer space). + */ + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (!xprt_lock_write(xprt, task)) + return; + spin_lock(&xprt->reserve_lock); xprt_alloc_slot(task); spin_unlock(&xprt->reserve_lock); + xprt_release_write(xprt, task); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; + INIT_LIST_HEAD(&req->rq_list); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; @@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task) xprt_free_bc_request(req); } +static void xprt_init(struct rpc_xprt *xprt, struct net *net) +{ + atomic_set(&xprt->count, 1); + + spin_lock_init(&xprt->transport_lock); + spin_lock_init(&xprt->reserve_lock); + + INIT_LIST_HEAD(&xprt->free); + INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + + xprt->last_used = jiffies; + xprt->cwnd = RPC_INITCWND; + xprt->bind_index = 0; + + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); + rpc_init_wait_queue(&xprt->pending, "xprt_pending"); + rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending"); + rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); + + xprt_init_xid(xprt); + + xprt->xprt_net = get_net(net); +} + /** * xprt_create_transport - create an RPC transport * @args: rpc transport creation arguments @@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task) struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; - struct rpc_rqst *req; struct xprt_class *t; spin_lock(&xprt_list_lock); @@ -1100,46 +1213,17 @@ found: if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); - return xprt; + goto out; } - if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) - /* ->setup returned a pre-initialized xprt: */ - return xprt; - - spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->reserve_lock); - - INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv); -#if defined(CONFIG_NFS_V4_1) - spin_lock_init(&xprt->bc_pa_lock); - INIT_LIST_HEAD(&xprt->bc_pa_list); -#endif /* CONFIG_NFS_V4_1 */ - INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); else init_timer(&xprt->timer); - xprt->last_used = jiffies; - xprt->cwnd = RPC_INITCWND; - xprt->bind_index = 0; - - rpc_init_wait_queue(&xprt->binding, "xprt_binding"); - rpc_init_wait_queue(&xprt->pending, "xprt_pending"); - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); - rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - - xprt_init_xid(xprt); - dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); +out: return xprt; } @@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); /* diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6c014dd3a20b..a385430c722a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -42,6 +42,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/debug.h> #include <linux/sunrpc/rpc_rdma.h> +#include <linux/interrupt.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -695,7 +696,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(-ENOMEM); xprt = &cma_xprt->sc_xprt; - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); + listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0867070bb5ca..b446e100286f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args) } xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", @@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task) } static int -xprt_rdma_reserve_xprt(struct rpc_task *task) +xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int credits = atomic_read(&r_xprt->rx_buf.rb_credits); @@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task) BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); } xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(task); + return xprt_reserve_xprt_cong(xprt, task); } /* diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index d4297dc43dc4..28236bab57f9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -47,6 +47,7 @@ * o buffer memory */ +#include <linux/interrupt.h> #include <linux/pci.h> /* for Tavor hack below */ #include <linux/slab.h> @@ -387,7 +388,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cae761a8536c..08c5d5a128fc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -42,7 +42,7 @@ #include <linux/wait.h> /* wait_queue_head_t, etc */ #include <linux/spinlock.h> /* spinlock_t, etc */ -#include <asm/atomic.h> /* atomic_t, etc */ +#include <linux/atomic.h> /* atomic_t, etc */ #include <rdma/rdma_cm.h> /* RDMA connection api */ #include <rdma/ib_verbs.h> /* RDMA verbs api */ @@ -109,7 +109,7 @@ struct rpcrdma_ep { */ /* temporary static scatter/gather max */ -#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ #define MAX_RPCRDMAHDR (\ /* max supported RPC/RDMA header */ \ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf005d3c65ef..d7f97ef26590 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include <linux/types.h> +#include <linux/string.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/capability.h> @@ -28,6 +29,7 @@ #include <linux/in.h> #include <linux/net.h> #include <linux/mm.h> +#include <linux/un.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> @@ -35,7 +37,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> #include <linux/file.h> -#ifdef CONFIG_NFS_V4_1 +#ifdef CONFIG_SUNRPC_BACKCHANNEL #include <linux/sunrpc/bc_xprt.h> #endif @@ -45,11 +47,15 @@ #include <net/tcp.h> #include "sunrpc.h" + +static void xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; @@ -70,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; @@ -99,6 +106,15 @@ static ctl_table xs_tunables_table[] = { .extra2 = &max_slot_table_size }, { + .procname = "tcp_max_slot_table_entries", + .data = &xprt_max_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_slot_table_size, + .extra2 = &max_tcp_slot_table_limit + }, + { .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), @@ -261,6 +277,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +297,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -495,6 +527,70 @@ static int xs_nospace(struct rpc_task *task) return ret; } +/* + * Construct a stream transport record marker in @buf. + */ +static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); +} + +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occured, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -574,13 +670,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) kernel_sock_shutdown(sock, SHUT_WR); } -static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) -{ - u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -603,7 +692,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct xdr_buf *xdr = &req->rq_snd_buf; int status; - xs_encode_tcp_record_marker(&req->rq_snd_buf); + xs_encode_stream_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -677,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) if (task == NULL) goto out_release; req = task->tk_rqstp; + if (req == NULL) + goto out_release; if (req->rq_bytes_sent == 0) goto out_release; if (req->rq_bytes_sent == req->rq_snd_buf.len) @@ -785,6 +876,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset = sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... */ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1076,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, return 0; } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Obtains an rpc_rqst previously allocated and invokes the common * tcp read code to read the data. The result is placed in the callback @@ -1139,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, { return xs_tcp_read_reply(xprt, desc); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Read data off the transport. This can be either an RPC_CALL or an @@ -1344,7 +1517,6 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt_force_disconnect(xprt); - case TCP_SYN_SENT: xprt->connect_cookie++; case TCP_CLOSING: /* @@ -1571,11 +1743,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1597,6 +1789,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) { switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1606,6 +1801,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1644,6 +1843,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type + * + * Invoked by a work queue tasklet. + */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1758,6 +2045,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport) static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -1789,12 +2077,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + case -EINPROGRESS: + /* SYN_SENT! */ + xprt->connect_cookie++; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + } +out: + return ret; } /** @@ -1917,6 +2215,32 @@ static void xs_connect(struct rpc_task *task) } /** + * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + +/** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics * @seq: output file @@ -2014,10 +2338,7 @@ static int bc_sendto(struct rpc_rqst *req) unsigned long headoff; unsigned long tailoff; - /* - * Set up the rpc header and record marker stuff - */ - xs_encode_tcp_record_marker(xbufp); + xs_encode_stream_record_marker(xbufp); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; @@ -2089,6 +2410,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2150,6 +2486,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2164,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) } static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, - unsigned int slot_table_size) + unsigned int slot_table_size, + unsigned int max_slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -2174,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size, + max_slot_table_size); if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); @@ -2197,6 +2537,71 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2216,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries, + xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2292,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2371,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) */ return args->bc_xprt->xpt_bc_xprt; } - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2438,6 +2846,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2473,6 +2889,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2493,6 +2910,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); @@ -2548,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = { #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); +static int param_set_max_slot_table_size(const char *val, + const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE_LIMIT); +} + +static struct kernel_param_ops param_ops_max_slot_table_size = { + .set = param_set_max_slot_table_size, + .get = param_get_uint, +}; + +#define param_check_max_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, slot_table_size, 0644); +module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, + max_slot_table_size, 0644); module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, slot_table_size, 0644); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index fa68d1e9ff4b..759b318b5ffb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -552,12 +552,16 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (likely(!msg_non_seq(buf_msg(buf)))) { struct tipc_msg *msg; - assert(tipc_bcast_nmap.count != 0); bcbuf_set_acks(buf, tipc_bcast_nmap.count); msg = buf_msg(buf); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); bcl->stats.sent_info++; + + if (WARN_ON(!tipc_bcast_nmap.count)) { + dump_stack(); + return 0; + } } /* Send buffer over bearers until all targets reached */ diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 85209eadfae6..85eba9c08ee9 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -402,7 +402,6 @@ void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) void tipc_continue(struct tipc_bearer *b_ptr) { spin_lock_bh(&b_ptr->lock); - b_ptr->continue_count++; if (!list_empty(&b_ptr->cong_links)) tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr); b_ptr->blocked = 0; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 31d6172b20fd..5ad70eff1ebf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -107,7 +107,6 @@ struct media { * @link_req: ptr to (optional) structure making periodic link setup requests * @links: list of non-congested links associated with bearer * @cong_links: list of congested links associated with bearer - * @continue_count: # of times bearer has resumed after congestion or blocking * @active: non-zero if bearer structure is represents a bearer * @net_plane: network plane ('A' through 'H') currently associated with bearer * @nodes: indicates which nodes in cluster can be reached through bearer @@ -129,7 +128,6 @@ struct tipc_bearer { struct link_req *link_req; struct list_head links; struct list_head cong_links; - u32 continue_count; int active; char net_plane; struct tipc_node_map nodes; diff --git a/net/tipc/core.h b/net/tipc/core.h index 436dda1159d2..2761af36d141 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -47,7 +47,7 @@ #include <linux/string.h> #include <asm/uaccess.h> #include <linux/interrupt.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/hardirq.h> #include <linux/netdevice.h> #include <linux/in.h> @@ -62,12 +62,6 @@ struct tipc_msg; /* msg.h */ struct print_buf; /* log.h */ /* - * TIPC sanity test macros - */ - -#define assert(i) BUG_ON(!(i)) - -/* * TIPC system monitoring code */ diff --git a/net/tipc/link.c b/net/tipc/link.c index 5ed4b4f7452d..f89570c54f54 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1572,7 +1572,7 @@ static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr, static int link_recv_buf_validate(struct sk_buff *buf) { static u32 min_data_hdr_size[8] = { - SHORT_H_SIZE, MCAST_H_SIZE, LONG_H_SIZE, DIR_MSG_H_SIZE, + SHORT_H_SIZE, MCAST_H_SIZE, NAMED_H_SIZE, BASIC_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE }; @@ -2553,7 +2553,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, u32 msg_sz = msg_size(imsg); u32 fragm_sz = msg_data_sz(fragm); u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz); - u32 max = TIPC_MAX_USER_MSG_SIZE + LONG_H_SIZE; + u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; if (msg_type(imsg) == TIPC_MCAST_MSG) max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; if (msg_size(imsg) > max) { @@ -2882,7 +2882,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size) profile_total = 1; tipc_printf(&pb, " TX profile sample:%u packets average:%u octets\n" " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " - "-16354:%u%% -32768:%u%% -66000:%u%%\n", + "-16384:%u%% -32768:%u%% -66000:%u%%\n", l_ptr->stats.msg_length_counts, l_ptr->stats.msg_lengths_total / profile_total, percent(l_ptr->stats.msg_length_profile[0], profile_total), diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 03e57bf92c73..83d50967910c 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -61,10 +61,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, msg_set_size(m, hsize); msg_set_prevnode(m, tipc_own_addr); msg_set_type(m, type); - if (!msg_short(m)) { - msg_set_orignode(m, tipc_own_addr); - msg_set_destnode(m, destnode); - } + msg_set_orignode(m, tipc_own_addr); + msg_set_destnode(m, destnode); } /** diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 8452454731fa..d93178f2e852 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -68,10 +68,10 @@ * Message header sizes */ -#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ -#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ -#define LONG_H_SIZE 40 /* Named messages */ -#define MCAST_H_SIZE 44 /* Multicast messages */ +#define SHORT_H_SIZE 24 /* In-cluster basic payload message */ +#define BASIC_H_SIZE 32 /* Basic payload message */ +#define NAMED_H_SIZE 40 /* Named payload message */ +#define MCAST_H_SIZE 44 /* Multicast payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ @@ -311,26 +311,6 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) } /* - * TIPC may utilize the "link ack #" and "link seq #" fields of a short - * message header to hold the destination node for the message, since the - * normal "dest node" field isn't present. This cache is only referenced - * when required, so populating the cache of a longer message header is - * harmless (as long as the header has the two link sequence fields present). - * - * Note: Host byte order is OK here, since the info never goes off-card. - */ - -static inline u32 msg_destnode_cache(struct tipc_msg *m) -{ - return m->hdr[2]; -} - -static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) -{ - m->hdr[2] = dnode; -} - -/* * Words 3-10 */ @@ -377,7 +357,7 @@ static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) static inline int msg_short(struct tipc_msg *m) { - return msg_hdr_sz(m) == 24; + return msg_hdr_sz(m) == SHORT_H_SIZE; } static inline u32 msg_orignode(struct tipc_msg *m) @@ -635,7 +615,7 @@ static inline u32 msg_link_selector(struct tipc_msg *m) static inline void msg_set_link_selector(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 4, 0, 1, (n & 1)); + msg_set_bits(m, 4, 0, 1, n); } /* @@ -659,7 +639,7 @@ static inline u32 msg_probe(struct tipc_msg *m) static inline void msg_set_probe(struct tipc_msg *m, u32 val) { - msg_set_bits(m, 5, 0, 1, (val & 1)); + msg_set_bits(m, 5, 0, 1, val); } static inline char msg_net_plane(struct tipc_msg *m) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 80025a1b3bfd..cd356e504332 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -94,13 +94,13 @@ static void publ_to_item(struct distr_item *i, struct publication *p) static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) { - struct sk_buff *buf = tipc_buf_acquire(LONG_H_SIZE + size); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { msg = buf_msg(buf); - tipc_msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); - msg_set_size(msg, LONG_H_SIZE + size); + tipc_msg_init(msg, NAME_DISTRIBUTOR, type, INT_H_SIZE, dest); + msg_set_size(msg, INT_H_SIZE + size); } return buf; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 205ed4a4e186..46e6b6c2ecc9 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -2,7 +2,7 @@ * net/tipc/name_table.c: TIPC name table code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2008, Wind River Systems + * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -44,9 +44,7 @@ static int tipc_nametbl_size = 1024; /* must be a power of 2 */ /** - * struct sub_seq - container for all published instances of a name sequence - * @lower: name sequence lower bound - * @upper: name sequence upper bound + * struct name_info - name sequence publication info * @node_list: circular list of publications made by own node * @cluster_list: circular list of publications made by own cluster * @zone_list: circular list of publications made by own zone @@ -59,18 +57,29 @@ static int tipc_nametbl_size = 1024; /* must be a power of 2 */ * (The cluster and node lists may be empty.) */ -struct sub_seq { - u32 lower; - u32 upper; - struct publication *node_list; - struct publication *cluster_list; - struct publication *zone_list; +struct name_info { + struct list_head node_list; + struct list_head cluster_list; + struct list_head zone_list; u32 node_list_size; u32 cluster_list_size; u32 zone_list_size; }; /** + * struct sub_seq - container for all published instances of a name sequence + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @info: pointer to name sequence publication info + */ + +struct sub_seq { + u32 lower; + u32 upper; + struct name_info *info; +}; + +/** * struct name_seq - container for all published instances of a name type * @type: 32 bit 'type' value for name sequence * @sseq: pointer to dynamically-sized array of sub-sequences of this 'type'; @@ -246,6 +255,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, struct subscription *st; struct publication *publ; struct sub_seq *sseq; + struct name_info *info; int created_subseq = 0; sseq = nameseq_find_subseq(nseq, lower); @@ -258,6 +268,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, type, lower, upper); return NULL; } + + info = sseq->info; } else { u32 inspos; struct sub_seq *freesseq; @@ -292,6 +304,17 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, nseq->alloc *= 2; } + info = kzalloc(sizeof(*info), GFP_ATOMIC); + if (!info) { + warn("Cannot publish {%u,%u,%u}, no memory\n", + type, lower, upper); + return NULL; + } + + INIT_LIST_HEAD(&info->node_list); + INIT_LIST_HEAD(&info->cluster_list); + INIT_LIST_HEAD(&info->zone_list); + /* Insert new sub-sequence */ sseq = &nseq->sseqs[inspos]; @@ -301,6 +324,7 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, nseq->first_free++; sseq->lower = lower; sseq->upper = upper; + sseq->info = info; created_subseq = 1; } @@ -310,33 +334,17 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq, if (!publ) return NULL; - sseq->zone_list_size++; - if (!sseq->zone_list) - sseq->zone_list = publ->zone_list_next = publ; - else { - publ->zone_list_next = sseq->zone_list->zone_list_next; - sseq->zone_list->zone_list_next = publ; - } + list_add(&publ->zone_list, &info->zone_list); + info->zone_list_size++; if (in_own_cluster(node)) { - sseq->cluster_list_size++; - if (!sseq->cluster_list) - sseq->cluster_list = publ->cluster_list_next = publ; - else { - publ->cluster_list_next = - sseq->cluster_list->cluster_list_next; - sseq->cluster_list->cluster_list_next = publ; - } + list_add(&publ->cluster_list, &info->cluster_list); + info->cluster_list_size++; } if (node == tipc_own_addr) { - sseq->node_list_size++; - if (!sseq->node_list) - sseq->node_list = publ->node_list_next = publ; - else { - publ->node_list_next = sseq->node_list->node_list_next; - sseq->node_list->node_list_next = publ; - } + list_add(&publ->node_list, &info->node_list); + info->node_list_size++; } /* @@ -370,9 +378,8 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i u32 node, u32 ref, u32 key) { struct publication *publ; - struct publication *curr; - struct publication *prev; struct sub_seq *sseq = nameseq_find_subseq(nseq, inst); + struct name_info *info; struct sub_seq *free; struct subscription *s, *st; int removed_subseq = 0; @@ -380,96 +387,41 @@ static struct publication *tipc_nameseq_remove_publ(struct name_seq *nseq, u32 i if (!sseq) return NULL; - /* Remove publication from zone scope list */ + info = sseq->info; - prev = sseq->zone_list; - publ = sseq->zone_list->zone_list_next; - while ((publ->key != key) || (publ->ref != ref) || - (publ->node && (publ->node != node))) { - prev = publ; - publ = publ->zone_list_next; - if (prev == sseq->zone_list) { + /* Locate publication, if it exists */ - /* Prevent endless loop if publication not found */ - - return NULL; - } - } - if (publ != sseq->zone_list) - prev->zone_list_next = publ->zone_list_next; - else if (publ->zone_list_next != publ) { - prev->zone_list_next = publ->zone_list_next; - sseq->zone_list = publ->zone_list_next; - } else { - sseq->zone_list = NULL; + list_for_each_entry(publ, &info->zone_list, zone_list) { + if ((publ->key == key) && (publ->ref == ref) && + (!publ->node || (publ->node == node))) + goto found; } - sseq->zone_list_size--; + return NULL; + +found: + /* Remove publication from zone scope list */ + + list_del(&publ->zone_list); + info->zone_list_size--; /* Remove publication from cluster scope list, if present */ if (in_own_cluster(node)) { - prev = sseq->cluster_list; - curr = sseq->cluster_list->cluster_list_next; - while (curr != publ) { - prev = curr; - curr = curr->cluster_list_next; - if (prev == sseq->cluster_list) { - - /* Prevent endless loop for malformed list */ - - err("Unable to de-list cluster publication\n" - "{%u%u}, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, - publ->ref, publ->key); - goto end_cluster; - } - } - if (publ != sseq->cluster_list) - prev->cluster_list_next = publ->cluster_list_next; - else if (publ->cluster_list_next != publ) { - prev->cluster_list_next = publ->cluster_list_next; - sseq->cluster_list = publ->cluster_list_next; - } else { - sseq->cluster_list = NULL; - } - sseq->cluster_list_size--; + list_del(&publ->cluster_list); + info->cluster_list_size--; } -end_cluster: /* Remove publication from node scope list, if present */ if (node == tipc_own_addr) { - prev = sseq->node_list; - curr = sseq->node_list->node_list_next; - while (curr != publ) { - prev = curr; - curr = curr->node_list_next; - if (prev == sseq->node_list) { - - /* Prevent endless loop for malformed list */ - - err("Unable to de-list node publication\n" - "{%u%u}, node=0x%x, ref=%u, key=%u)\n", - publ->type, publ->lower, publ->node, - publ->ref, publ->key); - goto end_node; - } - } - if (publ != sseq->node_list) - prev->node_list_next = publ->node_list_next; - else if (publ->node_list_next != publ) { - prev->node_list_next = publ->node_list_next; - sseq->node_list = publ->node_list_next; - } else { - sseq->node_list = NULL; - } - sseq->node_list_size--; + list_del(&publ->node_list); + info->node_list_size--; } -end_node: /* Contract subseq list if no more publications for that subseq */ - if (!sseq->zone_list) { + if (list_empty(&info->zone_list)) { + kfree(info); free = &nseq->sseqs[nseq->first_free--]; memmove(sseq, sseq + 1, (free - (sseq + 1)) * sizeof(*sseq)); removed_subseq = 1; @@ -506,12 +458,12 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s return; while (sseq != &nseq->sseqs[nseq->first_free]) { - struct publication *zl = sseq->zone_list; - if (zl && tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { - struct publication *crs = zl; + if (tipc_subscr_overlap(s, sseq->lower, sseq->upper)) { + struct publication *crs; + struct name_info *info = sseq->info; int must_report = 1; - do { + list_for_each_entry(crs, &info->zone_list, zone_list) { tipc_subscr_report_overlap(s, sseq->lower, sseq->upper, @@ -520,8 +472,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s crs->node, must_report); must_report = 0; - crs = crs->zone_list_next; - } while (crs != zl); + } } sseq++; } @@ -591,9 +542,10 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) { struct sub_seq *sseq; - struct publication *publ = NULL; + struct name_info *info; + struct publication *publ; struct name_seq *seq; - u32 ref; + u32 ref = 0; if (!tipc_in_scope(*destnode, tipc_own_addr)) return 0; @@ -606,55 +558,57 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) if (unlikely(!sseq)) goto not_found; spin_lock_bh(&seq->lock); + info = sseq->info; /* Closest-First Algorithm: */ if (likely(!*destnode)) { - publ = sseq->node_list; - if (publ) { - sseq->node_list = publ->node_list_next; -found: - ref = publ->ref; - *destnode = publ->node; - spin_unlock_bh(&seq->lock); - read_unlock_bh(&tipc_nametbl_lock); - return ref; - } - publ = sseq->cluster_list; - if (publ) { - sseq->cluster_list = publ->cluster_list_next; - goto found; - } - publ = sseq->zone_list; - if (publ) { - sseq->zone_list = publ->zone_list_next; - goto found; + if (!list_empty(&info->node_list)) { + publ = list_first_entry(&info->node_list, + struct publication, + node_list); + list_move_tail(&publ->node_list, + &info->node_list); + } else if (!list_empty(&info->cluster_list)) { + publ = list_first_entry(&info->cluster_list, + struct publication, + cluster_list); + list_move_tail(&publ->cluster_list, + &info->cluster_list); + } else { + publ = list_first_entry(&info->zone_list, + struct publication, + zone_list); + list_move_tail(&publ->zone_list, + &info->zone_list); } } /* Round-Robin Algorithm: */ else if (*destnode == tipc_own_addr) { - publ = sseq->node_list; - if (publ) { - sseq->node_list = publ->node_list_next; - goto found; - } + if (list_empty(&info->node_list)) + goto no_match; + publ = list_first_entry(&info->node_list, struct publication, + node_list); + list_move_tail(&publ->node_list, &info->node_list); } else if (in_own_cluster(*destnode)) { - publ = sseq->cluster_list; - if (publ) { - sseq->cluster_list = publ->cluster_list_next; - goto found; - } + if (list_empty(&info->cluster_list)) + goto no_match; + publ = list_first_entry(&info->cluster_list, struct publication, + cluster_list); + list_move_tail(&publ->cluster_list, &info->cluster_list); } else { - publ = sseq->zone_list; - if (publ) { - sseq->zone_list = publ->zone_list_next; - goto found; - } + publ = list_first_entry(&info->zone_list, struct publication, + zone_list); + list_move_tail(&publ->zone_list, &info->zone_list); } + + ref = publ->ref; + *destnode = publ->node; +no_match: spin_unlock_bh(&seq->lock); not_found: read_unlock_bh(&tipc_nametbl_lock); - return 0; + return ref; } /** @@ -676,6 +630,7 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, struct name_seq *seq; struct sub_seq *sseq; struct sub_seq *sseq_stop; + struct name_info *info; int res = 0; read_lock_bh(&tipc_nametbl_lock); @@ -693,16 +648,13 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, if (sseq->lower > upper) break; - publ = sseq->node_list; - if (publ) { - do { - if (publ->scope <= limit) - tipc_port_list_add(dports, publ->ref); - publ = publ->node_list_next; - } while (publ != sseq->node_list); + info = sseq->info; + list_for_each_entry(publ, &info->node_list, node_list) { + if (publ->scope <= limit) + tipc_port_list_add(dports, publ->ref); } - if (sseq->cluster_list_size != sseq->node_list_size) + if (info->cluster_list_size != info->node_list_size) res = 1; } @@ -840,16 +792,19 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth, { char portIdStr[27]; const char *scope_str[] = {"", " zone", " cluster", " node"}; - struct publication *publ = sseq->zone_list; + struct publication *publ; + struct name_info *info; tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper); - if (depth == 2 || !publ) { + if (depth == 2) { tipc_printf(buf, "\n"); return; } - do { + info = sseq->info; + + list_for_each_entry(publ, &info->zone_list, zone_list) { sprintf(portIdStr, "<%u.%u.%u:%u>", tipc_zone(publ->node), tipc_cluster(publ->node), tipc_node(publ->node), publ->ref); @@ -858,13 +813,9 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth, tipc_printf(buf, "%-10u %s", publ->key, scope_str[publ->scope]); } - - publ = publ->zone_list_next; - if (publ == sseq->zone_list) - break; - - tipc_printf(buf, "\n%33s", " "); - } while (1); + if (!list_is_last(&publ->zone_list, &info->zone_list)) + tipc_printf(buf, "\n%33s", " "); + }; tipc_printf(buf, "\n"); } diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index d228bd682655..62d77e5e902e 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -2,7 +2,7 @@ * net/tipc/name_table.h: Include file for TIPC name table code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,9 +61,9 @@ struct port_list; * @subscr: subscription to "node down" event (for off-node publications only) * @local_list: adjacent entries in list of publications made by this node * @pport_list: adjacent entries in list of publications made by this port - * @node_list: next matching name seq publication with >= node scope - * @cluster_list: next matching name seq publication with >= cluster scope - * @zone_list: next matching name seq publication with >= zone scope + * @node_list: adjacent matching name seq publications with >= node scope + * @cluster_list: adjacent matching name seq publications with >= cluster scope + * @zone_list: adjacent matching name seq publications with >= zone scope * * Note that the node list, cluster list, and zone list are circular lists. */ @@ -79,9 +79,9 @@ struct publication { struct tipc_node_subscr subscr; struct list_head local_list; struct list_head pport_list; - struct publication *node_list_next; - struct publication *cluster_list_next; - struct publication *zone_list_next; + struct list_head node_list; + struct list_head cluster_list; + struct list_head zone_list; }; diff --git a/net/tipc/port.c b/net/tipc/port.c index c68dc956a423..54d812a5a4d9 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -222,7 +222,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; msg = &p_ptr->phdr; - tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); + tipc_msg_init(msg, importance, TIPC_NAMED_MSG, NAMED_H_SIZE, 0); msg_set_origport(msg, ref); INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); @@ -327,26 +327,23 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) } /* - * port_build_proto_msg(): build a port level protocol - * or a connection abortion message. Called with - * tipc_port lock on. + * port_build_proto_msg(): create connection protocol message for port + * + * On entry the port must be locked and connected. */ -static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, - u32 origport, u32 orignode, - u32 usr, u32 type, u32 err, - u32 ack) +static struct sk_buff *port_build_proto_msg(struct tipc_port *p_ptr, + u32 type, u32 ack) { struct sk_buff *buf; struct tipc_msg *msg; - buf = tipc_buf_acquire(LONG_H_SIZE); + buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { msg = buf_msg(buf); - tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode); - msg_set_errcode(msg, err); - msg_set_destport(msg, destport); - msg_set_origport(msg, origport); - msg_set_orignode(msg, orignode); + tipc_msg_init(msg, CONN_MANAGER, type, INT_H_SIZE, + port_peernode(p_ptr)); + msg_set_destport(msg, port_peerport(p_ptr)); + msg_set_origport(msg, p_ptr->ref); msg_set_msgcnt(msg, ack); } return buf; @@ -358,45 +355,48 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) struct sk_buff *rbuf; struct tipc_msg *rmsg; int hdr_sz; - u32 imp = msg_importance(msg); + u32 imp; u32 data_sz = msg_data_sz(msg); - - if (data_sz > MAX_REJECT_SIZE) - data_sz = MAX_REJECT_SIZE; - if (msg_connected(msg) && (imp < TIPC_CRITICAL_IMPORTANCE)) - imp++; + u32 src_node; + u32 rmsg_sz; /* discard rejected message if it shouldn't be returned to sender */ - if (msg_errcode(msg) || msg_dest_droppable(msg)) { - buf_discard(buf); - return data_sz; - } - /* construct rejected message */ - if (msg_mcast(msg)) - hdr_sz = MCAST_H_SIZE; - else - hdr_sz = LONG_H_SIZE; - rbuf = tipc_buf_acquire(data_sz + hdr_sz); - if (rbuf == NULL) { - buf_discard(buf); - return data_sz; + if (WARN(!msg_isdata(msg), + "attempt to reject message with user=%u", msg_user(msg))) { + dump_stack(); + goto exit; } + if (msg_errcode(msg) || msg_dest_droppable(msg)) + goto exit; + + /* + * construct returned message by copying rejected message header and + * data (or subset), then updating header fields that need adjusting + */ + + hdr_sz = msg_hdr_sz(msg); + rmsg_sz = hdr_sz + min_t(u32, data_sz, MAX_REJECT_SIZE); + + rbuf = tipc_buf_acquire(rmsg_sz); + if (rbuf == NULL) + goto exit; + rmsg = buf_msg(rbuf); - tipc_msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); - msg_set_errcode(rmsg, err); - msg_set_destport(rmsg, msg_origport(msg)); - msg_set_origport(rmsg, msg_destport(msg)); - if (msg_short(msg)) { - msg_set_orignode(rmsg, tipc_own_addr); - /* leave name type & instance as zeroes */ - } else { - msg_set_orignode(rmsg, msg_destnode(msg)); - msg_set_nametype(rmsg, msg_nametype(msg)); - msg_set_nameinst(rmsg, msg_nameinst(msg)); + skb_copy_to_linear_data(rbuf, msg, rmsg_sz); + + if (msg_connected(rmsg)) { + imp = msg_importance(rmsg); + if (imp < TIPC_CRITICAL_IMPORTANCE) + msg_set_importance(rmsg, ++imp); } - msg_set_size(rmsg, data_sz + hdr_sz); - skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz); + msg_set_non_seq(rmsg, 0); + msg_set_size(rmsg, rmsg_sz); + msg_set_errcode(rmsg, err); + msg_set_prevnode(rmsg, tipc_own_addr); + msg_swap_words(rmsg, 4, 5); + if (!msg_short(rmsg)) + msg_swap_words(rmsg, 6, 7); /* send self-abort message when rejecting on a connected port */ if (msg_connected(msg)) { @@ -411,9 +411,15 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) tipc_net_route_msg(abuf); } - /* send rejected message */ + /* send returned message & dispose of rejected message */ + + src_node = msg_prevnode(msg); + if (src_node == tipc_own_addr) + tipc_port_recv_msg(rbuf); + else + tipc_link_send(rbuf, src_node, msg_link_selector(rmsg)); +exit: buf_discard(buf); - tipc_net_route_msg(rbuf); return data_sz; } @@ -449,14 +455,7 @@ static void port_timeout(unsigned long ref) if (p_ptr->probing_state == PROBING) { buf = port_build_self_abort_msg(p_ptr, TIPC_ERR_NO_PORT); } else { - buf = port_build_proto_msg(port_peerport(p_ptr), - port_peernode(p_ptr), - p_ptr->ref, - tipc_own_addr, - CONN_MANAGER, - CONN_PROBE, - TIPC_OK, - 0); + buf = port_build_proto_msg(p_ptr, CONN_PROBE, 0); p_ptr->probing_state = PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } @@ -480,100 +479,94 @@ static void port_handle_node_down(unsigned long ref) static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->phdr); + struct sk_buff *buf = port_build_peer_abort_msg(p_ptr, err); - if (!p_ptr->connected) - return NULL; - if (imp < TIPC_CRITICAL_IMPORTANCE) - imp++; - return port_build_proto_msg(p_ptr->ref, - tipc_own_addr, - port_peerport(p_ptr), - port_peernode(p_ptr), - imp, - TIPC_CONN_MSG, - err, - 0); + if (buf) { + struct tipc_msg *msg = buf_msg(buf); + msg_swap_words(msg, 4, 5); + msg_swap_words(msg, 6, 7); + } + return buf; } static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->phdr); + struct sk_buff *buf; + struct tipc_msg *msg; + u32 imp; if (!p_ptr->connected) return NULL; - if (imp < TIPC_CRITICAL_IMPORTANCE) - imp++; - return port_build_proto_msg(port_peerport(p_ptr), - port_peernode(p_ptr), - p_ptr->ref, - tipc_own_addr, - imp, - TIPC_CONN_MSG, - err, - 0); + + buf = tipc_buf_acquire(BASIC_H_SIZE); + if (buf) { + msg = buf_msg(buf); + memcpy(msg, &p_ptr->phdr, BASIC_H_SIZE); + msg_set_hdr_sz(msg, BASIC_H_SIZE); + msg_set_size(msg, BASIC_H_SIZE); + imp = msg_importance(msg); + if (imp < TIPC_CRITICAL_IMPORTANCE) + msg_set_importance(msg, ++imp); + msg_set_errcode(msg, err); + } + return buf; } void tipc_port_recv_proto_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); - u32 err = TIPC_OK; + struct tipc_port *p_ptr; struct sk_buff *r_buf = NULL; - struct sk_buff *abort_buf = NULL; - - if (!p_ptr) { - err = TIPC_ERR_NO_PORT; - } else if (p_ptr->connected) { - if ((port_peernode(p_ptr) != msg_orignode(msg)) || - (port_peerport(p_ptr) != msg_origport(msg))) { - err = TIPC_ERR_NO_PORT; - } else if (msg_type(msg) == CONN_ACK) { - int wakeup = tipc_port_congested(p_ptr) && - p_ptr->congested && - p_ptr->wakeup; - p_ptr->acked += msg_msgcnt(msg); - if (tipc_port_congested(p_ptr)) - goto exit; - p_ptr->congested = 0; - if (!wakeup) - goto exit; - p_ptr->wakeup(p_ptr); - goto exit; + u32 orignode = msg_orignode(msg); + u32 origport = msg_origport(msg); + u32 destport = msg_destport(msg); + int wakeable; + + /* Validate connection */ + + p_ptr = tipc_port_lock(destport); + if (!p_ptr || !p_ptr->connected || + (port_peernode(p_ptr) != orignode) || + (port_peerport(p_ptr) != origport)) { + r_buf = tipc_buf_acquire(BASIC_H_SIZE); + if (r_buf) { + msg = buf_msg(r_buf); + tipc_msg_init(msg, TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, + BASIC_H_SIZE, orignode); + msg_set_errcode(msg, TIPC_ERR_NO_PORT); + msg_set_origport(msg, destport); + msg_set_destport(msg, origport); } - } else if (p_ptr->published) { - err = TIPC_ERR_NO_PORT; - } - if (err) { - r_buf = port_build_proto_msg(msg_origport(msg), - msg_orignode(msg), - msg_destport(msg), - tipc_own_addr, - TIPC_HIGH_IMPORTANCE, - TIPC_CONN_MSG, - err, - 0); + if (p_ptr) + tipc_port_unlock(p_ptr); goto exit; } - /* All is fine */ - if (msg_type(msg) == CONN_PROBE) { - r_buf = port_build_proto_msg(msg_origport(msg), - msg_orignode(msg), - msg_destport(msg), - tipc_own_addr, - CONN_MANAGER, - CONN_PROBE_REPLY, - TIPC_OK, - 0); + /* Process protocol message sent by peer */ + + switch (msg_type(msg)) { + case CONN_ACK: + wakeable = tipc_port_congested(p_ptr) && p_ptr->congested && + p_ptr->wakeup; + p_ptr->acked += msg_msgcnt(msg); + if (!tipc_port_congested(p_ptr)) { + p_ptr->congested = 0; + if (wakeable) + p_ptr->wakeup(p_ptr); + } + break; + case CONN_PROBE: + r_buf = port_build_proto_msg(p_ptr, CONN_PROBE_REPLY, 0); + break; + default: + /* CONN_PROBE_REPLY or unrecognized - no action required */ + break; } p_ptr->probing_state = CONFIRMED; + tipc_port_unlock(p_ptr); exit: - if (p_ptr) - tipc_port_unlock(p_ptr); tipc_net_route_msg(r_buf); - tipc_net_route_msg(abort_buf); buf_discard(buf); } @@ -889,14 +882,7 @@ void tipc_acknowledge(u32 ref, u32 ack) return; if (p_ptr->connected) { p_ptr->conn_unacked -= ack; - buf = port_build_proto_msg(port_peerport(p_ptr), - port_peernode(p_ptr), - ref, - tipc_own_addr, - CONN_MANAGER, - CONN_ACK, - TIPC_OK, - ack); + buf = port_build_proto_msg(p_ptr, CONN_ACK, ack); } tipc_port_unlock(p_ptr); tipc_net_route_msg(buf); @@ -1140,19 +1126,7 @@ int tipc_shutdown(u32 ref) if (!p_ptr) return -EINVAL; - if (p_ptr->connected) { - u32 imp = msg_importance(&p_ptr->phdr); - if (imp < TIPC_CRITICAL_IMPORTANCE) - imp++; - buf = port_build_proto_msg(port_peerport(p_ptr), - port_peernode(p_ptr), - ref, - tipc_own_addr, - imp, - TIPC_CONN_MSG, - TIPC_CONN_SHUTDOWN, - 0); - } + buf = port_build_peer_abort_msg(p_ptr, TIPC_CONN_SHUTDOWN); tipc_port_unlock(p_ptr); tipc_net_route_msg(buf); return tipc_disconnect(ref); @@ -1238,7 +1212,7 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, msg_set_type(msg, TIPC_NAMED_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); - msg_set_hdr_sz(msg, LONG_H_SIZE); + msg_set_hdr_sz(msg, NAMED_H_SIZE); msg_set_nametype(msg, name->type); msg_set_nameinst(msg, name->instance); msg_set_lookup_scope(msg, tipc_addr_scope(domain)); @@ -1291,7 +1265,7 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); + msg_set_hdr_sz(msg, BASIC_H_SIZE); if (dest->node == tipc_own_addr) res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect, @@ -1331,13 +1305,13 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, msg_set_origport(msg, ref); msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - msg_set_size(msg, DIR_MSG_H_SIZE + dsz); - if (skb_cow(buf, DIR_MSG_H_SIZE)) + msg_set_hdr_sz(msg, BASIC_H_SIZE); + msg_set_size(msg, BASIC_H_SIZE + dsz); + if (skb_cow(buf, BASIC_H_SIZE)) return -ENOMEM; - skb_push(buf, DIR_MSG_H_SIZE); - skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); + skb_push(buf, BASIC_H_SIZE); + skb_copy_to_linear_data(buf, msg, BASIC_H_SIZE); if (dest->node == tipc_own_addr) res = tipc_port_recv_msg(buf); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 338837396642..adb2eff4a102 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -36,9 +36,6 @@ #include <net/sock.h> -#include <linux/tipc.h> -#include <linux/tipc_config.h> - #include "core.h" #include "port.h" diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0722a25a3a33..ec68e1c05b85 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -808,8 +808,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + char *sun_path = sunaddr->sun_path; struct dentry *dentry = NULL; - struct nameidata nd; + struct path path; int err; unsigned hash; struct unix_address *addr; @@ -845,48 +846,44 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr->hash = hash ^ sk->sk_type; atomic_set(&addr->refcnt, 1); - if (sunaddr->sun_path[0]) { + if (sun_path[0]) { unsigned int mode; err = 0; /* * Get the parent directory, calculate the hash for last * component. */ - err = kern_path_parent(sunaddr->sun_path, &nd); - if (err) - goto out_mknod_parent; - - dentry = lookup_create(&nd, 0); + dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) - goto out_mknod_unlock; + goto out_mknod_parent; /* * All right, let's create it. */ mode = S_IFSOCK | (SOCK_INODE(sock)->i_mode & ~current_umask()); - err = mnt_want_write(nd.path.mnt); + err = mnt_want_write(path.mnt); if (err) goto out_mknod_dput; - err = security_path_mknod(&nd.path, dentry, mode, 0); + err = security_path_mknod(&path, dentry, mode, 0); if (err) goto out_mknod_drop_write; - err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); + err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); out_mknod_drop_write: - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); if (err) goto out_mknod_dput; - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - dput(nd.path.dentry); - nd.path.dentry = dentry; + mutex_unlock(&path.dentry->d_inode->i_mutex); + dput(path.dentry); + path.dentry = dentry; addr->hash = UNIX_HASH_SIZE; } spin_lock(&unix_table_lock); - if (!sunaddr->sun_path[0]) { + if (!sun_path[0]) { err = -EADDRINUSE; if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { @@ -897,8 +894,8 @@ out_mknod_drop_write: list = &unix_socket_table[addr->hash]; } else { list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; - u->dentry = nd.path.dentry; - u->mnt = nd.path.mnt; + u->dentry = path.dentry; + u->mnt = path.mnt; } err = 0; @@ -915,9 +912,8 @@ out: out_mknod_dput: dput(dentry); -out_mknod_unlock: - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + mutex_unlock(&path.dentry->d_inode->i_mutex); + path_put(&path); out_mknod_parent: if (err == -EEXIST) err = -EADDRINUSE; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 4680b1e4c79c..d30615419b4d 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -237,21 +237,21 @@ static int x25_device_event(struct notifier_block *this, unsigned long event, #endif ) { switch (event) { - case NETDEV_UP: - x25_link_device_up(dev); - break; - case NETDEV_GOING_DOWN: - nb = x25_get_neigh(dev); - if (nb) { - x25_terminate_link(nb); - x25_neigh_put(nb); - } - break; - case NETDEV_DOWN: - x25_kill_by_device(dev); - x25_route_device_down(dev); - x25_link_device_down(dev); - break; + case NETDEV_UP: + x25_link_device_up(dev); + break; + case NETDEV_GOING_DOWN: + nb = x25_get_neigh(dev); + if (nb) { + x25_terminate_link(nb); + x25_neigh_put(nb); + } + break; + case NETDEV_DOWN: + x25_kill_by_device(dev); + x25_route_device_down(dev); + x25_link_device_down(dev); + break; } } @@ -1336,256 +1336,253 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) int rc; switch (cmd) { - case TIOCOUTQ: { - int amount; + case TIOCOUTQ: { + int amount; - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - rc = put_user(amount, (unsigned int __user *)argp); - break; - } + amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); + if (amount < 0) + amount = 0; + rc = put_user(amount, (unsigned int __user *)argp); + break; + } - case TIOCINQ: { - struct sk_buff *skb; - int amount = 0; - /* - * These two are safe on a single CPU system as - * only user tasks fiddle here - */ - lock_sock(sk); - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - release_sock(sk); - rc = put_user(amount, (unsigned int __user *)argp); - break; - } + case TIOCINQ: { + struct sk_buff *skb; + int amount = 0; + /* + * These two are safe on a single CPU system as + * only user tasks fiddle here + */ + lock_sock(sk); + if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) + amount = skb->len; + release_sock(sk); + rc = put_user(amount, (unsigned int __user *)argp); + break; + } - case SIOCGSTAMP: - rc = -EINVAL; - if (sk) - rc = sock_get_timestamp(sk, + case SIOCGSTAMP: + rc = -EINVAL; + if (sk) + rc = sock_get_timestamp(sk, (struct timeval __user *)argp); + break; + case SIOCGSTAMPNS: + rc = -EINVAL; + if (sk) + rc = sock_get_timestampns(sk, + (struct timespec __user *)argp); + break; + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + rc = -EINVAL; + break; + case SIOCADDRT: + case SIOCDELRT: + rc = -EPERM; + if (!capable(CAP_NET_ADMIN)) break; - case SIOCGSTAMPNS: - rc = -EINVAL; - if (sk) - rc = sock_get_timestampns(sk, - (struct timespec __user *)argp); - break; - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - rc = -EINVAL; - break; - case SIOCADDRT: - case SIOCDELRT: - rc = -EPERM; - if (!capable(CAP_NET_ADMIN)) - break; - rc = x25_route_ioctl(cmd, argp); - break; - case SIOCX25GSUBSCRIP: - rc = x25_subscr_ioctl(cmd, argp); - break; - case SIOCX25SSUBSCRIP: - rc = -EPERM; - if (!capable(CAP_NET_ADMIN)) - break; - rc = x25_subscr_ioctl(cmd, argp); - break; - case SIOCX25GFACILITIES: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->facilities, - sizeof(x25->facilities)) - ? -EFAULT : 0; - release_sock(sk); + rc = x25_route_ioctl(cmd, argp); + break; + case SIOCX25GSUBSCRIP: + rc = x25_subscr_ioctl(cmd, argp); + break; + case SIOCX25SSUBSCRIP: + rc = -EPERM; + if (!capable(CAP_NET_ADMIN)) break; - } + rc = x25_subscr_ioctl(cmd, argp); + break; + case SIOCX25GFACILITIES: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->facilities, + sizeof(x25->facilities)) + ? -EFAULT : 0; + release_sock(sk); + break; + } - case SIOCX25SFACILITIES: { - struct x25_facilities facilities; - rc = -EFAULT; - if (copy_from_user(&facilities, argp, - sizeof(facilities))) - break; - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_LISTEN && - sk->sk_state != TCP_CLOSE) - goto out_fac_release; - if (facilities.pacsize_in < X25_PS16 || - facilities.pacsize_in > X25_PS4096) - goto out_fac_release; - if (facilities.pacsize_out < X25_PS16 || - facilities.pacsize_out > X25_PS4096) - goto out_fac_release; - if (facilities.winsize_in < 1 || - facilities.winsize_in > 127) + case SIOCX25SFACILITIES: { + struct x25_facilities facilities; + rc = -EFAULT; + if (copy_from_user(&facilities, argp, sizeof(facilities))) + break; + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_CLOSE) + goto out_fac_release; + if (facilities.pacsize_in < X25_PS16 || + facilities.pacsize_in > X25_PS4096) + goto out_fac_release; + if (facilities.pacsize_out < X25_PS16 || + facilities.pacsize_out > X25_PS4096) + goto out_fac_release; + if (facilities.winsize_in < 1 || + facilities.winsize_in > 127) + goto out_fac_release; + if (facilities.throughput) { + int out = facilities.throughput & 0xf0; + int in = facilities.throughput & 0x0f; + if (!out) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT << 4; + else if (out < 0x30 || out > 0xD0) goto out_fac_release; - if (facilities.throughput) { - int out = facilities.throughput & 0xf0; - int in = facilities.throughput & 0x0f; - if (!out) - facilities.throughput |= - X25_DEFAULT_THROUGHPUT << 4; - else if (out < 0x30 || out > 0xD0) - goto out_fac_release; - if (!in) - facilities.throughput |= - X25_DEFAULT_THROUGHPUT; - else if (in < 0x03 || in > 0x0D) - goto out_fac_release; - } - if (facilities.reverse && - (facilities.reverse & 0x81) != 0x81) + if (!in) + facilities.throughput |= + X25_DEFAULT_THROUGHPUT; + else if (in < 0x03 || in > 0x0D) goto out_fac_release; - x25->facilities = facilities; - rc = 0; -out_fac_release: - release_sock(sk); - break; - } - - case SIOCX25GDTEFACILITIES: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->dte_facilities, - sizeof(x25->dte_facilities)); - release_sock(sk); - if (rc) - rc = -EFAULT; - break; } + if (facilities.reverse && + (facilities.reverse & 0x81) != 0x81) + goto out_fac_release; + x25->facilities = facilities; + rc = 0; +out_fac_release: + release_sock(sk); + break; + } - case SIOCX25SDTEFACILITIES: { - struct x25_dte_facilities dtefacs; + case SIOCX25GDTEFACILITIES: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->dte_facilities, + sizeof(x25->dte_facilities)); + release_sock(sk); + if (rc) rc = -EFAULT; - if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) - break; - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_LISTEN && - sk->sk_state != TCP_CLOSE) - goto out_dtefac_release; - if (dtefacs.calling_len > X25_MAX_AE_LEN) - goto out_dtefac_release; - if (dtefacs.calling_ae == NULL) - goto out_dtefac_release; - if (dtefacs.called_len > X25_MAX_AE_LEN) - goto out_dtefac_release; - if (dtefacs.called_ae == NULL) - goto out_dtefac_release; - x25->dte_facilities = dtefacs; - rc = 0; -out_dtefac_release: - release_sock(sk); - break; - } + break; + } - case SIOCX25GCALLUSERDATA: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->calluserdata, - sizeof(x25->calluserdata)) - ? -EFAULT : 0; - release_sock(sk); + case SIOCX25SDTEFACILITIES: { + struct x25_dte_facilities dtefacs; + rc = -EFAULT; + if (copy_from_user(&dtefacs, argp, sizeof(dtefacs))) break; - } + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_CLOSE) + goto out_dtefac_release; + if (dtefacs.calling_len > X25_MAX_AE_LEN) + goto out_dtefac_release; + if (dtefacs.calling_ae == NULL) + goto out_dtefac_release; + if (dtefacs.called_len > X25_MAX_AE_LEN) + goto out_dtefac_release; + if (dtefacs.called_ae == NULL) + goto out_dtefac_release; + x25->dte_facilities = dtefacs; + rc = 0; +out_dtefac_release: + release_sock(sk); + break; + } - case SIOCX25SCALLUSERDATA: { - struct x25_calluserdata calluserdata; + case SIOCX25GCALLUSERDATA: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->calluserdata, + sizeof(x25->calluserdata)) + ? -EFAULT : 0; + release_sock(sk); + break; + } - rc = -EFAULT; - if (copy_from_user(&calluserdata, argp, - sizeof(calluserdata))) - break; - rc = -EINVAL; - if (calluserdata.cudlength > X25_MAX_CUD_LEN) - break; - lock_sock(sk); - x25->calluserdata = calluserdata; - release_sock(sk); - rc = 0; - break; - } + case SIOCX25SCALLUSERDATA: { + struct x25_calluserdata calluserdata; - case SIOCX25GCAUSEDIAG: { - lock_sock(sk); - rc = copy_to_user(argp, &x25->causediag, - sizeof(x25->causediag)) - ? -EFAULT : 0; - release_sock(sk); + rc = -EFAULT; + if (copy_from_user(&calluserdata, argp, sizeof(calluserdata))) break; - } + rc = -EINVAL; + if (calluserdata.cudlength > X25_MAX_CUD_LEN) + break; + lock_sock(sk); + x25->calluserdata = calluserdata; + release_sock(sk); + rc = 0; + break; + } - case SIOCX25SCAUSEDIAG: { - struct x25_causediag causediag; - rc = -EFAULT; - if (copy_from_user(&causediag, argp, sizeof(causediag))) - break; - lock_sock(sk); - x25->causediag = causediag; - release_sock(sk); - rc = 0; + case SIOCX25GCAUSEDIAG: { + lock_sock(sk); + rc = copy_to_user(argp, &x25->causediag, sizeof(x25->causediag)) + ? -EFAULT : 0; + release_sock(sk); + break; + } + + case SIOCX25SCAUSEDIAG: { + struct x25_causediag causediag; + rc = -EFAULT; + if (copy_from_user(&causediag, argp, sizeof(causediag))) break; + lock_sock(sk); + x25->causediag = causediag; + release_sock(sk); + rc = 0; + break; - } + } - case SIOCX25SCUDMATCHLEN: { - struct x25_subaddr sub_addr; - rc = -EINVAL; - lock_sock(sk); - if(sk->sk_state != TCP_CLOSE) - goto out_cud_release; - rc = -EFAULT; - if (copy_from_user(&sub_addr, argp, - sizeof(sub_addr))) - goto out_cud_release; - rc = -EINVAL; - if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) - goto out_cud_release; - x25->cudmatchlength = sub_addr.cudmatchlength; - rc = 0; + case SIOCX25SCUDMATCHLEN: { + struct x25_subaddr sub_addr; + rc = -EINVAL; + lock_sock(sk); + if(sk->sk_state != TCP_CLOSE) + goto out_cud_release; + rc = -EFAULT; + if (copy_from_user(&sub_addr, argp, + sizeof(sub_addr))) + goto out_cud_release; + rc = -EINVAL; + if (sub_addr.cudmatchlength > X25_MAX_CUD_LEN) + goto out_cud_release; + x25->cudmatchlength = sub_addr.cudmatchlength; + rc = 0; out_cud_release: - release_sock(sk); - break; - } + release_sock(sk); + break; + } - case SIOCX25CALLACCPTAPPRV: { - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); - release_sock(sk); - rc = 0; + case SIOCX25CALLACCPTAPPRV: { + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE) break; - } + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + release_sock(sk); + rc = 0; + break; + } - case SIOCX25SENDCALLACCPT: { - rc = -EINVAL; - lock_sock(sk); - if (sk->sk_state != TCP_ESTABLISHED) - break; - /* must call accptapprv above */ - if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; - x25_write_internal(sk, X25_CALL_ACCEPTED); - x25->state = X25_STATE_3; - release_sock(sk); - rc = 0; + case SIOCX25SENDCALLACCPT: { + rc = -EINVAL; + lock_sock(sk); + if (sk->sk_state != TCP_ESTABLISHED) break; - } - - default: - rc = -ENOIOCTLCMD; + /* must call accptapprv above */ + if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) break; + x25_write_internal(sk, X25_CALL_ACCEPTED); + x25->state = X25_STATE_3; + release_sock(sk); + rc = 0; + break; + } + + default: + rc = -ENOIOCTLCMD; + break; } return rc; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 9005f6daeab5..e547ca1578c3 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -146,21 +146,21 @@ void x25_establish_link(struct x25_neigh *nb) unsigned char *ptr; switch (nb->dev->type) { - case ARPHRD_X25: - if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "x25_dev: out of memory\n"); - return; - } - ptr = skb_put(skb, 1); - *ptr = X25_IFACE_CONNECT; - break; + case ARPHRD_X25: + if ((skb = alloc_skb(1, GFP_ATOMIC)) == NULL) { + printk(KERN_ERR "x25_dev: out of memory\n"); + return; + } + ptr = skb_put(skb, 1); + *ptr = X25_IFACE_CONNECT; + break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) - case ARPHRD_ETHER: - return; + case ARPHRD_ETHER: + return; #endif - default: - return; + default: + return; } skb->protocol = htons(ETH_P_X25); @@ -202,19 +202,19 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb) skb_reset_network_header(skb); switch (nb->dev->type) { - case ARPHRD_X25: - dptr = skb_push(skb, 1); - *dptr = X25_IFACE_DATA; - break; + case ARPHRD_X25: + dptr = skb_push(skb, 1); + *dptr = X25_IFACE_DATA; + break; #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) - case ARPHRD_ETHER: - kfree_skb(skb); - return; + case ARPHRD_ETHER: + kfree_skb(skb); + return; #endif - default: - kfree_skb(skb); - return; + default: + kfree_skb(skb); + return; } skb->protocol = htons(ETH_P_X25); diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 15de65f04719..0b073b51b183 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -94,55 +94,55 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp struct x25_sock *x25 = x25_sk(sk); switch (frametype) { - case X25_CALL_ACCEPTED: { - - x25_stop_timer(sk); - x25->condition = 0x00; - x25->vs = 0; - x25->va = 0; - x25->vr = 0; - x25->vl = 0; - x25->state = X25_STATE_3; - sk->sk_state = TCP_ESTABLISHED; - /* - * Parse the data in the frame. - */ - skb_pull(skb, X25_STD_MIN_LEN); - - len = x25_parse_address_block(skb, &source_addr, - &dest_addr); - if (len > 0) - skb_pull(skb, len); - else if (len < 0) - goto out_clear; - - len = x25_parse_facilities(skb, &x25->facilities, - &x25->dte_facilities, - &x25->vc_facil_mask); - if (len > 0) - skb_pull(skb, len); - else if (len < 0) - goto out_clear; - /* - * Copy any Call User Data. - */ - if (skb->len > 0) { - skb_copy_from_linear_data(skb, - x25->calluserdata.cuddata, - skb->len); - x25->calluserdata.cudlength = skb->len; - } - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - break; + case X25_CALL_ACCEPTED: { + + x25_stop_timer(sk); + x25->condition = 0x00; + x25->vs = 0; + x25->va = 0; + x25->vr = 0; + x25->vl = 0; + x25->state = X25_STATE_3; + sk->sk_state = TCP_ESTABLISHED; + /* + * Parse the data in the frame. + */ + skb_pull(skb, X25_STD_MIN_LEN); + + len = x25_parse_address_block(skb, &source_addr, + &dest_addr); + if (len > 0) + skb_pull(skb, len); + else if (len < 0) + goto out_clear; + + len = x25_parse_facilities(skb, &x25->facilities, + &x25->dte_facilities, + &x25->vc_facil_mask); + if (len > 0) + skb_pull(skb, len); + else if (len < 0) + goto out_clear; + /* + * Copy any Call User Data. + */ + if (skb->len > 0) { + skb_copy_from_linear_data(skb, + x25->calluserdata.cuddata, + skb->len); + x25->calluserdata.cudlength = skb->len; } - case X25_CLEAR_REQUEST: - x25_write_internal(sk, X25_CLEAR_CONFIRMATION); - x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); - break; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + } + case X25_CLEAR_REQUEST: + x25_write_internal(sk, X25_CLEAR_CONFIRMATION); + x25_disconnect(sk, ECONNREFUSED, skb->data[3], skb->data[4]); + break; - default: - break; + default: + break; } return 0; @@ -354,18 +354,18 @@ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb) frametype = x25_decode(sk, skb, &ns, &nr, &q, &d, &m); switch (x25->state) { - case X25_STATE_1: - queued = x25_state1_machine(sk, skb, frametype); - break; - case X25_STATE_2: - queued = x25_state2_machine(sk, skb, frametype); - break; - case X25_STATE_3: - queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m); - break; - case X25_STATE_4: - queued = x25_state4_machine(sk, skb, frametype); - break; + case X25_STATE_1: + queued = x25_state1_machine(sk, skb, frametype); + break; + case X25_STATE_2: + queued = x25_state2_machine(sk, skb, frametype); + break; + case X25_STATE_3: + queued = x25_state3_machine(sk, skb, frametype, ns, nr, q, d, m); + break; + case X25_STATE_4: + queued = x25_state4_machine(sk, skb, frametype); + break; } x25_kick(sk); diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 21306928d47f..037958ff8eed 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -76,30 +76,29 @@ void x25_link_control(struct sk_buff *skb, struct x25_neigh *nb, int confirm; switch (frametype) { - case X25_RESTART_REQUEST: - confirm = !x25_t20timer_pending(nb); - x25_stop_t20timer(nb); - nb->state = X25_LINK_STATE_3; - if (confirm) - x25_transmit_restart_confirmation(nb); - break; - - case X25_RESTART_CONFIRMATION: - x25_stop_t20timer(nb); - nb->state = X25_LINK_STATE_3; - break; - - case X25_DIAGNOSTIC: - printk(KERN_WARNING "x25: diagnostic #%d - " - "%02X %02X %02X\n", - skb->data[3], skb->data[4], - skb->data[5], skb->data[6]); - break; - - default: - printk(KERN_WARNING "x25: received unknown %02X " - "with LCI 000\n", frametype); - break; + case X25_RESTART_REQUEST: + confirm = !x25_t20timer_pending(nb); + x25_stop_t20timer(nb); + nb->state = X25_LINK_STATE_3; + if (confirm) + x25_transmit_restart_confirmation(nb); + break; + + case X25_RESTART_CONFIRMATION: + x25_stop_t20timer(nb); + nb->state = X25_LINK_STATE_3; + break; + + case X25_DIAGNOSTIC: + printk(KERN_WARNING "x25: diagnostic #%d - %02X %02X %02X\n", + skb->data[3], skb->data[4], + skb->data[5], skb->data[6]); + break; + + default: + printk(KERN_WARNING "x25: received unknown %02X with LCI 000\n", + frametype); + break; } if (nb->state == X25_LINK_STATE_3) @@ -193,18 +192,18 @@ void x25_transmit_clear_request(struct x25_neigh *nb, unsigned int lci, void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *nb) { switch (nb->state) { - case X25_LINK_STATE_0: - skb_queue_tail(&nb->queue, skb); - nb->state = X25_LINK_STATE_1; - x25_establish_link(nb); - break; - case X25_LINK_STATE_1: - case X25_LINK_STATE_2: - skb_queue_tail(&nb->queue, skb); - break; - case X25_LINK_STATE_3: - x25_send_frame(skb, nb); - break; + case X25_LINK_STATE_0: + skb_queue_tail(&nb->queue, skb); + nb->state = X25_LINK_STATE_1; + x25_establish_link(nb); + break; + case X25_LINK_STATE_1: + case X25_LINK_STATE_2: + skb_queue_tail(&nb->queue, skb); + break; + case X25_LINK_STATE_3: + x25_send_frame(skb, nb); + break; } } @@ -214,14 +213,14 @@ void x25_transmit_link(struct sk_buff *skb, struct x25_neigh *nb) void x25_link_established(struct x25_neigh *nb) { switch (nb->state) { - case X25_LINK_STATE_0: - nb->state = X25_LINK_STATE_2; - break; - case X25_LINK_STATE_1: - x25_transmit_restart_request(nb); - nb->state = X25_LINK_STATE_2; - x25_start_t20timer(nb); - break; + case X25_LINK_STATE_0: + nb->state = X25_LINK_STATE_2; + break; + case X25_LINK_STATE_1: + x25_transmit_restart_request(nb); + nb->state = X25_LINK_STATE_2; + x25_start_t20timer(nb); + break; } } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index dc20cf12f39b..24a342ebc7f5 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -126,32 +126,30 @@ void x25_write_internal(struct sock *sk, int frametype) * Adjust frame size. */ switch (frametype) { - case X25_CALL_REQUEST: - len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + - X25_MAX_CUD_LEN; - break; - case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ - if(x25->facilities.reverse & 0x80) { - len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; - } else { - len += 1 + X25_MAX_FAC_LEN; - } - break; - case X25_CLEAR_REQUEST: - case X25_RESET_REQUEST: - len += 2; - break; - case X25_RR: - case X25_RNR: - case X25_REJ: - case X25_CLEAR_CONFIRMATION: - case X25_INTERRUPT_CONFIRMATION: - case X25_RESET_CONFIRMATION: - break; - default: - printk(KERN_ERR "X.25: invalid frame type %02X\n", - frametype); - return; + case X25_CALL_REQUEST: + len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + break; + case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ + if (x25->facilities.reverse & 0x80) { + len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + } else { + len += 1 + X25_MAX_FAC_LEN; + } + break; + case X25_CLEAR_REQUEST: + case X25_RESET_REQUEST: + len += 2; + break; + case X25_RR: + case X25_RNR: + case X25_REJ: + case X25_CLEAR_CONFIRMATION: + case X25_INTERRUPT_CONFIRMATION: + case X25_RESET_CONFIRMATION: + break; + default: + printk(KERN_ERR "X.25: invalid frame type %02X\n", frametype); + return; } if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) @@ -276,20 +274,20 @@ int x25_decode(struct sock *sk, struct sk_buff *skb, int *ns, int *nr, int *q, *ns = *nr = *q = *d = *m = 0; switch (frame[2]) { - case X25_CALL_REQUEST: - case X25_CALL_ACCEPTED: - case X25_CLEAR_REQUEST: - case X25_CLEAR_CONFIRMATION: - case X25_INTERRUPT: - case X25_INTERRUPT_CONFIRMATION: - case X25_RESET_REQUEST: - case X25_RESET_CONFIRMATION: - case X25_RESTART_REQUEST: - case X25_RESTART_CONFIRMATION: - case X25_REGISTRATION_REQUEST: - case X25_REGISTRATION_CONFIRMATION: - case X25_DIAGNOSTIC: - return frame[2]; + case X25_CALL_REQUEST: + case X25_CALL_ACCEPTED: + case X25_CLEAR_REQUEST: + case X25_CLEAR_CONFIRMATION: + case X25_INTERRUPT: + case X25_INTERRUPT_CONFIRMATION: + case X25_RESET_REQUEST: + case X25_RESET_CONFIRMATION: + case X25_RESTART_REQUEST: + case X25_RESTART_CONFIRMATION: + case X25_REGISTRATION_REQUEST: + case X25_REGISTRATION_CONFIRMATION: + case X25_DIAGNOSTIC: + return frame[2]; } if (x25->neighbour->extended) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9bec2e8a838c..94fdcc7f1030 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,7 +50,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); +static int xfrm_bundle_ok(struct xfrm_dst *xdst); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -1497,7 +1497,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto free_dst; /* Copy neighbour for reachability confirmation */ - dst0->neighbour = neigh_clone(dst->neighbour); + dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour(dst))); xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); xfrm_init_pmtu(dst_prev); @@ -2241,7 +2241,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2313,7 +2313,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -static int xfrm_bundle_ok(struct xfrm_dst *first, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2385,6 +2385,11 @@ static unsigned int xfrm_default_mtu(const struct dst_entry *dst) return dst_mtu(dst->path); } +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) +{ + return dst_neigh_lookup(dst->path, daddr); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { struct net *net; @@ -2410,6 +2415,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; + if (likely(dst_ops->neigh_lookup == NULL)) + dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) afinfo->garbage_collect = __xfrm_garbage_collect; xfrm_policy_afinfo[afinfo->family] = afinfo; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 47f1b8638df9..b11ea692bd7d 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -265,7 +265,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; @@ -471,7 +471,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); } else { - nr = replay_esn->replay_window >> 5; + nr = (replay_esn->replay_window - 1) >> 5; for (i = 0; i <= nr; i++) replay_esn->bmp[i] = 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d70f85eb7864..9414b9c5b1e4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1345,6 +1345,8 @@ out: xfrm_state_check_expire(x1); err = 0; + x->km.state = XFRM_STATE_DEAD; + __xfrm_state_put(x); } spin_unlock_bh(&x1->lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c658cb3bc7c3..0256b8a0a7cf 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2299,7 +2299,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, + link->dump, link->done, 0); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, |